diff --git a/paddle/fluid/inference/analysis/passes/CMakeLists.txt b/paddle/fluid/inference/analysis/passes/CMakeLists.txt index a30c27b1183a75de8c0bb50ef3617d747b239fae..98334760a694fab995a9322f1b725caa7307c28d 100644 --- a/paddle/fluid/inference/analysis/passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/passes/CMakeLists.txt @@ -1,6 +1,7 @@ cc_library(ir_graph_build_pass SRCS ir_graph_build_pass.cc DEPS analysis_pass argument ir_pass_manager) cc_library(ir_analysis_pass SRCS ir_analysis_pass.cc DEPS analysis_pass argument ir_pass_manager) -cc_library(analysis_passes SRCS passes.cc DEPS ir_graph_build_pass ir_analysis_pass) +cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager analysis_helper) +cc_library(analysis_passes SRCS passes.cc DEPS ir_graph_build_pass ir_analysis_pass ir_params_sync_among_devices_pass) set(analysis_deps ${analysis_deps} ir_graph_build_pass diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc index 108cb6f74b1208395a4faabdf6184152c300d244..c3a2b3ca1d3b09e71921fde0b0bad8d195aaa38f 100644 --- a/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc @@ -61,6 +61,7 @@ void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) { void IrAnalysisComposePass::ApplyIrPasses(Argument *argument) { std::vector passes({ "ir_graph_build_pass", "ir_analysis_pass", + "ir_params_sync_among_devices_pass", }); for (const auto &pass : passes) { VLOG(2) << "Run pass " << pass; diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc index d5e0d90de1da8e54e2411c266f7a8c09c33b0336..740030c3a80e4d7e2ac47998a304be97758b95cb 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc +++ 
b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc @@ -36,12 +36,7 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { // so that the parameters will on the same device, or they will keep copying // between difference devices. platform::Place place; - if (argument->use_gpu()) { - PADDLE_ENFORCE(argument->gpu_device_id_valid()); - place = platform::CUDAPlace(argument->gpu_device_id()); - } else { - place = platform::CPUPlace(); - } + place = platform::CPUPlace(); if (argument->model_dir_valid()) { auto program = diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..e42f1350525962b1b7509a9feb029f571ca05e26 --- /dev/null +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -0,0 +1,86 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
+#include "paddle/fluid/framework/data_layout.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace {
+// Returns true when `var` is persistable and its type is neither
+// FEED_MINIBATCH nor FETCH_LIST.
+bool IsPersistable(const framework::VarDesc *var) {
+  return var->Persistable() &&
+         var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
+         var->GetType() != framework::proto::VarType::FETCH_LIST;
+}
+}  // namespace
+namespace inference {
+namespace analysis {
+
+// Copies every persistable variable in block 0 of the analyzed program to the
+// GPU given by argument->gpu_device_id(). Returns without copying anything
+// when argument->use_gpu() is false.
+void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
+  PADDLE_ENFORCE(argument->scope_valid());
+  PADDLE_ENFORCE(argument->use_gpu_valid());
+
+  // The parameters are on the cpu, therefore, synchronization is not necessary.
+  if (!argument->use_gpu()) return;
+
+  LOG(INFO) << "Sync params from CPU to GPU";
+
+  PADDLE_ENFORCE(argument->gpu_device_id_valid());
+  const platform::Place place = platform::CUDAPlace(argument->gpu_device_id());
+
+  auto *scope = argument->scope_ptr();
+  // Get the program which has been processed by several passes.
+  analysis_program_.reset(
+      new framework::ProgramDesc(argument->ir_analyzed_program()));
+
+  const auto &global_block = analysis_program_->Block(0);
+
+  // Sync the params from cpu to gpu.
+  for (auto &var : global_block.AllVars()) {
+    if (!IsPersistable(var)) continue;
+    const std::string var_name = var->Name();
+    VLOG(3) << "Sync param " << var_name;
+    // NOTE(review): GetFromScope and mutable_data carry no template arguments
+    // in this copy of the patch (possibly lost in extraction) -- confirm.
+    auto &t = inference::analysis::GetFromScope(*scope, var_name);
+
+    // Stage the parameter data in a temporary CPU tensor.
+    platform::CPUPlace cpu_place;
+    framework::LoDTensor temp_tensor;
+    temp_tensor.Resize(t.dims());
+    temp_tensor.mutable_data(cpu_place);
+    TensorCopySync(t, cpu_place, &temp_tensor);
+
+    // Reallocate the parameter's space on the GPU, then copy the staged data
+    // into the newly allocated space.
+    t.mutable_data(place);
+    TensorCopySync(temp_tensor, place, &t);
+  }
+}
+
+// Returns the fixed textual representation of this pass.
+std::string IrParamsSyncAmongDevicesPass::repr() const {
+  return "ir-params-sync-among-devices-pass";
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
new file mode 100644
index 0000000000000000000000000000000000000000..6818887b96c246f1c05962531cb639de1cf7a1b1
--- /dev/null
+++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
+#include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/platform/place.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+/*
+ * Sync parameters from CPU to GPU.
+ *
+ * RunImpl() copies nothing unless argument->use_gpu() is true; in that case
+ * it copies the persistable variables of the analyzed program's block 0 to
+ * the GPU given by argument->gpu_device_id().
+ */
+class IrParamsSyncAmongDevicesPass : public AnalysisPass {
+ public:
+  // Copies the persistable variables of the analyzed program to the GPU.
+  void RunImpl(Argument *argument) override;
+  // Returns "ir-params-sync-among-devices-pass".
+  std::string repr() const override;
+
+ private:
+  // Copy of argument->ir_analyzed_program() made by RunImpl().
+  std::unique_ptr<framework::ProgramDesc> analysis_program_;
+};
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/passes/passes.cc b/paddle/fluid/inference/analysis/passes/passes.cc
index 2ef515f45f2483df8d1238b4758d6729d0299ce9..9245e32cee28473c21e2acbc1c64165d8b475d3b 100644
--- a/paddle/fluid/inference/analysis/passes/passes.cc
+++ b/paddle/fluid/inference/analysis/passes/passes.cc
@@ -16,6 +16,7 @@
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc"
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
+#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 
 namespace paddle {
 namespace inference {
@@ -27,6 +28,9 @@ PassRegistry::PassRegistry() {
                   std::unique_ptr(new IrGraphBuildPass));
   passes_.emplace("ir_analysis_compose_pass",
                   std::unique_ptr(new IrAnalysisComposePass));
+  passes_.emplace(
+      "ir_params_sync_among_devices_pass",
+      std::unique_ptr(new IrParamsSyncAmongDevicesPass));
 }
 
 }  // namespace analysis
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index 12e3a6f42e14010feedbbb5d8f8a98f60cea4556..825bee833bf918067497f56adebbbcaf55f892a2 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -116,12 +116,8 @@ class CpuPassStrategy : public PassStrategy {
 class GpuPassStrategy : public PassStrategy {
  public:
   GpuPassStrategy() : PassStrategy({}) {
-    // TODO(NHZlX) Problem with Data synchronization between GPU and CPU
-    // When running in GPU mode, the parameters are all on GPU. But the
-    // opearations of "conv_bn_fuse_pass" are on CPU.
passes_.assign({ - "infer_clean_graph_pass", - // "infer_clean_graph_pass", "conv_bn_fuse_pass", + "infer_clean_graph_pass", "conv_bn_fuse_pass", }); }