From 900fbb83f920b0492c7b7aca37f8fc6f8e58295a Mon Sep 17 00:00:00 2001 From: nhzlx Date: Wed, 28 Nov 2018 11:23:05 +0000 Subject: [PATCH] add params sync pass --- .../inference/analysis/passes/CMakeLists.txt | 3 +- .../passes/ir_analysis_compose_pass.cc | 1 + .../analysis/passes/ir_graph_build_pass.cc | 7 +- .../ir_params_sync_among_devices_pass.cc | 86 +++++++++++++++++++ .../ir_params_sync_among_devices_pass.h | 42 +++++++++ .../fluid/inference/analysis/passes/passes.cc | 4 + .../fluid/inference/api/paddle_pass_builder.h | 6 +- 7 files changed, 137 insertions(+), 12 deletions(-) create mode 100644 paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc create mode 100644 paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h diff --git a/paddle/fluid/inference/analysis/passes/CMakeLists.txt b/paddle/fluid/inference/analysis/passes/CMakeLists.txt index a30c27b118..98334760a6 100644 --- a/paddle/fluid/inference/analysis/passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/passes/CMakeLists.txt @@ -1,6 +1,7 @@ cc_library(ir_graph_build_pass SRCS ir_graph_build_pass.cc DEPS analysis_pass argument ir_pass_manager) cc_library(ir_analysis_pass SRCS ir_analysis_pass.cc DEPS analysis_pass argument ir_pass_manager) -cc_library(analysis_passes SRCS passes.cc DEPS ir_graph_build_pass ir_analysis_pass) +cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager analysis_helper) +cc_library(analysis_passes SRCS passes.cc DEPS ir_graph_build_pass ir_analysis_pass ir_params_sync_among_devices_pass) set(analysis_deps ${analysis_deps} ir_graph_build_pass diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc index 108cb6f74b..c3a2b3ca1d 100644 --- a/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc +++ 
b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc @@ -61,6 +61,7 @@ void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) { void IrAnalysisComposePass::ApplyIrPasses(Argument *argument) { std::vector<std::string> passes({ "ir_graph_build_pass", "ir_analysis_pass", + "ir_params_sync_among_devices_pass", }); for (const auto &pass : passes) { VLOG(2) << "Run pass " << pass; diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc index d5e0d90de1..740030c3a8 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc @@ -36,12 +36,7 @@ void IrGraphBuildPass::RunImpl(Argument *argument) { // so that the parameters will on the same device, or they will keep copying // between difference devices. platform::Place place; - if (argument->use_gpu()) { - PADDLE_ENFORCE(argument->gpu_device_id_valid()); - place = platform::CUDAPlace(argument->gpu_device_id()); - } else { - place = platform::CPUPlace(); - } + place = platform::CPUPlace(); if (argument->model_dir_valid()) { auto program = diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc new file mode 100644 index 0000000000..e42f135052 --- /dev/null +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -0,0 +1,86 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
+#include "paddle/fluid/framework/data_layout.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace {
+bool IsPersistable(const framework::VarDesc *var) {
+  if (var->Persistable() &&
+      var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
+      var->GetType() != framework::proto::VarType::FETCH_LIST) {
+    return true;
+  }
+  return false;
+}
+}  // namespace
+namespace inference {
+namespace analysis {
+
+void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
+  PADDLE_ENFORCE(argument->scope_valid());
+  PADDLE_ENFORCE(argument->use_gpu_valid());
+
+  platform::Place place;
+
+  // The parameters are already on the CPU, so no synchronization is needed.
+  if (!argument->use_gpu()) return;
+
+  LOG(INFO) << "Sync params from CPU to GPU";
+
+  PADDLE_ENFORCE(argument->gpu_device_id_valid());
+  place = platform::CUDAPlace(argument->gpu_device_id());
+
+  auto *scope = argument->scope_ptr();
+  // Get the program which has been processed by several passes.
+  analysis_program_.reset(
+      new framework::ProgramDesc(argument->ir_analyzed_program()));
+
+  const auto &global_block = analysis_program_->Block(0);
+
+  // Sync the persistable params of block 0 from CPU to GPU.
+  for (auto &var : global_block.AllVars()) {
+    if (IsPersistable(var)) {
+      std::string var_name = var->Name();
+      LOG(INFO) << var_name;
+      auto &t = inference::analysis::GetFromScope<framework::LoDTensor>(
+          *scope, var_name);
+
+      platform::CPUPlace cpu_place;
+      framework::LoDTensor temp_tensor;
+      temp_tensor.Resize(t.dims());
+      temp_tensor.mutable_data<float>(cpu_place);
+
+      // Copy the parameter data to a temporary CPU tensor.
+      TensorCopySync(t, cpu_place, &temp_tensor);
+      // Reallocate the tensor's storage on the GPU.
+      t.mutable_data<float>(place);
+
+      // Copy the parameter data to the newly allocated GPU space.
+      TensorCopySync(temp_tensor, place, &t);
+    }
+  }
+}
+
+std::string IrParamsSyncAmongDevicesPass::repr() const {
+  return "ir-params-sync-among-devices-pass";
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
new file mode 100644
index 0000000000..6818887b96
--- /dev/null
+++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
+#include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/platform/place.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+/*
+ * Syncs persistable parameters from CPU to the configured GPU; no-op on CPU.
+ */
+class IrParamsSyncAmongDevicesPass : public AnalysisPass {
+ public:
+  void RunImpl(Argument *argument) override;
+  std::string repr() const override;
+
+ private:
+  std::unique_ptr<framework::ProgramDesc> analysis_program_;
+};
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/passes/passes.cc b/paddle/fluid/inference/analysis/passes/passes.cc
index 2ef515f45f..9245e32cee 100644
--- a/paddle/fluid/inference/analysis/passes/passes.cc
+++ b/paddle/fluid/inference/analysis/passes/passes.cc
@@ -16,6 +16,7 @@
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc"
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
+#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 
 namespace paddle {
 namespace inference {
@@ -27,6 +28,9 @@ PassRegistry::PassRegistry() {
                   std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
   passes_.emplace("ir_analysis_compose_pass",
                   std::unique_ptr<AnalysisPass>(new IrAnalysisComposePass));
+  passes_.emplace(
+      "ir_params_sync_among_devices_pass",
+      std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass));
 }
 
 }  // namespace analysis
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index 12e3a6f42e..825bee833b 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -116,12 +116,8 @@ class CpuPassStrategy : public PassStrategy {
 class GpuPassStrategy : public PassStrategy {
  public:
   GpuPassStrategy() : PassStrategy({}) {
-    // TODO(NHZlX)
Problem with Data synchronization between GPU and CPU - // When running in GPU mode, the parameters are all on GPU. But the - // opearations of "conv_bn_fuse_pass" are on CPU. passes_.assign({ - "infer_clean_graph_pass", - // "infer_clean_graph_pass", "conv_bn_fuse_pass", + "infer_clean_graph_pass", "conv_bn_fuse_pass", }); } -- GitLab