From 7673b39acd6e16c26560b7149519f012d56ab5eb Mon Sep 17 00:00:00 2001
From: WangZhen <23097963+0x45f@users.noreply.github.com>
Date: Wed, 22 Jun 2022 11:57:17 +0800
Subject: [PATCH] [JITLayer]Polish Layer implement and refine interface code (#43607)

* Fix some TODO and polish code
* Support GPU place
* Fix layer_test ci error
* Polish some code
* Make GetFunction as const function
* Remove COMMAND tar to fix CI error
* Fix comments
* Merge develop to fix codestyle error
---
 paddle/fluid/jit/CMakeLists.txt     |  7 ++-
 paddle/fluid/jit/base_function.cc   | 67 +++++++++++++++----------
 paddle/fluid/jit/base_function.h    | 28 +++++------
 paddle/fluid/jit/exector_function.h | 20 +++++---
 paddle/fluid/jit/layer.cc           | 13 ++---
 paddle/fluid/jit/layer.h            |  9 ++--
 paddle/fluid/jit/layer_test.cc      | 77 +++++++++++++++++++++++------
 paddle/fluid/jit/pe_function.h      | 25 ++++++----
 paddle/fluid/jit/serializer.cc      | 30 ++++++-----
 paddle/fluid/jit/serializer.h       |  7 ++-
 10 files changed, 182 insertions(+), 101 deletions(-)

diff --git a/paddle/fluid/jit/CMakeLists.txt b/paddle/fluid/jit/CMakeLists.txt
index b44060c0fa..dabaabff8c 100644
--- a/paddle/fluid/jit/CMakeLists.txt
+++ b/paddle/fluid/jit/CMakeLists.txt
@@ -16,8 +16,11 @@ cc_library(
 if(WITH_TESTING AND NOT WIN32)
   add_custom_target(
     jit_download_program
-    COMMAND wget -nc https://paddle-ci.gz.bcebos.com/dy2st/Testing.tar.gz
-    COMMAND tar zxvf Testing.tar.gz)
+    COMMAND wget -nc
+            https://paddle-ci.gz.bcebos.com/dy2st/export.forward.pdiparams
+    COMMAND wget -nc
+            https://paddle-ci.gz.bcebos.com/dy2st/export.forward.pdmodel
+    COMMAND wget -nc https://paddle-ci.gz.bcebos.com/dy2st/export.infer.pdmodel)
   set(JIT_DEPS
       phi
       elementwise_add_op
diff --git a/paddle/fluid/jit/base_function.cc b/paddle/fluid/jit/base_function.cc
index fcbe64de8d..93521173d7 100644
--- a/paddle/fluid/jit/base_function.cc
+++ b/paddle/fluid/jit/base_function.cc
@@ -38,29 +38,28 @@ std::vector<std::string> FunctionSchema::GetOutputArgNames() {
   return output_arg_names;
 }
 
-void FunctionSchema::AddInputArg(std::string name, bool is_output) {
-  input_args.emplace_back(name, is_output);
+void FunctionSchema::AddInputArg(std::string name) {
+  input_args.emplace_back(name, false);
 }
 
-void FunctionSchema::AddOutputArg(std::string name, bool is_output) {
-  output_args.emplace_back(name, is_output);
+void FunctionSchema::AddOutputArg(std::string name) {
+  output_args.emplace_back(name, true);
 }
 
-BaseFunction::BaseFunction(
-    const framework::ProgramDesc &program_desc,
-    const std::vector<std::string> param_names_for_program,
-    const VariableNameMap &params_dict)
-    : program_desc_(program_desc) {
+BaseFunction::BaseFunction(const framework::ProgramDesc &program_desc,
+                           const std::vector<std::string> &param_names,
+                           const VariableNameMap &params_dict,
+                           const phi::Place &place)
+    : program_desc_(program_desc), place_(place) {
   // Parse FunctionSchema
-  // skip_var_name_ = program_desc_.GetFetchTargetNames();
   for (auto &in_name : program_desc_.GetFeedTargetNames()) {
-    schema_.AddInputArg(in_name, false);
+    schema_.AddInputArg(in_name);
   }
   for (auto &out_name : program_desc_.GetFetchTargetNames()) {
-    schema_.AddOutputArg(out_name, true);
+    schema_.AddOutputArg(out_name);
   }
   // share params into scope
-  SharePartialIntoScope(param_names_for_program, params_dict);
+  ShareParamsIntoScope(param_names, params_dict);
   VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
   // remove feed fetch op
   RemoveFeedFetch();
@@ -70,7 +69,9 @@ void BaseFunction::FetchOutput(std::vector<Variable> *outs) {
   for (auto &out_name : schema_.GetOutputArgNames()) {
     VLOG(3) << "fetch out: " << out_name;
     auto *var = scope_.FindVar(out_name);
+    VLOG(3) << "after scope_.FindVar(out_name);";
     auto &src_tensor = var->Get<DenseTensor>();
+    VLOG(3) << "var->Get<DenseTensor>();";
     Variable v;
     auto *p = v.GetMutable<DenseTensor>();
     *p = src_tensor;
@@ -78,23 +79,30 @@ void BaseFunction::FetchOutput(std::vector<Variable> *outs) {
   }
 }
 
-void BaseFunction::ShareIntoScope(const VariableNameMap &ivals) {
-  VLOG(3) << "ivals size: " << ivals.size();
-  for (auto it = ivals.begin(); it != ivals.end(); ++it) {
-    VLOG(3) << "share into scope: " << it->first;
-    DenseTensor dense_tensor = it->second.Get<DenseTensor>();
-    auto *var = scope_.Var(it->first);
+void BaseFunction::ShareInputsIntoScope(const std::vector<Variable> &vars) {
+  VLOG(3) << "vars size: " << vars.size();
+  std::vector<std::string> ordered_input_names = schema_.GetInputArgNames();
+  PADDLE_ENFORCE_EQ(
+      vars.size(),
+      ordered_input_names.size(),
+      platform::errors::InvalidArgument(
+          "vars.size() should be equal to ordered_input_names.size()."));
+
+  for (size_t i = 0; i < vars.size(); i++) {
+    VLOG(3) << "share into scope: " << ordered_input_names[i];
+    auto &dense_tensor = vars[i].Get<DenseTensor>();
+    auto *var = scope_.Var(ordered_input_names[i]);
     auto *dst_tensor = var->GetMutable<DenseTensor>();
     *dst_tensor = dense_tensor;
   }
 }
 
-void BaseFunction::SharePartialIntoScope(
-    const std::vector<std::string> param_names_for_program,
+void BaseFunction::ShareParamsIntoScope(
+    const std::vector<std::string> &param_names,
     const VariableNameMap &params_dict) {
-  VLOG(3) << "ivals size: " << param_names_for_program.size();
-  for (size_t i = 0; i < param_names_for_program.size(); ++i) {
-    std::string name = param_names_for_program[i];
+  VLOG(3) << "param_names size: " << param_names.size();
+  for (size_t i = 0; i < param_names.size(); ++i) {
+    std::string name = param_names[i];
     Variable val = params_dict.find(name)->second;
     auto &dense_tensor = val.Get<DenseTensor>();
     VLOG(3) << "share into scope: " << name;
@@ -112,8 +120,15 @@ void BaseFunction::RemoveFeedFetch() {
   VLOG(3) << "op_size: " << op_size;
   for (int i = op_size - 1; i >= 0; i--) {
     auto op = all_ops[i];
-    if (op->Type() == "feed" || op->Type() == "fetch") {
-      VLOG(3) << "remove op type: " << op->Type() << ", index: " << i;
+    if (op->Type() == "feed") {
+      VLOG(3) << "remove op type: " << op->Type() << ", index: " << i
+              << ", var name: " << op->Input("X")[0];
+      block->RemoveVar(op->Input("X")[0]);
+      block->RemoveOp(i, i + 1);
+    } else if (op->Type() == "fetch") {
+      VLOG(3) << "remove op type: " << op->Type() << ", index: " << i
+              << ", var name: " << op->Output("Out")[0];
+      block->RemoveVar(op->Output("Out")[0]);
       block->RemoveOp(i, i + 1);
     }
   }
diff --git a/paddle/fluid/jit/base_function.h b/paddle/fluid/jit/base_function.h
index 3d4f9a29eb..3f23ebcd97 100644
--- a/paddle/fluid/jit/base_function.h
+++ b/paddle/fluid/jit/base_function.h
@@ -20,8 +20,7 @@
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/phi/core/dense_tensor.h"
-#include "paddle/utils/none.h"
-#include "paddle/utils/optional.h"
+#include "paddle/phi/core/enforce.h"
 
 namespace paddle {
 namespace jit {
@@ -50,11 +49,12 @@ class FunctionSchema {
 
   std::vector<std::string> GetOutputArgNames();
 
-  void AddInputArg(std::string name, bool is_output);
+  void AddInputArg(std::string name);
 
-  void AddOutputArg(std::string name, bool is_output);
+  void AddOutputArg(std::string name);
 
  private:
+  // input_args and output_args are ordered
   std::vector<Argument> input_args;
   std::vector<Argument> output_args;
 };
@@ -63,33 +63,31 @@ class FunctionSchema {
 class BaseFunction {
  public:
   BaseFunction(const framework::ProgramDesc &program_desc,
-               const std::vector<std::string> param_names_for_program,
-               const VariableNameMap &params_dict);
+               const std::vector<std::string> &param_names,
+               const VariableNameMap &params_dict,
+               const phi::Place &place);
 
   virtual ~BaseFunction() {}
 
-  virtual std::vector<Variable> operator()(const VariableNameMap &inputs) = 0;
+  virtual std::vector<Variable> operator()(
+      const std::vector<Variable> &inputs) = 0;
 
  protected:
   void FetchOutput(std::vector<Variable> *outs);
 
-  void ShareIntoScope(const VariableNameMap &ivals);
+  void ShareInputsIntoScope(const std::vector<Variable> &vars);
 
-  void SharePartialIntoScope(
-      const std::vector<std::string> param_names_for_program,
-      const VariableNameMap &params_dict);
+  void ShareParamsIntoScope(const std::vector<std::string> &param_names,
+                            const VariableNameMap &params_dict);
 
   void RemoveFeedFetch();
 
 protected:
   framework::ProgramDesc program_desc_;
-  // TODO(dev): need a better way to share params
-  // std::vector &param_for_program_;
-  // std::vector skip_var_name_;
   FunctionSchema schema_;
   // global_scope place params
   framework::Scope scope_;
-  // framework::Executor inner_exe_;
+  phi::Place place_;
 };
 
 }  // namespace jit
diff --git a/paddle/fluid/jit/exector_function.h b/paddle/fluid/jit/exector_function.h
index 3217c62fbd..29f8e6cdab 100644
--- a/paddle/fluid/jit/exector_function.h
+++ b/paddle/fluid/jit/exector_function.h
@@ -22,18 +22,23 @@ namespace jit {
 class ExectorFunction : public BaseFunction {
  public:
   ExectorFunction(const framework::ProgramDesc &program_desc,
-                  const std::vector<std::string> param_names_for_program,
-                  const VariableNameMap &params_dict)
-      : BaseFunction(program_desc, param_names_for_program, params_dict),
-        inner_exe_(phi::CPUPlace()) {}
+                  const std::vector<std::string> param_names,
+                  const VariableNameMap &params_dict,
+                  const phi::Place &place)
+      : BaseFunction(program_desc, param_names, params_dict, place),
+        inner_exe_(place_) {}
 
   ~ExectorFunction() {}
 
-  std::vector<Variable> operator()(const VariableNameMap &inputs) {
+  std::vector<Variable> operator()(const std::vector<Variable> &inputs) {
     // share input into scope
-    ShareIntoScope(inputs);
+    ShareInputsIntoScope(inputs);
     // run program
-    inner_exe_.Run(program_desc_, &scope_, /*blockID=*/0, false, true,
+    inner_exe_.Run(program_desc_,
+                   &scope_,
+                   /*blockID=*/0,
+                   false,
+                   true,
                    schema_.GetOutputArgNames());
     VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
     // fetch outputs
@@ -43,7 +48,6 @@ class ExectorFunction : public BaseFunction {
   }
 
  private:
-  // TODO(dev): support other devices exe
   framework::Executor inner_exe_;
 };
 
diff --git a/paddle/fluid/jit/layer.cc b/paddle/fluid/jit/layer.cc
index cb13a003af..1b4345f55c 100644
--- a/paddle/fluid/jit/layer.cc
+++ b/paddle/fluid/jit/layer.cc
@@ -23,23 +23,24 @@ Layer::Layer(
     const std::vector<std::string>& func_names,
     const std::vector<framework::ProgramDesc>& program_descs,
     const std::vector<std::vector<std::string>>& param_names_for_each_program,
-    const VariableNameMap& params_dict) {
+    const VariableNameMap& params_dict,
+    const phi::Place& place) {
   VLOG(3) << "program size: " << program_descs.size();
   // Layer manage the life time of all parameter.
   for (size_t i = 0; i < func_names.size(); ++i) {
     // TODO(dev): choose exector or pe by flag
     function_dict[func_names[i]] = std::make_shared<ExectorFunction>(
-        program_descs[i], param_names_for_each_program[i], params_dict);
+        program_descs[i], param_names_for_each_program[i], params_dict, place);
   }
 }
 
-// TODO(dev): make it as const function
-std::shared_ptr<BaseFunction> Layer::GetFunction(const std::string& name) {
+std::shared_ptr<BaseFunction> Layer::GetFunction(
+    const std::string& name) const {
   VLOG(3) << "funcs_ size: " << function_dict.size();
-  return function_dict[name];
+  return function_dict.at(name);
 }
 
-std::vector<Variable> Layer::forward(const VariableNameMap& inputs) {
+std::vector<Variable> Layer::forward(const std::vector<Variable>& inputs) {
   auto func = GetFunction("forward");
   return (*func)(inputs);
 }
diff --git a/paddle/fluid/jit/layer.h b/paddle/fluid/jit/layer.h
index 0c2ad49c77..aac7de8515 100644
--- a/paddle/fluid/jit/layer.h
+++ b/paddle/fluid/jit/layer.h
@@ -36,16 +36,17 @@ class Layer {
   // TODO(dev): Make vector, num_slot as in argument
   // Layer(const std::shared_ptr& type) : obj_(type, /*num_slot*/ 0U)
   // {}
+  // TODO(dev): consider make `func_name, program_desc, param_nams` as a class
   Layer(
       const std::vector<std::string>& func_names,
       const std::vector<framework::ProgramDesc>& program_descs,
       const std::vector<std::vector<std::string>>& param_names_for_each_program,
-      const VariableNameMap& params_dict);
+      const VariableNameMap& params_dict,
+      const phi::Place& place);
 
-  // TODO(dev): make it as const function
-  std::shared_ptr<BaseFunction> GetFunction(const std::string& name);
+  std::shared_ptr<BaseFunction> GetFunction(const std::string& name) const;
 
-  std::vector<Variable> forward(const VariableNameMap& inputs);
+  std::vector<Variable> forward(const std::vector<Variable>& inputs);
 
  private:
   // internal::Object obj_;
diff --git a/paddle/fluid/jit/layer_test.cc b/paddle/fluid/jit/layer_test.cc
index 9386569d48..6f8b7a58b8 100644
--- a/paddle/fluid/jit/layer_test.cc
+++ b/paddle/fluid/jit/layer_test.cc
@@ -23,11 +23,13 @@
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/imperative/tracer.h"
 #include "paddle/fluid/jit/serializer.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/copy_kernel.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 USE_OP_ITSELF(elementwise_add);
@@ -44,28 +46,37 @@ PD_DECLARE_KERNEL(relu, CPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
 
+#if defined(PADDLE_WITH_CUDA)
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(relu, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT);
+#endif
+
 namespace paddle {
 namespace jit {
 
-VariableNameMap PrepareInputs() {
-  auto temp = DenseTensor();
-  temp.Resize(phi::make_ddim({2, 4}));
-  phi::CPUContext cpu_ctx;
-  cpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
-                           .GetAllocator(paddle::platform::CPUPlace())
-                           .get());
-  cpu_ctx.Init();
-  cpu_ctx.Alloc<float>(&temp);
-  phi::funcs::set_constant(cpu_ctx, &temp, 2.);
+std::vector<Variable> PrepareInputs() {
+  auto default_place = imperative::GetCurrentTracer()->ExpectedPlace();
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  auto& dev_ctx = *pool.Get(default_place);
+
   Variable v;
-  auto *p = v.GetMutable<DenseTensor>();
-  *p = temp;
-  // TODO(dev): associate the input name
-  return {{"x", v}};
+  auto* dense_tensor = v.GetMutable<DenseTensor>();
+  dense_tensor->Resize(phi::make_ddim({2, 4}));
+  dense_tensor->mutable_data<float>(default_place);
+  phi::funcs::set_constant(dev_ctx, dense_tensor, 2.);
+
+  return {v};
 }
 
-TEST(layer, Construct) {
-  std::string path = "./Testing/";
+TEST(CpuLayerTest, Construct) {
+  auto tracer = std::make_shared<imperative::Tracer>();
+  paddle::imperative::SetCurrentTracer(tracer);
+  imperative::GetCurrentTracer()->SetExpectedPlace(phi::CPUPlace());
+
+  std::string path = "./";
   auto layer = jit::Load(path);
 
   auto inputs = PrepareInputs();
@@ -83,5 +94,39 @@
   EXPECT_NEAR(out_data[0], 1.41562390, 1e-6);
 }
 
+#if defined(PADDLE_WITH_CUDA)
+TEST(GpuLayerTest, Construct) {
+  auto tracer = std::make_shared<imperative::Tracer>();
+  paddle::imperative::SetCurrentTracer(tracer);
+  imperative::GetCurrentTracer()->SetExpectedPlace(phi::GPUPlace(0));
+
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  auto& dev_ctx = *pool.Get(imperative::GetCurrentTracer()->ExpectedPlace());
+  const auto* dev_ctx_gpu = static_cast<const phi::GPUContext*>(&dev_ctx);
+  DenseTensor cpu_dense_tensor;
+
+  std::string path = "./";
+  auto layer = jit::Load(path);
+  auto inputs = PrepareInputs();
+
+  auto outs = layer.forward(inputs);
+  auto out_vars = outs[0];
+  auto out_dense_tensor = out_vars.Get<DenseTensor>();
+  phi::Copy(
+      *dev_ctx_gpu, out_dense_tensor, phi::CPUPlace(), true, &cpu_dense_tensor);
+  auto out_data = cpu_dense_tensor.data<float>();
+  EXPECT_NEAR(out_data[0], 0.02194316, 1e-6);
+
+  auto func = layer.GetFunction("infer");
+  outs = (*func)(inputs);
+  out_vars = outs[0];
+  out_dense_tensor = out_vars.Get<DenseTensor>();
+  phi::Copy(
+      *dev_ctx_gpu, out_dense_tensor, phi::CPUPlace(), true, &cpu_dense_tensor);
+  out_data = cpu_dense_tensor.data<float>();
+  EXPECT_NEAR(out_data[0], 1.41562390, 1e-6);
+}
+#endif
+
 }  // namespace jit
 }  // namespace paddle
diff --git a/paddle/fluid/jit/pe_function.h b/paddle/fluid/jit/pe_function.h
index a3d7eb33f7..f4378da556 100644
--- a/paddle/fluid/jit/pe_function.h
+++ b/paddle/fluid/jit/pe_function.h
@@ -27,13 +27,14 @@ namespace jit {
 class PEFunction : public BaseFunction {
  public:
   PEFunction(const framework::ProgramDesc &program_desc,
-             const std::vector<std::string> param_names_for_program,
-             const VariableNameMap &params_dict)
-      : BaseFunction(program_desc, param_names_for_program, params_dict) {}
+             const std::vector<std::string> param_names,
+             const VariableNameMap &params_dict,
+             const phi::Place &place)
+      : BaseFunction(program_desc, param_names, params_dict, place) {}
 
   ~PEFunction() {}
 
-  std::vector<Variable> operator()(const VariableNameMap &inputs) {
+  std::vector<Variable> operator()(const std::vector<Variable> &inputs) {
     // bool is_test = true;
     std::string prog_string;
     std::hash<std::string> string_hash;
@@ -43,15 +44,19 @@
     int64_t start_op_index = 0;
     int64_t end_op_index = static_cast<int64_t>(global_block.OpSize());
 
-    ShareIntoScope(inputs);
+    ShareInputsIntoScope(inputs);
     std::vector<std::string> input_var_names = schema_.GetInputArgNames();
     std::vector<std::string> output_var_names = schema_.GetOutputArgNames();
    std::vector<std::string> dout_var_names;
     if (end_op_index > start_op_index) {
       // TODO(dev): support other devices
-      auto cache_info = framework::GetExecutorInfoFromCache(
-          program_desc_, phi::CPUPlace(), start_op_index, end_op_index,
-          /*is_grad=*/false, program_id, &scope_);
+      auto cache_info = framework::GetExecutorInfoFromCache(program_desc_,
+                                                            place_,
+                                                            start_op_index,
+                                                            end_op_index,
+                                                            /*is_grad=*/false,
+                                                            program_id,
+                                                            &scope_);
       auto &parallel_executor = cache_info.first;
       auto &skip_eager_delete_vars =
           framework::ExecutorInfoCache::Instance().SkipEagerDeleteVars(
@@ -65,7 +70,9 @@
               dout_var_names.begin(), dout_var_names.end());
       framework::details::ParseSafeEagerDeletionSkipVars(
-          program_desc_, end_op_index, output_var_names,
+          program_desc_,
+          end_op_index,
+          output_var_names,
           &skip_eager_delete_vars);
       }
       parallel_executor->RunWithoutFetch(skip_eager_delete_vars);
diff --git a/paddle/fluid/jit/serializer.cc b/paddle/fluid/jit/serializer.cc
index a8bd934d12..587774c278 100644
--- a/paddle/fluid/jit/serializer.cc
+++ b/paddle/fluid/jit/serializer.cc
@@ -45,14 +45,18 @@ Layer Deserializer::operator()(const std::string& dir_path) {
                                 persistable_var_names.end());
   }
 
+  auto default_place = imperative::GetCurrentTracer()->ExpectedPlace();
   // Read from one pdiparams file, refine here
-  auto params_for_all_program =
-      ReadTensorData(dir_path + "export.forward.pdiparams", param_names_set);
-  params_dict.insert(params_for_all_program.begin(),
-                     params_for_all_program.end());
+  ReadTensorData(dir_path + "export.forward.pdiparams",
+                 param_names_set,
+                 default_place,
+                 &params_dict);
 
-  return Layer(func_names, program_descs, param_names_for_each_program,
-               params_dict);
+  return Layer(func_names,
+               program_descs,
+               param_names_for_each_program,
+               params_dict,
+               default_place);
 }
 
 bool Deserializer::IsPersistable(framework::VarDesc* desc_ptr) {
@@ -74,6 +78,7 @@ bool Deserializer::EndsWith(const std::string& str, const std::string& suffix) {
              0;
 }
 
+// process filename like `export.forward.pdmodel` and `export.infer.pdmodel`
 const std::vector<std::pair<std::string, std::string>>
 Deserializer::GetPdmodelFileNamePrefix(const std::string& path) {
   std::vector<std::pair<std::string, std::string>> file_name_prefixs;
@@ -92,23 +97,22 @@ Deserializer::GetPdmodelFileNamePrefix(const std::string& path) {
   return file_name_prefixs;
 }
 
-VariableNameMap Deserializer::ReadTensorData(
-    const std::string& file_name, const std::set<std::string>& var_name) const {
+void Deserializer::ReadTensorData(const std::string& file_name,
+                                  const std::set<std::string>& var_name,
+                                  const phi::Place& place,
+                                  VariableNameMap* params_dict) const {
   VLOG(3) << "ReadTensorData from: " << file_name;
   std::ifstream fin(file_name, std::ios::binary);
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-  // TODO(dev): Support other devices
-  auto& dev_ctx = *pool.Get(phi::CPUPlace());
-  VariableNameMap res;
+  auto& dev_ctx = *pool.Get(place);
   for (auto it = var_name.begin(); it != var_name.end(); it++) {
     VLOG(3) << "load Tensor: " << *it;
     Variable v;
     // TODO(dev): Support framework::Vocab
     DenseTensor* dense_tesnor = v.GetMutable<DenseTensor>();
     framework::DeserializeFromStream(fin, dense_tesnor, dev_ctx);
-    res[*it] = v;
+    (*params_dict)[*it] = v;
   }
-  return res;
 }
 
 framework::ProgramDesc Deserializer::LoadProgram(const std::string& file_name) {
diff --git a/paddle/fluid/jit/serializer.h b/paddle/fluid/jit/serializer.h
index 4036c5add7..1511b6b50f 100644
--- a/paddle/fluid/jit/serializer.h
+++ b/paddle/fluid/jit/serializer.h
@@ -23,6 +23,7 @@
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/imperative/tracer.h"
 #include "paddle/fluid/jit/layer.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/phi/core/dense_tensor.h"
@@ -58,8 +59,10 @@ class Deserializer {
   const std::vector<std::pair<std::string, std::string>>
   GetPdmodelFileNamePrefix(const std::string& path);
 
-  VariableNameMap ReadTensorData(const std::string& file_name,
-                                 const std::set<std::string>& var_name) const;
+  void ReadTensorData(const std::string& file_name,
+                      const std::set<std::string>& var_name,
+                      const phi::Place& place,
+                      VariableNameMap* params_dict) const;
 
   // void ReadExtraInfo(const std::string& file_name) const;
   // void ReadByteCode(const std::string& file_name) const;
-- 
GitLab
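
For quick reference, below is an illustrative sketch (not part of the patch) of how the reworked jit::Layer interface is driven after this change. It mirrors CpuLayerTest.Construct above: the "./" path and the "forward"/"infer" function names come from that test, and BuildInputs is a hypothetical stand-in for the test's PrepareInputs() helper.

#include <memory>
#include <vector>

#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/jit/serializer.h"

// Hypothetical helper: builds the ordered, DenseTensor-backed input Variables
// (see PrepareInputs() in layer_test.cc above for a concrete version).
std::vector<paddle::jit::Variable> BuildInputs();

void RunExportedLayer() {
  // Functions now pick their place up from the current imperative tracer.
  auto tracer = std::make_shared<paddle::imperative::Tracer>();
  paddle::imperative::SetCurrentTracer(tracer);
  tracer->SetExpectedPlace(phi::CPUPlace());

  // Loads export.forward.pdmodel / export.infer.pdmodel plus the shared
  // export.forward.pdiparams from the given directory.
  auto layer = paddle::jit::Load("./");

  // Inputs are an ordered std::vector<Variable>, matched positionally against
  // the feed targets recorded in each function's FunctionSchema.
  auto inputs = BuildInputs();

  auto forward_outs = layer.forward(inputs);  // runs the "forward" program
  auto infer = layer.GetFunction("infer");    // const lookup after this PR
  auto infer_outs = (*infer)(inputs);
}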