From 40d4d834b077408aa445879bb7bef94eaaf4577d Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Wed, 18 Aug 2021 18:31:44 +0800 Subject: [PATCH] code refactoring for new executor (#34970) * code refactoring, test=develop * refine, test=develop * refine, test=develop * refine, test=develop --- paddle/fluid/framework/CMakeLists.txt | 1 + paddle/fluid/framework/new_exec.h | 629 ------------------ paddle/fluid/framework/new_exec_test.cc | 88 --- .../framework/new_executor/CMakeLists.txt | 4 + .../framework/new_executor/interpretercore.cc | 471 +++++++++++++ .../framework/new_executor/interpretercore.h | 84 +++ .../interpretercore_util.h} | 2 +- .../new_executor/new_executor_defs.h | 79 +++ .../new_executor/standalone_executor.cc | 106 +++ .../new_executor/standalone_executor.h | 67 ++ .../new_executor/standalone_executor_test.cc | 64 ++ paddle/fluid/pybind/CMakeLists.txt | 1 + paddle/fluid/pybind/pybind.cc | 24 +- ...rpreter.py => test_standalone_executor.py} | 16 +- 14 files changed, 901 insertions(+), 735 deletions(-) delete mode 100644 paddle/fluid/framework/new_exec.h delete mode 100644 paddle/fluid/framework/new_exec_test.cc create mode 100644 paddle/fluid/framework/new_executor/CMakeLists.txt create mode 100644 paddle/fluid/framework/new_executor/interpretercore.cc create mode 100644 paddle/fluid/framework/new_executor/interpretercore.h rename paddle/fluid/framework/{new_exec_util.h => new_executor/interpretercore_util.h} (99%) create mode 100644 paddle/fluid/framework/new_executor/new_executor_defs.h create mode 100644 paddle/fluid/framework/new_executor/standalone_executor.cc create mode 100644 paddle/fluid/framework/new_executor/standalone_executor.h create mode 100644 paddle/fluid/framework/new_executor/standalone_executor_test.cc rename python/paddle/fluid/tests/unittests/interpreter/{test_interpreter.py => test_standalone_executor.py} (76%) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 6853b03c612..cf1b5c10bb5 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -25,6 +25,7 @@ add_subdirectory(ir) add_subdirectory(details) add_subdirectory(fleet) add_subdirectory(io) +add_subdirectory(new_executor) #ddim lib proto_library(framework_proto SRCS framework.proto) diff --git a/paddle/fluid/framework/new_exec.h b/paddle/fluid/framework/new_exec.h deleted file mode 100644 index defa7a96733..00000000000 --- a/paddle/fluid/framework/new_exec.h +++ /dev/null @@ -1,629 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include -#include - -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/executor_gc_helper.h" -#include "paddle/fluid/framework/garbage_collector.h" -#include "paddle/fluid/framework/new_exec_util.h" -#include "paddle/fluid/framework/op_info.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/framework/variable_helper.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/init.h" - -// USE_OP(fill_constant); -// USE_OP(elementwise_add); - -// using namespace std; - -namespace paddle { -namespace framework { - -using std::cerr; -using std::endl; - -using OpKernelComputeFunc = std::function; -using OpKernelMap = - std::unordered_map; - -framework::ProgramDesc load_from_file(const std::string& file_name) { - std::ifstream fin(file_name, std::ios::in | std::ios::binary); - fin.seekg(0, std::ios::end); - std::string buffer(fin.tellg(), ' '); - fin.seekg(0, std::ios::beg); - fin.read(&buffer[0], buffer.size()); - fin.close(); - - ProgramDesc program_desc(buffer); - return program_desc; -} - -struct OpKernelFunc { - OpKernelComputeFunc compute_func_; - OperatorBase* operator_base_; -}; - -struct VariableMetaInfo { - int var_ref_count_; -}; - -struct VariableScope { - std::vector var_list; - std::map name2id; - std::vector vec_meta_info_; -}; - -struct NextInstruction { - std::vector direct_run_; -}; - -struct EventInter {}; - -struct InstructionInfo { - std::vector dependecy_count_; -}; - -struct EventRun { - EventInter event_inter; - std::vector same_device_run_; - std::vector synchronized_run; -}; - -struct Instruction { - OpKernelFunc kernel_func_; - std::map> input_index_; - std::map> output_index_; - - std::vector gc_check_var_list; - NextInstruction next_instruction_; - std::vector vec_event_list_; -}; - -struct OpFuncNode { - // int unsed; - std::map> input_index; - std::map> output_index; - - OpKernelComputeFunc kernel_func_; -}; - -int convert(const platform::Place& place) { - if (is_cpu_place(place)) { - return 0; - } - if (is_gpu_place(place)) { - return 1; - } - - return -1; -} - -std::vector merge_vec(const std::vector& first, - const std::vector& second) { - std::vector out(first.size() + second.size()); - std::merge(first.begin(), first.end(), second.begin(), second.end(), - out.begin()); - - std::vector::iterator it; - it = std::unique(out.begin(), out.end()); - - out.resize(std::distance(out.begin(), it)); - - return out; -} - -void build_variable_outer_scope(const framework::ProgramDesc& pdesc, - VariableScope* var_scope, Scope* outer_scope) { - auto& global_block = pdesc.Block(0); - - for (auto& var : global_block.AllVars()) { - if (var->Name() == framework::kEmptyVarName) { - continue; - } - auto v = outer_scope->Var(var->Name()); - - if (var_scope->name2id.find(var->Name()) == var_scope->name2id.end()) { - var_scope->name2id[var->Name()] = var_scope->var_list.size(); - } - - InitializeVariable(v, var->GetType()); - var_scope->var_list.push_back(v); - } -} - -void build_variable_scope(const framework::ProgramDesc& pdesc, - VariableScope* var_scope) { - auto& global_block = pdesc.Block(0); - - for (auto& var : global_block.AllVars()) { - if (var->Name() == framework::kEmptyVarName) { - continue; - } - - if (var_scope->name2id.find(var->Name()) == var_scope->name2id.end()) { - var_scope->name2id[var->Name()] = var_scope->var_list.size(); - } - - auto v = new Variable(); - InitializeVariable(v, var->GetType()); - var_scope->var_list.push_back(v); - } -} - -void build_op_func_list(const framework::ProgramDesc& pdesc, - std::vector* op_list, - std::vector* vec_func_list, - VariableScope* var_scope, - const platform::Place& place) { - auto& global_block = pdesc.Block(0); - - for (auto& op : global_block.AllOps()) { - VLOG(3) << op->Type(); - // << op->Type() << endl; - - auto& info = OpInfoMap::Instance().Get(op->Type()); - - const VariableNameMap& inputs_names = op->Inputs(); - const VariableNameMap& outputs_names = op->Outputs(); - AttributeMap op_attr_map = op->GetAttrMap(); - - if (info.Checker() != nullptr) { - info.Checker()->Check(&op_attr_map); - } - auto op_base = - info.Creator()(op->Type(), inputs_names, outputs_names, op_attr_map); - - OpFuncNode op_func_node; - - VariableValueMap ins_map; - std::map> ins_name2id; - for (auto& var_name_item : inputs_names) { - std::vector input_vars; - std::vector vec_ids; - input_vars.reserve(var_name_item.second.size()); - for (auto& var_name : var_name_item.second) { - auto it = var_scope->name2id.find(var_name); - assert(it != var_scope->name2id.end()); - input_vars.push_back(var_scope->var_list[it->second]); - vec_ids.push_back(it->second); - } - ins_map[var_name_item.first] = input_vars; - ins_name2id[var_name_item.first] = vec_ids; - } - - VariableValueMap outs_map; - std::map> outs_name2id; - for (auto& var_name_item : outputs_names) { - std::vector output_vars; - std::vector vec_ids; - output_vars.reserve(var_name_item.second.size()); - for (auto& var_name : var_name_item.second) { - auto it = var_scope->name2id.find(var_name); - assert(it != var_scope->name2id.end()); - output_vars.push_back(var_scope->var_list[it->second]); - vec_ids.push_back(it->second); - } - outs_map[var_name_item.first] = output_vars; - outs_name2id[var_name_item.first] = vec_ids; - } - - op_func_node.input_index = ins_name2id; - op_func_node.output_index = outs_name2id; - RuntimeContext runtime_context({}, {}); - runtime_context.inputs.swap(ins_map); - runtime_context.outputs.swap(outs_map); - RuntimeInferShapeContext infer_shape_ctx(*op_base, runtime_context); - static_cast(op_base)->InferShape( - &infer_shape_ctx); - auto& all_op_kernels = OperatorWithKernel::AllOpKernels(); - auto kernels_iter = all_op_kernels.find(op->Type()); - PADDLE_ENFORCE_NE( - kernels_iter, all_op_kernels.end(), - platform::errors::Unavailable( - "There are no kernels which are registered in the %s operator.", - op->Type())); - - OpKernelMap& kernels = kernels_iter->second; - // auto place = platform::CPUPlace(); - // auto place = platform::CUDAPlace(0); - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto* dev_ctx = pool.Get(place); - Scope scope; - auto exec_ctx = - ExecutionContext(*op_base, scope, *dev_ctx, runtime_context); - auto expected_kernel_key = - dynamic_cast(op_base) - ->GetExpectedKernelType(exec_ctx); - - VariableValueMap& ins_map_temp = runtime_context.inputs; - - for (auto& var_name_item : ins_map_temp) { - for (size_t i = 0; i < var_name_item.second.size(); ++i) { - auto var = var_name_item.second[i]; - auto tensor_in = static_cast(&(var->Get())); - if (!tensor_in->IsInitialized()) { - continue; - } - auto kernel_type_for_var = - static_cast(op_base) - ->GetKernelTypeForVar(var_name_item.first, *tensor_in, - expected_kernel_key); - if (!platform::is_same_place(kernel_type_for_var.place_, - expected_kernel_key.place_)) { - // need trans place - // 1. add var in scope - // 2. add copy op - std::string new_var_name = - "temp_1" + std::to_string(var_scope->var_list.size() + 1); - auto v = new Variable(); - v->GetMutable(); - var_scope->name2id[new_var_name] = var_scope->var_list.size(); - var_scope->var_list.push_back(v); - - VariableNameMap copy_in_map; - auto x_iter = inputs_names.find(var_name_item.first); - copy_in_map["X"] = {x_iter->second[i]}; - VariableNameMap copy_out_map; - copy_out_map["Out"] = {new_var_name}; - AttributeMap attr_map; - attr_map["dst_place_type"] = convert(place); - - std::map> copy_ins_name2id; - copy_ins_name2id["X"] = ins_name2id[var_name_item.first]; - std::map> copy_out_name2id; - copy_out_name2id["Out"] = {var_scope->name2id[new_var_name]}; - - op_func_node.input_index[var_name_item.first][i] = - var_scope->name2id[new_var_name]; - - VariableValueMap copy_ins_value_map; - copy_ins_value_map["X"] = {var}; - VariableValueMap copy_outs_value_map; - copy_outs_value_map["Out"] = {v}; - - auto& copy_info = OpInfoMap::Instance().Get("memcpy"); - auto copy_op = copy_info.Creator()("memcpy", copy_in_map, - copy_out_map, attr_map); - OpFuncNode copy_op_func_node; - copy_op_func_node.input_index = copy_ins_name2id; - copy_op_func_node.output_index = copy_out_name2id; - - RuntimeContext copy_runtime_context({}, {}); - copy_runtime_context.inputs.swap(copy_ins_value_map); - copy_runtime_context.outputs.swap(copy_outs_value_map); - RuntimeInferShapeContext copy_infer_shape_ctx(*copy_op, - copy_runtime_context); - static_cast(copy_op) - ->InferShape(©_infer_shape_ctx); - auto& all_op_kernels = OperatorWithKernel::AllOpKernels(); - auto kernels_iter = all_op_kernels.find("memcpy"); - PADDLE_ENFORCE_NE(kernels_iter, all_op_kernels.end(), - platform::errors::Unavailable( - "There are no kernels which are registered in " - "the memcpy operator.")); - - OpKernelMap& kernels = kernels_iter->second; - platform::DeviceContextPool& pool = - platform::DeviceContextPool::Instance(); - auto* dev_ctx = pool.Get(place); - Scope scope; - auto copy_exec_ctx = - ExecutionContext(*copy_op, scope, *dev_ctx, copy_runtime_context); - auto expected_kernel_key = - dynamic_cast(copy_op) - ->GetExpectedKernelType(copy_exec_ctx); - auto kernel_iter = kernels.find(expected_kernel_key); - copy_op_func_node.kernel_func_ = - OpKernelComputeFunc(kernel_iter->second); - copy_op_func_node.kernel_func_(copy_exec_ctx); - op_list->push_back(copy_op); - vec_func_list->push_back(copy_op_func_node); - - var_name_item.second[i] = v; - } - } - } - - op_list->push_back(op_base); - - auto kernel_iter = kernels.find(expected_kernel_key); - PADDLE_ENFORCE_NE(kernel_iter, kernels.end(), - platform::errors::NotFound( - "Operator (%s) does not have kernel for %s.", - op->Type(), KernelTypeToString(expected_kernel_key))); - - op_func_node.kernel_func_ = OpKernelComputeFunc(kernel_iter->second); - op_func_node.kernel_func_(exec_ctx); - vec_func_list->push_back(op_func_node); - } -} - -class InterpreterCore { - public: - InterpreterCore(const platform::Place& place, const ProgramDesc& prog, - const ProgramDesc& startup_prog, Scope* scope) - : place_(place), prog_(prog), outer_scope_(scope) { - paddle::framework::InitDevices(); - - is_build_ = false; - - if (outer_scope_ != nullptr) { - auto name_list = outer_scope_->LocalVarNames(); - for (auto name : name_list) { - auto v = outer_scope_->Var(name); - if (global_scope.name2id.find(name) == global_scope.name2id.end()) { - global_scope.name2id[name] = global_scope.var_list.size(); - } - - global_scope.var_list.push_back(v); - } - } - - paddle::framework::build_variable_outer_scope(startup_prog, &global_scope, - outer_scope_); - - std::vector vec_func_list; - std::vector op_list; - paddle::framework::build_op_func_list( - startup_prog, &op_list, &vec_func_list, &global_scope, place_); - // add variable to outer_scope - } - void run(const std::vector& vec_name, - const std::vector& vec_tensor, - const std::vector& vec_fetch_name, - std::vector* vec_out) { - if (is_build_ == false) { - paddle::framework::build_variable_scope(prog_, &global_scope); - } - for (size_t i = 0; i < vec_name.size(); ++i) { - auto it = global_scope.name2id.find(vec_name[i]); - assert(it != global_scope.name2id.end()); - - auto feed_tensor = - global_scope.var_list[it->second]->GetMutable(); - feed_tensor->ShareDataWith(vec_tensor[i]); - } - - if (is_build_ == false) { - paddle::framework::build_op_func_list(prog_, &op_list, &vec_func_list, - &global_scope, place_); - is_build_ = true; - // convert vec func_list to graph - convert(); - } else { - exec_instruction_list(vec_instruction_, global_scope, place_); - } - - for (size_t i = 0; i < vec_fetch_name.size(); ++i) { - auto it = global_scope.name2id.find(vec_fetch_name[i]); - assert(it != global_scope.name2id.end()); - PADDLE_ENFORCE_NE(it, global_scope.name2id.end(), - platform::errors::NotFound( - "Can't find (%d) the fetch var (%s) in scope", i, - vec_fetch_name[i])); - - auto fetch_tensor = - global_scope.var_list[it->second]->GetMutable(); - - if (platform::is_gpu_place(fetch_tensor->place())) { - Tensor out; - platform::DeviceContextPool& pool = - platform::DeviceContextPool::Instance(); - auto* dev_ctx = pool.Get(place_); - dev_ctx->Wait(); - TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out); - dev_ctx->Wait(); - vec_out->push_back(out); - } else { - Tensor out; - TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out); - vec_out->push_back(out); - } - } - } - - private: - void convert() { - input_var2op_info_.resize(global_scope.var_list.size()); - - vec_instruction_.reserve(vec_func_list.size()); - dependecy_count_.resize(vec_func_list.size()); - global_scope.vec_meta_info_.resize(global_scope.var_list.size()); - for (size_t i = 0; i < vec_func_list.size(); ++i) { - Instruction temp_inst; - temp_inst.kernel_func_.compute_func_ = vec_func_list[i].kernel_func_; - temp_inst.kernel_func_.operator_base_ = op_list[i]; - temp_inst.input_index_ = vec_func_list[i].input_index; - temp_inst.output_index_ = vec_func_list[i].output_index; - - std::vector gc_check_input_list; - for (auto& item : vec_func_list[i].input_index) { - for (auto id : item.second) { - input_var2op_info_[id].push_back(i); - gc_check_input_list.push_back(id); - } - } - std::sort(gc_check_input_list.begin(), gc_check_input_list.end()); - auto last = - std::unique(gc_check_input_list.begin(), gc_check_input_list.end()); - gc_check_input_list.erase(last, gc_check_input_list.end()); - for (auto var_id : gc_check_input_list) { - global_scope.vec_meta_info_[var_id].var_ref_count_++; - } - - temp_inst.gc_check_var_list.swap(gc_check_input_list); - - vec_instruction_.push_back(temp_inst); - } - - for (size_t i = 0; i < vec_instruction_.size(); ++i) { - std::vector vec_temp; - for (auto& item : vec_instruction_[i].output_index_) { - for (auto id : item.second) { - vec_temp = merge_vec(vec_temp, input_var2op_info_[id]); - } - } - - // In Program, op order is a very import information. - // Op can noly add op after it as next as next ops. - std::vector filter_next; - filter_next.reserve(vec_temp.size()); - for (auto item : vec_temp) { - if (item > i) { - filter_next.push_back(item); - } - } - vec_instruction_[i].next_instruction_.direct_run_ = filter_next; - - // checkout ouput - for (auto& item : vec_instruction_[i].output_index_) { - for (auto id : item.second) { - if (input_var2op_info_[id].size() == 0) { - // output var not be used by any kernel - vec_instruction_[i].gc_check_var_list.push_back(id); - global_scope.vec_meta_info_[id].var_ref_count_++; - } - } - } - - for (auto inst_id : filter_next) { - dependecy_count_[inst_id]++; - } - } - } - - void run_instr(const Instruction& instr_node, const VariableScope& var_scope, - const platform::Place& place) { - auto op_base = instr_node.kernel_func_.operator_base_; - // build runtime cost - VariableValueMap ins_map; - for (auto& var_name_item : instr_node.input_index_) { - std::vector input_vars; - - input_vars.reserve(var_name_item.second.size()); - for (auto& id : var_name_item.second) { - input_vars.emplace_back(var_scope.var_list[id]); - } - ins_map.emplace(var_name_item.first, std::move(input_vars)); - } - - VariableValueMap outs_map; - for (auto& var_name_item : instr_node.output_index_) { - std::vector out_vars; - - out_vars.reserve(var_name_item.second.size()); - for (auto& id : var_name_item.second) { - out_vars.emplace_back(var_scope.var_list[id]); - } - outs_map.emplace(var_name_item.first, std::move(out_vars)); - } - - RuntimeContext runtime_context({}, {}); - runtime_context.inputs.swap(ins_map); - runtime_context.outputs.swap(outs_map); - - RuntimeInferShapeContext infer_shape_ctx(*op_base, runtime_context); - - static_cast(op_base)->InferShape( - &infer_shape_ctx); - - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto* dev_ctx = pool.Get(place); - Scope scope; - - auto exec_context = - ExecutionContext(*op_base, scope, *dev_ctx, runtime_context); - - instr_node.kernel_func_.compute_func_(exec_context); - } - - void exec_instruction_list(const std::vector& vec_instr, - const VariableScope& var_scope, - const platform::Place& place) { - std::queue working_queue; - auto working_dependecy_count = dependecy_count_; - for (size_t i = 0; i < dependecy_count_.size(); ++i) { - if (dependecy_count_[i] == 0) { - working_queue.push(i); - } - } - - auto working_var_ref = global_scope.vec_meta_info_; - - size_t run_op_number = 0; - while (!working_queue.empty()) { - auto instr_id = working_queue.front(); - working_queue.pop(); - auto& instr_node = vec_instr[instr_id]; - run_instr(instr_node, var_scope, place); - - auto& next_instr = instr_node.next_instruction_.direct_run_; - ++run_op_number; - - for (auto next_i : next_instr) { - --working_dependecy_count[next_i]; - if (working_dependecy_count[next_i] == 0) { - working_queue.push(next_i); - } - } - - // GC infomation - - auto& gc_check_list = instr_node.gc_check_var_list; - for (auto var_id : gc_check_list) { - --working_var_ref[var_id].var_ref_count_; - } - } - - for (size_t i = 0; i < working_var_ref.size(); ++i) { - if (working_var_ref[i].var_ref_count_ != 0) { - cerr << " var ref is not zero " << i << endl; - } - } - } - - const platform::Place& place_; - const ProgramDesc& prog_; - paddle::framework::VariableScope global_scope; - std::vector vec_func_list; - std::vector op_list; - - bool is_build_; - - std::vector vec_instruction_; - - InstructionInfo instruction_info_; - - std::vector dependecy_count_; - std::vector ref_coun_info; - std::vector> input_var2op_info_; - - Scope* outer_scope_; -}; -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/framework/new_exec_test.cc b/paddle/fluid/framework/new_exec_test.cc deleted file mode 100644 index 7bfb6b6540c..00000000000 --- a/paddle/fluid/framework/new_exec_test.cc +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/executor_gc_helper.h" -#include "paddle/fluid/framework/garbage_collector.h" -#include "paddle/fluid/framework/op_info.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/platform/device_context.h" - -#include "paddle/fluid/pybind/pybind.h" - -#include "gperftools/profiler.h" -#include "paddle/fluid/framework/new_exec.h" -#include "paddle/fluid/platform/init.h" - -int main() { - paddle::framework::InitDevices(); - paddle::framework::VariableScope global_scope; - auto place = paddle::platform::CUDAPlace(0); - auto test_prog = paddle::framework::load_from_file("lm_startup_program"); - { - paddle::framework::build_variable_scope(test_prog, &global_scope); - - std::vector vec_func_list; - std::vector op_list; - paddle::framework::build_op_func_list(test_prog, op_list, vec_func_list, - &global_scope, place); - - // paddle::framework::exec_op_func_list( vec_func_list, op_list, - // global_scope, place ); - } - - cerr << "run main" << endl; - auto main_prog = paddle::framework::load_from_file("lm_main_program"); - - paddle::framework::build_variable_scope(main_prog, &global_scope); - - std::vector vec_main_func_list; - std::vector op_main_list; - paddle::framework::build_op_func_list( - main_prog, op_main_list, vec_main_func_list, &global_scope, place); - paddle::framework::Scope scope; - paddle::framework::InterpreterCore interp_core(place, main_prog, test_prog, - &scope); - auto start = std::chrono::steady_clock::now(); - ProfilerStart("new_executor.prof"); - for (size_t i = 0; i < 2320; ++i) { - if (i % 200 == 0) { - cerr << i << endl; - } - // paddle::framework::exec_op_func_list( vec_main_func_list, op_main_list, - // global_scope, place ); - std::vector vec_out; - interp_core.run({}, {}, {}, vec_out); - } - ProfilerStop(); - auto end = std::chrono::steady_clock::now(); - std::chrono::duration diff = end - start; - - cerr << "time cost " << diff.count() << endl; - - return 1; -} diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt new file mode 100644 index 00000000000..80f9d343de0 --- /dev/null +++ b/paddle/fluid/framework/new_executor/CMakeLists.txt @@ -0,0 +1,4 @@ +cc_library(interpretercore SRCS interpretercore.cc DEPS operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${PYBIND_DEPS} profiler) +cc_library(standalone_executor SRCS standalone_executor.cc DEPS interpretercore operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${PYBIND_DEPS} profiler) + +# cc_binary(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler) diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc new file mode 100644 index 00000000000..7f6091742f0 --- /dev/null +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -0,0 +1,471 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "paddle/fluid/framework/new_executor/interpretercore.h" + +namespace paddle { +namespace framework { + +InterpreterCore::InterpreterCore(const platform::Place& place, + const ProgramDesc& main_prog, + VariableScope* global_scope, + const std::vector& feed_names, + const std::vector& fetch_names) + : place_(place), main_program_(main_prog), global_scope_(global_scope) { + is_build_ = false; + feed_names_ = feed_names; + fetch_names_ = fetch_names; + // add feedop and fetchop to main_program + + // prune + + // optmize graph pass + + // convert to run graph +} + +void InterpreterCore::Run(const std::vector& feed_tensors, + std::vector* fetch_tensors) { + if (is_build_ == false) { + BuildVariableScope(main_program_, global_scope_); + } + for (size_t i = 0; i < feed_names_.size(); ++i) { + auto it = global_scope_->name2id.find(feed_names_[i]); + assert(it != global_scope_->name2id.end()); + + auto feed_tensor = + global_scope_->var_list[it->second]->GetMutable(); + feed_tensor->ShareDataWith(feed_tensors[i]); + } + + if (is_build_ == false) { + BuildOpFuncList(place_, main_program_, &op_list_, &vec_func_list_, + global_scope_); + is_build_ = true; + // convert vec func_list to graph + Convert(); + } else { + ExecuteInstructionList(vec_instruction_, *global_scope_, place_); + } + + for (size_t i = 0; i < fetch_names_.size(); ++i) { + auto it = global_scope_->name2id.find(fetch_names_[i]); + assert(it != global_scope_->name2id.end()); + PADDLE_ENFORCE_NE( + it, global_scope_->name2id.end(), + platform::errors::NotFound( + "Can't find (%d) the fetch var (%s) in scope", i, fetch_names_[i])); + + auto fetch_tensor = + global_scope_->var_list[it->second]->GetMutable(); + + if (platform::is_gpu_place(fetch_tensor->place())) { + Tensor out; + platform::DeviceContextPool& pool = + platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(place_); + dev_ctx->Wait(); + TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out); + dev_ctx->Wait(); + fetch_tensors->push_back(out); + } else { + Tensor out; + TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out); + fetch_tensors->push_back(out); + } + } +} + +void InterpreterCore::Convert() { + input_var2op_info_.resize(global_scope_->var_list.size()); + + vec_instruction_.reserve(vec_func_list_.size()); + dependecy_count_.resize(vec_func_list_.size()); + vec_meta_info_.resize(global_scope_->var_list.size()); + for (size_t i = 0; i < vec_func_list_.size(); ++i) { + Instruction temp_inst; + temp_inst.kernel_func_.compute_func_ = vec_func_list_[i].kernel_func_; + temp_inst.kernel_func_.operator_base_ = op_list_[i]; + temp_inst.input_index_ = vec_func_list_[i].input_index; + temp_inst.output_index_ = vec_func_list_[i].output_index; + + std::vector gc_check_input_list; + for (auto& item : vec_func_list_[i].input_index) { + for (auto id : item.second) { + input_var2op_info_[id].push_back(i); + gc_check_input_list.push_back(id); + } + } + std::sort(gc_check_input_list.begin(), gc_check_input_list.end()); + auto last = + std::unique(gc_check_input_list.begin(), gc_check_input_list.end()); + gc_check_input_list.erase(last, gc_check_input_list.end()); + for (auto var_id : gc_check_input_list) { + vec_meta_info_[var_id].var_ref_count_++; + } + + temp_inst.gc_check_var_list.swap(gc_check_input_list); + + vec_instruction_.push_back(temp_inst); + } + + for (size_t i = 0; i < vec_instruction_.size(); ++i) { + std::vector vec_temp; + for (auto& item : vec_instruction_[i].output_index_) { + for (auto id : item.second) { + vec_temp = MergeVector(vec_temp, input_var2op_info_[id]); + } + } + + // In Program, op order is a very import information. + // Op can noly add op after it as next as next ops. + std::vector filter_next; + filter_next.reserve(vec_temp.size()); + for (auto item : vec_temp) { + if (item > i) { + filter_next.push_back(item); + } + } + vec_instruction_[i].next_instruction_.direct_run_ = filter_next; + + // checkout ouput + for (auto& item : vec_instruction_[i].output_index_) { + for (auto id : item.second) { + if (input_var2op_info_[id].size() == 0) { + // output var not be used by any kernel + vec_instruction_[i].gc_check_var_list.push_back(id); + vec_meta_info_[id].var_ref_count_++; + } + } + } + + for (auto inst_id : filter_next) { + dependecy_count_[inst_id]++; + } + } +} + +void InterpreterCore::RunInstruction(const Instruction& instr_node, + const VariableScope& var_scope, + const platform::Place& place) { + auto op_base = instr_node.kernel_func_.operator_base_; + // build runtime cost + VariableValueMap ins_map; + for (auto& var_name_item : instr_node.input_index_) { + std::vector input_vars; + + input_vars.reserve(var_name_item.second.size()); + for (auto& id : var_name_item.second) { + input_vars.emplace_back(var_scope.var_list[id]); + } + ins_map.emplace(var_name_item.first, std::move(input_vars)); + } + + VariableValueMap outs_map; + for (auto& var_name_item : instr_node.output_index_) { + std::vector out_vars; + + out_vars.reserve(var_name_item.second.size()); + for (auto& id : var_name_item.second) { + out_vars.emplace_back(var_scope.var_list[id]); + } + outs_map.emplace(var_name_item.first, std::move(out_vars)); + } + + RuntimeContext runtime_context({}, {}); + runtime_context.inputs.swap(ins_map); + runtime_context.outputs.swap(outs_map); + + RuntimeInferShapeContext infer_shape_ctx(*op_base, runtime_context); + + static_cast(op_base)->InferShape( + &infer_shape_ctx); + + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(place); + Scope scope; + + auto exec_context = + ExecutionContext(*op_base, scope, *dev_ctx, runtime_context); + + instr_node.kernel_func_.compute_func_(exec_context); +} + +void InterpreterCore::ExecuteInstructionList( + const std::vector& vec_instr, const VariableScope& var_scope, + const platform::Place& place) { + std::queue working_queue; + auto working_dependecy_count = dependecy_count_; + for (size_t i = 0; i < dependecy_count_.size(); ++i) { + if (dependecy_count_[i] == 0) { + working_queue.push(i); + } + } + + auto working_var_ref = vec_meta_info_; + + size_t run_op_number = 0; + while (!working_queue.empty()) { + auto instr_id = working_queue.front(); + working_queue.pop(); + auto& instr_node = vec_instr[instr_id]; + RunInstruction(instr_node, var_scope, place); + + auto& next_instr = instr_node.next_instruction_.direct_run_; + ++run_op_number; + + for (auto next_i : next_instr) { + --working_dependecy_count[next_i]; + if (working_dependecy_count[next_i] == 0) { + working_queue.push(next_i); + } + } + + // GC infomation + + auto& gc_check_list = instr_node.gc_check_var_list; + for (auto var_id : gc_check_list) { + --working_var_ref[var_id].var_ref_count_; + } + } + + for (size_t i = 0; i < working_var_ref.size(); ++i) { + if (working_var_ref[i].var_ref_count_ != 0) { + std::cerr << " var ref is not zero " << i << std::endl; + } + } +} + +std::vector InterpreterCore::MergeVector( + const std::vector& first, const std::vector& second) { + std::vector out(first.size() + second.size()); + std::merge(first.begin(), first.end(), second.begin(), second.end(), + out.begin()); + + std::vector::iterator it; + it = std::unique(out.begin(), out.end()); + + out.resize(std::distance(out.begin(), it)); + + return out; +} + +void InterpreterCore::BuildVariableScope(const framework::ProgramDesc& pdesc, + VariableScope* var_scope) { + auto& global_block = pdesc.Block(0); + + for (auto& var : global_block.AllVars()) { + if (var->Name() == framework::kEmptyVarName) { + continue; + } + + if (var_scope->name2id.find(var->Name()) == var_scope->name2id.end()) { + var_scope->name2id[var->Name()] = var_scope->var_list.size(); + auto v = new Variable(); + InitializeVariable(v, var->GetType()); + var_scope->var_list.push_back(v); + } + } +} + +void InterpreterCore::BuildOpFuncList(const platform::Place& place, + const framework::ProgramDesc& pdesc, + std::vector* op_list, + std::vector* vec_func_list, + VariableScope* var_scope) { + auto& global_block = pdesc.Block(0); + + for (auto& op : global_block.AllOps()) { + VLOG(3) << op->Type(); + // << op->Type() << endl; + + auto& info = OpInfoMap::Instance().Get(op->Type()); + + const VariableNameMap& inputs_names = op->Inputs(); + const VariableNameMap& outputs_names = op->Outputs(); + AttributeMap op_attr_map = op->GetAttrMap(); + + if (info.Checker() != nullptr) { + info.Checker()->Check(&op_attr_map); + } + auto op_base = + info.Creator()(op->Type(), inputs_names, outputs_names, op_attr_map); + + OpFuncNode op_func_node; + + VariableValueMap ins_map; + std::map> ins_name2id; + for (auto& var_name_item : inputs_names) { + std::vector input_vars; + std::vector vec_ids; + input_vars.reserve(var_name_item.second.size()); + for (auto& var_name : var_name_item.second) { + auto it = var_scope->name2id.find(var_name); + assert(it != var_scope->name2id.end()); + input_vars.push_back(var_scope->var_list[it->second]); + vec_ids.push_back(it->second); + } + ins_map[var_name_item.first] = input_vars; + ins_name2id[var_name_item.first] = vec_ids; + } + + VariableValueMap outs_map; + std::map> outs_name2id; + for (auto& var_name_item : outputs_names) { + std::vector output_vars; + std::vector vec_ids; + output_vars.reserve(var_name_item.second.size()); + for (auto& var_name : var_name_item.second) { + auto it = var_scope->name2id.find(var_name); + assert(it != var_scope->name2id.end()); + output_vars.push_back(var_scope->var_list[it->second]); + vec_ids.push_back(it->second); + } + outs_map[var_name_item.first] = output_vars; + outs_name2id[var_name_item.first] = vec_ids; + } + + op_func_node.input_index = ins_name2id; + op_func_node.output_index = outs_name2id; + RuntimeContext runtime_context({}, {}); + runtime_context.inputs.swap(ins_map); + runtime_context.outputs.swap(outs_map); + RuntimeInferShapeContext infer_shape_ctx(*op_base, runtime_context); + static_cast(op_base)->InferShape( + &infer_shape_ctx); + auto& all_op_kernels = OperatorWithKernel::AllOpKernels(); + auto kernels_iter = all_op_kernels.find(op->Type()); + PADDLE_ENFORCE_NE( + kernels_iter, all_op_kernels.end(), + platform::errors::Unavailable( + "There are no kernels which are registered in the %s operator.", + op->Type())); + + OpKernelMap& kernels = kernels_iter->second; + // auto place = platform::CPUPlace(); + // auto place = platform::CUDAPlace(0); + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(place); + Scope scope; + auto exec_ctx = + ExecutionContext(*op_base, scope, *dev_ctx, runtime_context); + auto expected_kernel_key = + dynamic_cast(op_base) + ->GetExpectedKernelType(exec_ctx); + + VariableValueMap& ins_map_temp = runtime_context.inputs; + + for (auto& var_name_item : ins_map_temp) { + for (size_t i = 0; i < var_name_item.second.size(); ++i) { + auto var = var_name_item.second[i]; + auto tensor_in = static_cast(&(var->Get())); + if (!tensor_in->IsInitialized()) { + continue; + } + auto kernel_type_for_var = + static_cast(op_base) + ->GetKernelTypeForVar(var_name_item.first, *tensor_in, + expected_kernel_key); + if (!platform::is_same_place(kernel_type_for_var.place_, + expected_kernel_key.place_)) { + // need trans place + // 1. add var in scope + // 2. add copy op + std::string new_var_name = + "temp_1" + std::to_string(var_scope->var_list.size() + 1); + auto v = new Variable(); + v->GetMutable(); + var_scope->name2id[new_var_name] = var_scope->var_list.size(); + var_scope->var_list.push_back(v); + + VariableNameMap copy_in_map; + auto x_iter = inputs_names.find(var_name_item.first); + copy_in_map["X"] = {x_iter->second[i]}; + VariableNameMap copy_out_map; + copy_out_map["Out"] = {new_var_name}; + AttributeMap attr_map; + attr_map["dst_place_type"] = + is_cpu_place(place) ? 0 : is_gpu_place(place) ? 1 : -1; + + std::map> copy_ins_name2id; + copy_ins_name2id["X"] = ins_name2id[var_name_item.first]; + std::map> copy_out_name2id; + copy_out_name2id["Out"] = {var_scope->name2id[new_var_name]}; + + op_func_node.input_index[var_name_item.first][i] = + var_scope->name2id[new_var_name]; + + VariableValueMap copy_ins_value_map; + copy_ins_value_map["X"] = {var}; + VariableValueMap copy_outs_value_map; + copy_outs_value_map["Out"] = {v}; + + auto& copy_info = OpInfoMap::Instance().Get("memcpy"); + auto copy_op = copy_info.Creator()("memcpy", copy_in_map, + copy_out_map, attr_map); + OpFuncNode copy_op_func_node; + copy_op_func_node.input_index = copy_ins_name2id; + copy_op_func_node.output_index = copy_out_name2id; + + RuntimeContext copy_runtime_context({}, {}); + copy_runtime_context.inputs.swap(copy_ins_value_map); + copy_runtime_context.outputs.swap(copy_outs_value_map); + RuntimeInferShapeContext copy_infer_shape_ctx(*copy_op, + copy_runtime_context); + static_cast(copy_op) + ->InferShape(©_infer_shape_ctx); + auto& all_op_kernels = OperatorWithKernel::AllOpKernels(); + auto kernels_iter = all_op_kernels.find("memcpy"); + PADDLE_ENFORCE_NE(kernels_iter, all_op_kernels.end(), + platform::errors::Unavailable( + "There are no kernels which are registered in " + "the memcpy operator.")); + + OpKernelMap& kernels = kernels_iter->second; + platform::DeviceContextPool& pool = + platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(place); + Scope scope; + auto copy_exec_ctx = + ExecutionContext(*copy_op, scope, *dev_ctx, copy_runtime_context); + auto expected_kernel_key = + dynamic_cast(copy_op) + ->GetExpectedKernelType(copy_exec_ctx); + auto kernel_iter = kernels.find(expected_kernel_key); + copy_op_func_node.kernel_func_ = + OpKernelComputeFunc(kernel_iter->second); + copy_op_func_node.kernel_func_(copy_exec_ctx); + op_list->push_back(copy_op); + vec_func_list->push_back(copy_op_func_node); + + var_name_item.second[i] = v; + } + } + } + + op_list->push_back(op_base); + + auto kernel_iter = kernels.find(expected_kernel_key); + PADDLE_ENFORCE_NE(kernel_iter, kernels.end(), + platform::errors::NotFound( + "Operator (%s) does not have kernel for %s.", + op->Type(), KernelTypeToString(expected_kernel_key))); + + op_func_node.kernel_func_ = OpKernelComputeFunc(kernel_iter->second); + op_func_node.kernel_func_(exec_ctx); + vec_func_list->push_back(op_func_node); + } +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h new file mode 100644 index 00000000000..4d3369c8947 --- /dev/null +++ b/paddle/fluid/framework/new_executor/interpretercore.h @@ -0,0 +1,84 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include +#include + +#include "paddle/fluid/framework/new_executor/interpretercore_util.h" +#include "paddle/fluid/framework/new_executor/new_executor_defs.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/variable.h" + +namespace paddle { +namespace framework { + +class InterpreterCore { + public: + InterpreterCore(const platform::Place& place, const ProgramDesc& main_prog, + VariableScope* global_scope, + const std::vector& feed_names, + const std::vector& fetch_names); + + void Run(const std::vector& feed_tensors, + std::vector* fetch_tensors); + + static void BuildOpFuncList(const platform::Place& place, + const framework::ProgramDesc& pdesc, + std::vector* op_list, + std::vector* vec_func_list, + VariableScope* var_scope); + + private: + void Convert(); + + void RunInstruction(const Instruction& instr_node, + const VariableScope& var_scope, + const platform::Place& place); + + void ExecuteInstructionList(const std::vector& vec_instr, + const VariableScope& var_scope, + const platform::Place& place); + + std::vector MergeVector(const std::vector& first, + const std::vector& second); + + void BuildVariableScope(const framework::ProgramDesc& pdesc, + VariableScope* var_scope); + + const platform::Place& place_; + const ProgramDesc& main_program_; + VariableScope* global_scope_; + std::vector vec_meta_info_; + + std::vector vec_func_list_; + std::vector op_list_; + + std::vector vec_instruction_; + InstructionInfo instruction_info_; + std::vector dependecy_count_; + std::vector ref_coun_info_; + std::vector> input_var2op_info_; + + bool is_build_; + + std::vector feed_names_; + std::vector fetch_names_; +}; +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/new_exec_util.h b/paddle/fluid/framework/new_executor/interpretercore_util.h similarity index 99% rename from paddle/fluid/framework/new_exec_util.h rename to paddle/fluid/framework/new_executor/interpretercore_util.h index 1783b9be74b..e6651f38d91 100644 --- a/paddle/fluid/framework/new_exec_util.h +++ b/paddle/fluid/framework/new_executor/interpretercore_util.h @@ -13,7 +13,7 @@ // limitations under the License. /************************************************************************* - > File Name: new_exec_util.h + > File Name: interpretercore_util.h > Author: guanshanshan@baidu.com > Created Time: Fri 23 Jul 2021 06:19:19 AM UTC ************************************************************************/ diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h new file mode 100644 index 00000000000..fb8a96aaca4 --- /dev/null +++ b/paddle/fluid/framework/new_executor/new_executor_defs.h @@ -0,0 +1,79 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +#include "paddle/fluid/framework/operator.h" + +namespace paddle { +namespace framework { + +using OpKernelComputeFunc = std::function; +using OpKernelMap = + std::unordered_map; + +struct OpKernelFunc { + OpKernelComputeFunc compute_func_; + OperatorBase* operator_base_; +}; + +struct VariableMetaInfo { + int var_ref_count_; +}; + +struct VariableScope { + std::vector var_list; + std::map name2id; +}; + +struct NextInstruction { + std::vector direct_run_; +}; + +struct EventInter {}; + +struct InstructionInfo { + std::vector dependecy_count_; +}; + +struct EventRun { + EventInter event_inter; + std::vector same_device_run_; + std::vector synchronized_run; +}; + +struct Instruction { + OpKernelFunc kernel_func_; + std::map> input_index_; + std::map> output_index_; + + std::vector gc_check_var_list; + NextInstruction next_instruction_; + std::vector vec_event_list_; +}; + +struct OpFuncNode { + // int unsed; + std::map> input_index; + std::map> output_index; + + OpKernelComputeFunc kernel_func_; +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/new_executor/standalone_executor.cc b/paddle/fluid/framework/new_executor/standalone_executor.cc new file mode 100644 index 00000000000..c312195feb5 --- /dev/null +++ b/paddle/fluid/framework/new_executor/standalone_executor.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "paddle/fluid/framework/new_executor/standalone_executor.h" + +namespace paddle { +namespace framework { +StandaloneExecutor::StandaloneExecutor(const platform::Place& place, + const ProgramDesc& startup_prog, + const ProgramDesc& main_prog, + Scope* scope) + : place_(place), + startup_prog_(startup_prog), + main_prog_(main_prog), + outer_scope_(scope) { + paddle::framework::InitDevices(); + + // init scope + BuildVariableOuterScope(startup_prog, &global_scope_, scope); + + if (outer_scope_ != nullptr) { + auto name_list = outer_scope_->LocalVarNames(); + for (auto name : name_list) { + auto v = outer_scope_->Var(name); + if (global_scope_.name2id.find(name) == global_scope_.name2id.end()) { + global_scope_.name2id[name] = global_scope_.var_list.size(); + } + + global_scope_.var_list.push_back(v); + } + } + + // run startup program + std::vector vec_func_list; + std::vector op_list; + InterpreterCore::BuildOpFuncList(place_, startup_prog, &op_list, + &vec_func_list, &global_scope_); +} + +int StandaloneExecutor::Run(const std::vector& feed_names, + const std::vector& feed_tensors, + const std::vector& fetch_names, + std::vector* fetch_tensors) { + auto core = GetInterpreterCore(feed_names, fetch_names); + + core->Run(feed_tensors, fetch_tensors); + + return 0; +} + +void StandaloneExecutor::BuildVariableOuterScope( + const framework::ProgramDesc& pdesc, VariableScope* var_scope, + Scope* outer_scope) { + auto& global_block = pdesc.Block(0); + + for (auto& var : global_block.AllVars()) { + if (var->Name() == framework::kEmptyVarName) { + continue; + } + + if (var_scope->name2id.find(var->Name()) == var_scope->name2id.end()) { + var_scope->name2id[var->Name()] = var_scope->var_list.size(); + auto v = outer_scope->Var(var->Name()); + InitializeVariable(v, var->GetType()); + var_scope->var_list.push_back(v); + } + } +} + +std::shared_ptr StandaloneExecutor::GetInterpreterCore( + const std::vector& feed_names, + const std::vector& fetch_names) { + std::ostringstream oss; + oss << "feed:"; + for (auto& feedname : feed_names) { + oss << feedname << ","; + } + oss << "fetch:"; + for (auto& fetchname : fetch_names) { + oss << fetchname << ","; + } + + auto iter = interpretercores_.find(oss.str()); + + if (iter == interpretercores_.end()) { + auto core = std::make_shared( + place_, main_prog_, &global_scope_, feed_names, fetch_names); + interpretercores_.emplace(oss.str(), core); + return core; + } else { + return iter->second; + } +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/new_executor/standalone_executor.h b/paddle/fluid/framework/new_executor/standalone_executor.h new file mode 100644 index 00000000000..8526f64c6bc --- /dev/null +++ b/paddle/fluid/framework/new_executor/standalone_executor.h @@ -0,0 +1,67 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +#include "paddle/fluid/framework/new_executor/interpretercore.h" + +namespace paddle { +namespace framework { + +class ExecutorBase { + public: + virtual ~ExecutorBase() {} + virtual int Run(const std::vector& feed_names, + const std::vector& feed_tensors, + const std::vector& fetch_names, + std::vector* fetch_tensors) = 0; +}; + +class StandaloneExecutor : public ExecutorBase { + public: + StandaloneExecutor(const platform::Place& place, + const ProgramDesc& startup_prog, + const ProgramDesc& main_prog, Scope* scope); + + ~StandaloneExecutor() {} + + virtual int Run(const std::vector& feed_names, + const std::vector& feed_tensors, + const std::vector& fetch_names, + std::vector* fetch_tensors); + + private: + void BuildVariableOuterScope(const framework::ProgramDesc& pdesc, + VariableScope* var_scope, Scope* outer_scope); + + std::shared_ptr GetInterpreterCore( + const std::vector& feed_names, + const std::vector& fetch_names); + + const platform::Place& place_; + const ProgramDesc& startup_prog_; + const ProgramDesc& main_prog_; + Scope* outer_scope_; + VariableScope global_scope_; + + std::unordered_map> + interpretercores_; +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc new file mode 100644 index 00000000000..9e831147903 --- /dev/null +++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include +#include +#include +#include +#include + +#include "paddle/fluid/framework/new_executor/standalone_executor.h" + +paddle::framework::ProgramDesc load_from_file(const std::string& file_name) { + std::ifstream fin(file_name, std::ios::in | std::ios::binary); + fin.seekg(0, std::ios::end); + std::string buffer(fin.tellg(), ' '); + fin.seekg(0, std::ios::beg); + fin.read(&buffer[0], buffer.size()); + fin.close(); + + paddle::framework::ProgramDesc program_desc(buffer); + return program_desc; +} + +int main() { + paddle::framework::InitDevices(); + auto place = paddle::platform::CUDAPlace(0); + auto test_prog = load_from_file("lm_startup_program"); + + auto main_prog = load_from_file("lm_main_program"); + + paddle::framework::Scope scope; + paddle::framework::StandaloneExecutor exec(place, test_prog, main_prog, + &scope); + + auto start = std::chrono::steady_clock::now(); + for (size_t i = 0; i < 2320; ++i) { + if (i % 200 == 0) { + std::cout << i << std::endl; + } + + std::vector vec_out; + exec.Run({}, {}, {}, &vec_out); + } + auto end = std::chrono::steady_clock::now(); + std::chrono::duration diff = end - start; + + std::cout << "time cost " << diff.count() << std::endl; + + return 1; +} diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index b8774f42963..ca0ed68a13f 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -200,6 +200,7 @@ if(WITH_PYTHON) endif(WIN32) add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file}) + list(APPEND PYBIND_DEPS interpretercore standalone_executor) cc_library(paddle_pybind SHARED SRCS ${PYBIND_SRCS} DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 235a06833fc..426b539e80c 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -42,7 +42,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor_array.h" -#include "paddle/fluid/framework/new_exec.h" +#include "paddle/fluid/framework/new_executor/standalone_executor.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -1945,30 +1945,30 @@ All parameter, weight, gradient are variables in Paddle. fetch_vars); }); - py::class_(m, "InterpreterCore") + py::class_(m, "StandaloneExecutor") .def(py::init()) .def("run", - [](InterpreterCore &self, + [](StandaloneExecutor &self, const std::unordered_map &input_dict, - std::vector vec_fetch_name) { + std::vector fetch_names) { pybind11::gil_scoped_release release; - std::vector vec_tensor; - std::vector vec_name; + std::vector feed_tensors; + std::vector feed_names; for (auto &item : input_dict) { framework::LoDTensor t; SetTensorFromPyArray( &t, item.second, platform::CPUPlace(), false); - vec_name.push_back(item.first); - vec_tensor.push_back(t); + feed_names.push_back(item.first); + feed_tensors.push_back(t); } - std::vector vec_out; - self.run(vec_name, vec_tensor, vec_fetch_name, &vec_out); + std::vector fetch_tensors; + self.Run(feed_names, feed_tensors, fetch_names, &fetch_tensors); std::vector vec_ret; - for (size_t i = 0; i < vec_out.size(); ++i) { - vec_ret.push_back(TensorToPyArray(vec_out[i], true)); + for (size_t i = 0; i < fetch_tensors.size(); ++i) { + vec_ret.push_back(TensorToPyArray(fetch_tensors[i], true)); } return vec_ret; }); diff --git a/python/paddle/fluid/tests/unittests/interpreter/test_interpreter.py b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py similarity index 76% rename from python/paddle/fluid/tests/unittests/interpreter/test_interpreter.py rename to python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py index bb18d28e48b..bfed9621c94 100644 --- a/python/paddle/fluid/tests/unittests/interpreter/test_interpreter.py +++ b/python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py @@ -15,7 +15,7 @@ import unittest import paddle from paddle.fluid import core -from paddle.fluid.core import InterpreterCore +from paddle.fluid.core import StandaloneExecutor import numpy as np @@ -37,19 +37,25 @@ class LinearTestCase(unittest.TestCase): startup_program = paddle.fluid.default_startup_program() p = core.Place() p.set_place(self.place) - inter_core = InterpreterCore(p, main_program.desc, startup_program.desc, - core.Scope()) + standaloneexecutor = StandaloneExecutor(p, startup_program.desc, + main_program.desc, core.Scope()) - out = inter_core.run({ + out = standaloneexecutor.run({ "a": np.ones( [2, 2], dtype="float32") * 2 }, [c.name]) for i in range(10): - out = inter_core.run({ + out = standaloneexecutor.run({ "a": np.ones( [2, 2], dtype="float32") * i }, [c.name]) + for i in range(10): + out = standaloneexecutor.run({ + "a": np.ones( + [2, 2], dtype="float32") * i + }, [a.name, c.name]) + if __name__ == "__main__": unittest.main() -- GitLab