Unverified commit 40d4d834, authored by wanghuancoder, committed by GitHub

code refactoring for new executor (#34970)

* code refactoring, test=develop

* refine, test=develop

* refine, test=develop

* refine, test=develop
Parent 1b747de7
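For context, the refactored API introduced by this commit is exercised end to end by standalone_executor_test.cc further down in the diff. A minimal usage sketch, assuming the startup and main ProgramDescs have already been deserialized elsewhere (the RunOnce wrapper is illustrative, not part of the diff):

#include <vector>

#include "paddle/fluid/framework/new_executor/standalone_executor.h"

// Minimal sketch of driving the new executor; mirrors the benchmark loop
// in standalone_executor_test.cc below.
void RunOnce(const paddle::platform::Place& place,
             const paddle::framework::ProgramDesc& startup_prog,
             const paddle::framework::ProgramDesc& main_prog) {
  paddle::framework::Scope scope;
  // The constructor builds the variable scope and processes the startup program.
  paddle::framework::StandaloneExecutor exec(place, startup_prog, main_prog,
                                             &scope);

  std::vector<paddle::framework::Tensor> fetch_tensors;
  // Empty feeds/fetches, as in the test's timing loop.
  exec.Run(/*feed_names=*/{}, /*feed_tensors=*/{}, /*fetch_names=*/{},
           &fetch_tensors);
}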
@@ -25,6 +25,7 @@ add_subdirectory(ir)
add_subdirectory(details)
add_subdirectory(fleet)
add_subdirectory(io)
add_subdirectory(new_executor)
#ddim lib
proto_library(framework_proto SRCS framework.proto)
...
cc_library(interpretercore SRCS interpretercore.cc DEPS operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${PYBIND_DEPS} profiler)
cc_library(standalone_executor SRCS standalone_executor.cc DEPS interpretercore operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${PYBIND_DEPS} profiler)
# cc_binary(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler)
@@ -11,153 +11,257 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/new_executor/interpretercore.h"

namespace paddle {
namespace framework {

InterpreterCore::InterpreterCore(const platform::Place& place,
                                 const ProgramDesc& main_prog,
                                 VariableScope* global_scope,
                                 const std::vector<std::string>& feed_names,
                                 const std::vector<std::string>& fetch_names)
    : place_(place), main_program_(main_prog), global_scope_(global_scope) {
  is_build_ = false;
  feed_names_ = feed_names;
  fetch_names_ = fetch_names;

  // add feed op and fetch op to main_program

  // prune

  // optimize graph pass

  // convert to run graph
}

void InterpreterCore::Run(const std::vector<framework::Tensor>& feed_tensors,
                          std::vector<framework::Tensor>* fetch_tensors) {
  if (is_build_ == false) {
    BuildVariableScope(main_program_, global_scope_);
  }
  for (size_t i = 0; i < feed_names_.size(); ++i) {
    auto it = global_scope_->name2id.find(feed_names_[i]);
    assert(it != global_scope_->name2id.end());

    auto feed_tensor =
        global_scope_->var_list[it->second]->GetMutable<framework::LoDTensor>();
    feed_tensor->ShareDataWith(feed_tensors[i]);
  }

  if (is_build_ == false) {
    BuildOpFuncList(place_, main_program_, &op_list_, &vec_func_list_,
                    global_scope_);
    is_build_ = true;
    // convert vec func_list to graph
    Convert();
  } else {
    ExecuteInstructionList(vec_instruction_, *global_scope_, place_);
  }

  for (size_t i = 0; i < fetch_names_.size(); ++i) {
    auto it = global_scope_->name2id.find(fetch_names_[i]);
    assert(it != global_scope_->name2id.end());
    PADDLE_ENFORCE_NE(
        it, global_scope_->name2id.end(),
        platform::errors::NotFound(
            "Can't find (%d) the fetch var (%s) in scope", i, fetch_names_[i]));

    auto fetch_tensor =
        global_scope_->var_list[it->second]->GetMutable<framework::LoDTensor>();

    if (platform::is_gpu_place(fetch_tensor->place())) {
      Tensor out;
      platform::DeviceContextPool& pool =
          platform::DeviceContextPool::Instance();
      auto* dev_ctx = pool.Get(place_);
      dev_ctx->Wait();
      TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
      dev_ctx->Wait();
      fetch_tensors->push_back(out);
    } else {
      Tensor out;
      TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
      fetch_tensors->push_back(out);
    }
  }
}

void InterpreterCore::Convert() {
  input_var2op_info_.resize(global_scope_->var_list.size());

  vec_instruction_.reserve(vec_func_list_.size());
  dependecy_count_.resize(vec_func_list_.size());
  vec_meta_info_.resize(global_scope_->var_list.size());

  for (size_t i = 0; i < vec_func_list_.size(); ++i) {
    Instruction temp_inst;
    temp_inst.kernel_func_.compute_func_ = vec_func_list_[i].kernel_func_;
    temp_inst.kernel_func_.operator_base_ = op_list_[i];
    temp_inst.input_index_ = vec_func_list_[i].input_index;
    temp_inst.output_index_ = vec_func_list_[i].output_index;

    std::vector<size_t> gc_check_input_list;
    for (auto& item : vec_func_list_[i].input_index) {
      for (auto id : item.second) {
        input_var2op_info_[id].push_back(i);
        gc_check_input_list.push_back(id);
      }
    }
    std::sort(gc_check_input_list.begin(), gc_check_input_list.end());
    auto last =
        std::unique(gc_check_input_list.begin(), gc_check_input_list.end());
    gc_check_input_list.erase(last, gc_check_input_list.end());
    for (auto var_id : gc_check_input_list) {
      vec_meta_info_[var_id].var_ref_count_++;
    }

    temp_inst.gc_check_var_list.swap(gc_check_input_list);

    vec_instruction_.push_back(temp_inst);
  }

  for (size_t i = 0; i < vec_instruction_.size(); ++i) {
    std::vector<size_t> vec_temp;
    for (auto& item : vec_instruction_[i].output_index_) {
      for (auto id : item.second) {
        vec_temp = MergeVector(vec_temp, input_var2op_info_[id]);
      }
    }

    // In a Program, op order is very important information.
    // An op may only take the ops after it as its next ops.
    std::vector<size_t> filter_next;
    filter_next.reserve(vec_temp.size());
    for (auto item : vec_temp) {
      if (item > i) {
        filter_next.push_back(item);
      }
    }
    vec_instruction_[i].next_instruction_.direct_run_ = filter_next;

    // check output
    for (auto& item : vec_instruction_[i].output_index_) {
      for (auto id : item.second) {
        if (input_var2op_info_[id].size() == 0) {
          // output var is not used by any kernel
          vec_instruction_[i].gc_check_var_list.push_back(id);
          vec_meta_info_[id].var_ref_count_++;
        }
      }
    }

    for (auto inst_id : filter_next) {
      dependecy_count_[inst_id]++;
    }
  }
}

void InterpreterCore::RunInstruction(const Instruction& instr_node,
                                     const VariableScope& var_scope,
                                     const platform::Place& place) {
  auto op_base = instr_node.kernel_func_.operator_base_;
  // build runtime context
  VariableValueMap ins_map;
  for (auto& var_name_item : instr_node.input_index_) {
    std::vector<Variable*> input_vars;

    input_vars.reserve(var_name_item.second.size());
    for (auto& id : var_name_item.second) {
      input_vars.emplace_back(var_scope.var_list[id]);
    }
    ins_map.emplace(var_name_item.first, std::move(input_vars));
  }

  VariableValueMap outs_map;
  for (auto& var_name_item : instr_node.output_index_) {
    std::vector<Variable*> out_vars;

    out_vars.reserve(var_name_item.second.size());
    for (auto& id : var_name_item.second) {
      out_vars.emplace_back(var_scope.var_list[id]);
    }
    outs_map.emplace(var_name_item.first, std::move(out_vars));
  }

  RuntimeContext runtime_context({}, {});
  runtime_context.inputs.swap(ins_map);
  runtime_context.outputs.swap(outs_map);

  RuntimeInferShapeContext infer_shape_ctx(*op_base, runtime_context);

  static_cast<const framework::OperatorWithKernel*>(op_base)->InferShape(
      &infer_shape_ctx);

  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
  auto* dev_ctx = pool.Get(place);
  Scope scope;

  auto exec_context =
      ExecutionContext(*op_base, scope, *dev_ctx, runtime_context);

  instr_node.kernel_func_.compute_func_(exec_context);
}

void InterpreterCore::ExecuteInstructionList(
    const std::vector<Instruction>& vec_instr, const VariableScope& var_scope,
    const platform::Place& place) {
  std::queue<size_t> working_queue;
  auto working_dependecy_count = dependecy_count_;
  for (size_t i = 0; i < dependecy_count_.size(); ++i) {
    if (dependecy_count_[i] == 0) {
      working_queue.push(i);
    }
  }

  auto working_var_ref = vec_meta_info_;

  size_t run_op_number = 0;
  while (!working_queue.empty()) {
    auto instr_id = working_queue.front();
    working_queue.pop();
    auto& instr_node = vec_instr[instr_id];
    RunInstruction(instr_node, var_scope, place);

    auto& next_instr = instr_node.next_instruction_.direct_run_;
    ++run_op_number;

    for (auto next_i : next_instr) {
      --working_dependecy_count[next_i];
      if (working_dependecy_count[next_i] == 0) {
        working_queue.push(next_i);
      }
    }

    // GC information
    auto& gc_check_list = instr_node.gc_check_var_list;
    for (auto var_id : gc_check_list) {
      --working_var_ref[var_id].var_ref_count_;
    }
  }

  for (size_t i = 0; i < working_var_ref.size(); ++i) {
    if (working_var_ref[i].var_ref_count_ != 0) {
      std::cerr << " var ref is not zero " << i << std::endl;
    }
  }
}

std::vector<size_t> InterpreterCore::MergeVector(
    const std::vector<size_t>& first, const std::vector<size_t>& second) {
  std::vector<size_t> out(first.size() + second.size());
  std::merge(first.begin(), first.end(), second.begin(), second.end(),
             out.begin());

  std::vector<size_t>::iterator it;
  it = std::unique(out.begin(), out.end());

  out.resize(std::distance(out.begin(), it));

  return out;
}

void InterpreterCore::BuildVariableScope(const framework::ProgramDesc& pdesc,
                                         VariableScope* var_scope) {
  auto& global_block = pdesc.Block(0);

  for (auto& var : global_block.AllVars()) {
@@ -167,19 +271,18 @@
    if (var_scope->name2id.find(var->Name()) == var_scope->name2id.end()) {
      var_scope->name2id[var->Name()] = var_scope->var_list.size();
      auto v = new Variable();
      InitializeVariable(v, var->GetType());
      var_scope->var_list.push_back(v);
    }
  }
}

void InterpreterCore::BuildOpFuncList(const platform::Place& place,
                                      const framework::ProgramDesc& pdesc,
                                      std::vector<OperatorBase*>* op_list,
                                      std::vector<OpFuncNode>* vec_func_list,
                                      VariableScope* var_scope) {
  auto& global_block = pdesc.Block(0);

  for (auto& op : global_block.AllOps()) {
@@ -291,7 +394,8 @@
      VariableNameMap copy_out_map;
      copy_out_map["Out"] = {new_var_name};
      AttributeMap attr_map;
      attr_map["dst_place_type"] =
          is_cpu_place(place) ? 0 : is_gpu_place(place) ? 1 : -1;

      std::map<std::string, std::vector<int>> copy_ins_name2id;
      copy_ins_name2id["X"] = ins_name2id[var_name_item.first];
@@ -363,267 +467,5 @@
  }
}
class InterpreterCore {
public:
InterpreterCore(const platform::Place& place, const ProgramDesc& prog,
const ProgramDesc& startup_prog, Scope* scope)
: place_(place), prog_(prog), outer_scope_(scope) {
paddle::framework::InitDevices();
is_build_ = false;
if (outer_scope_ != nullptr) {
auto name_list = outer_scope_->LocalVarNames();
for (auto name : name_list) {
auto v = outer_scope_->Var(name);
if (global_scope.name2id.find(name) == global_scope.name2id.end()) {
global_scope.name2id[name] = global_scope.var_list.size();
}
global_scope.var_list.push_back(v);
}
}
paddle::framework::build_variable_outer_scope(startup_prog, &global_scope,
outer_scope_);
std::vector<paddle::framework::OpFuncNode> vec_func_list;
std::vector<paddle::framework::OperatorBase*> op_list;
paddle::framework::build_op_func_list(
startup_prog, &op_list, &vec_func_list, &global_scope, place_);
// add variable to outer_scope
}
void run(const std::vector<std::string>& vec_name,
const std::vector<framework::Tensor>& vec_tensor,
const std::vector<std::string>& vec_fetch_name,
std::vector<framework::Tensor>* vec_out) {
if (is_build_ == false) {
paddle::framework::build_variable_scope(prog_, &global_scope);
}
for (size_t i = 0; i < vec_name.size(); ++i) {
auto it = global_scope.name2id.find(vec_name[i]);
assert(it != global_scope.name2id.end());
auto feed_tensor =
global_scope.var_list[it->second]->GetMutable<framework::LoDTensor>();
feed_tensor->ShareDataWith(vec_tensor[i]);
}
if (is_build_ == false) {
paddle::framework::build_op_func_list(prog_, &op_list, &vec_func_list,
&global_scope, place_);
is_build_ = true;
// convert vec func_list to graph
convert();
} else {
exec_instruction_list(vec_instruction_, global_scope, place_);
}
for (size_t i = 0; i < vec_fetch_name.size(); ++i) {
auto it = global_scope.name2id.find(vec_fetch_name[i]);
assert(it != global_scope.name2id.end());
PADDLE_ENFORCE_NE(it, global_scope.name2id.end(),
platform::errors::NotFound(
"Can't find (%d) the fetch var (%s) in scope", i,
vec_fetch_name[i]));
auto fetch_tensor =
global_scope.var_list[it->second]->GetMutable<framework::LoDTensor>();
if (platform::is_gpu_place(fetch_tensor->place())) {
Tensor out;
platform::DeviceContextPool& pool =
platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place_);
dev_ctx->Wait();
TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
dev_ctx->Wait();
vec_out->push_back(out);
} else {
Tensor out;
TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
vec_out->push_back(out);
}
}
}
private:
void convert() {
input_var2op_info_.resize(global_scope.var_list.size());
vec_instruction_.reserve(vec_func_list.size());
dependecy_count_.resize(vec_func_list.size());
global_scope.vec_meta_info_.resize(global_scope.var_list.size());
for (size_t i = 0; i < vec_func_list.size(); ++i) {
Instruction temp_inst;
temp_inst.kernel_func_.compute_func_ = vec_func_list[i].kernel_func_;
temp_inst.kernel_func_.operator_base_ = op_list[i];
temp_inst.input_index_ = vec_func_list[i].input_index;
temp_inst.output_index_ = vec_func_list[i].output_index;
std::vector<size_t> gc_check_input_list;
for (auto& item : vec_func_list[i].input_index) {
for (auto id : item.second) {
input_var2op_info_[id].push_back(i);
gc_check_input_list.push_back(id);
}
}
std::sort(gc_check_input_list.begin(), gc_check_input_list.end());
auto last =
std::unique(gc_check_input_list.begin(), gc_check_input_list.end());
gc_check_input_list.erase(last, gc_check_input_list.end());
for (auto var_id : gc_check_input_list) {
global_scope.vec_meta_info_[var_id].var_ref_count_++;
}
temp_inst.gc_check_var_list.swap(gc_check_input_list);
vec_instruction_.push_back(temp_inst);
}
for (size_t i = 0; i < vec_instruction_.size(); ++i) {
std::vector<size_t> vec_temp;
for (auto& item : vec_instruction_[i].output_index_) {
for (auto id : item.second) {
vec_temp = merge_vec(vec_temp, input_var2op_info_[id]);
}
}
// In Program, op order is a very import information.
// Op can noly add op after it as next as next ops.
std::vector<size_t> filter_next;
filter_next.reserve(vec_temp.size());
for (auto item : vec_temp) {
if (item > i) {
filter_next.push_back(item);
}
}
vec_instruction_[i].next_instruction_.direct_run_ = filter_next;
// checkout ouput
for (auto& item : vec_instruction_[i].output_index_) {
for (auto id : item.second) {
if (input_var2op_info_[id].size() == 0) {
// output var not be used by any kernel
vec_instruction_[i].gc_check_var_list.push_back(id);
global_scope.vec_meta_info_[id].var_ref_count_++;
}
}
}
for (auto inst_id : filter_next) {
dependecy_count_[inst_id]++;
}
}
}
void run_instr(const Instruction& instr_node, const VariableScope& var_scope,
const platform::Place& place) {
auto op_base = instr_node.kernel_func_.operator_base_;
// build runtime cost
VariableValueMap ins_map;
for (auto& var_name_item : instr_node.input_index_) {
std::vector<Variable*> input_vars;
input_vars.reserve(var_name_item.second.size());
for (auto& id : var_name_item.second) {
input_vars.emplace_back(var_scope.var_list[id]);
}
ins_map.emplace(var_name_item.first, std::move(input_vars));
}
VariableValueMap outs_map;
for (auto& var_name_item : instr_node.output_index_) {
std::vector<Variable*> out_vars;
out_vars.reserve(var_name_item.second.size());
for (auto& id : var_name_item.second) {
out_vars.emplace_back(var_scope.var_list[id]);
}
outs_map.emplace(var_name_item.first, std::move(out_vars));
}
RuntimeContext runtime_context({}, {});
runtime_context.inputs.swap(ins_map);
runtime_context.outputs.swap(outs_map);
RuntimeInferShapeContext infer_shape_ctx(*op_base, runtime_context);
static_cast<const framework::OperatorWithKernel*>(op_base)->InferShape(
&infer_shape_ctx);
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place);
Scope scope;
auto exec_context =
ExecutionContext(*op_base, scope, *dev_ctx, runtime_context);
instr_node.kernel_func_.compute_func_(exec_context);
}
void exec_instruction_list(const std::vector<Instruction>& vec_instr,
const VariableScope& var_scope,
const platform::Place& place) {
std::queue<size_t> working_queue;
auto working_dependecy_count = dependecy_count_;
for (size_t i = 0; i < dependecy_count_.size(); ++i) {
if (dependecy_count_[i] == 0) {
working_queue.push(i);
}
}
auto working_var_ref = global_scope.vec_meta_info_;
size_t run_op_number = 0;
while (!working_queue.empty()) {
auto instr_id = working_queue.front();
working_queue.pop();
auto& instr_node = vec_instr[instr_id];
run_instr(instr_node, var_scope, place);
auto& next_instr = instr_node.next_instruction_.direct_run_;
++run_op_number;
for (auto next_i : next_instr) {
--working_dependecy_count[next_i];
if (working_dependecy_count[next_i] == 0) {
working_queue.push(next_i);
}
}
// GC infomation
auto& gc_check_list = instr_node.gc_check_var_list;
for (auto var_id : gc_check_list) {
--working_var_ref[var_id].var_ref_count_;
}
}
for (size_t i = 0; i < working_var_ref.size(); ++i) {
if (working_var_ref[i].var_ref_count_ != 0) {
cerr << " var ref is not zero " << i << endl;
}
}
}
const platform::Place& place_;
const ProgramDesc& prog_;
paddle::framework::VariableScope global_scope;
std::vector<paddle::framework::OpFuncNode> vec_func_list;
std::vector<paddle::framework::OperatorBase*> op_list;
bool is_build_;
std::vector<Instruction> vec_instruction_;
InstructionInfo instruction_info_;
std::vector<size_t> dependecy_count_;
std::vector<VariableMetaInfo> ref_coun_info;
std::vector<std::vector<size_t>> input_var2op_info_;
Scope* outer_scope_;
};
}  // namespace framework
}  // namespace paddle
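The scheduling in ExecuteInstructionList above is a plain ready-queue traversal of the instruction DAG: dependecy_count_ holds the number of unfinished predecessors of each instruction, and next_instruction_.direct_run_ lists the successors to notify once it finishes. A self-contained sketch of the same idea, detached from the framework types (the Instr struct and its run callback are illustrative stand-ins, not part of the diff):

#include <cstddef>
#include <functional>
#include <queue>
#include <vector>

// Illustrative stand-in for the Instruction/NextInstruction pair in this diff.
struct Instr {
  std::function<void()> run;   // the compute kernel of this instruction
  std::vector<size_t> next;    // successor instruction ids (direct_run_)
};

// Runs every instruction once all of its predecessors have finished,
// mirroring the loop in InterpreterCore::ExecuteInstructionList.
void ExecuteByDependencyCount(const std::vector<Instr>& instrs,
                              std::vector<size_t> dep_count /* copied */) {
  std::queue<size_t> ready;
  for (size_t i = 0; i < dep_count.size(); ++i) {
    if (dep_count[i] == 0) ready.push(i);  // ops with no pending inputs
  }
  while (!ready.empty()) {
    size_t id = ready.front();
    ready.pop();
    instrs[id].run();
    for (size_t nxt : instrs[id].next) {
      if (--dep_count[nxt] == 0) ready.push(nxt);  // all predecessors done
    }
  }
}

The real implementation layers the per-variable var_ref_count_ bookkeeping on top of this loop so that garbage-collection candidates can be detected as instructions retire.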
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/new_executor/interpretercore_util.h"
#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
namespace paddle {
namespace framework {
class InterpreterCore {
public:
InterpreterCore(const platform::Place& place, const ProgramDesc& main_prog,
VariableScope* global_scope,
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names);
void Run(const std::vector<framework::Tensor>& feed_tensors,
std::vector<framework::Tensor>* fetch_tensors);
static void BuildOpFuncList(const platform::Place& place,
const framework::ProgramDesc& pdesc,
std::vector<OperatorBase*>* op_list,
std::vector<OpFuncNode>* vec_func_list,
VariableScope* var_scope);
private:
void Convert();
void RunInstruction(const Instruction& instr_node,
const VariableScope& var_scope,
const platform::Place& place);
void ExecuteInstructionList(const std::vector<Instruction>& vec_instr,
const VariableScope& var_scope,
const platform::Place& place);
std::vector<size_t> MergeVector(const std::vector<size_t>& first,
const std::vector<size_t>& second);
void BuildVariableScope(const framework::ProgramDesc& pdesc,
VariableScope* var_scope);
const platform::Place& place_;
const ProgramDesc& main_program_;
VariableScope* global_scope_;
std::vector<VariableMetaInfo> vec_meta_info_;
std::vector<paddle::framework::OpFuncNode> vec_func_list_;
std::vector<paddle::framework::OperatorBase*> op_list_;
std::vector<Instruction> vec_instruction_;
InstructionInfo instruction_info_;
std::vector<size_t> dependecy_count_;
std::vector<VariableMetaInfo> ref_coun_info_;
std::vector<std::vector<size_t>> input_var2op_info_;
bool is_build_;
std::vector<std::string> feed_names_;
std::vector<std::string> fetch_names_;
};
} // namespace framework
} // namespace paddle
@@ -13,7 +13,7 @@
// limitations under the License.

/*************************************************************************
  > File Name: interpretercore_util.h
  > Author: guanshanshan@baidu.com
  > Created Time: Fri 23 Jul 2021 06:19:19 AM UTC
 ************************************************************************/
...
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace framework {
using OpKernelComputeFunc = std::function<void(const ExecutionContext&)>;
using OpKernelMap =
std::unordered_map<OpKernelType, OpKernelComputeFunc, OpKernelType::Hash>;
struct OpKernelFunc {
OpKernelComputeFunc compute_func_;
OperatorBase* operator_base_;
};
struct VariableMetaInfo {
int var_ref_count_;
};
struct VariableScope {
std::vector<Variable*> var_list;
std::map<std::string, int> name2id;
};
struct NextInstruction {
std::vector<size_t> direct_run_;
};
struct EventInter {};
struct InstructionInfo {
std::vector<size_t> dependecy_count_;
};
struct EventRun {
EventInter event_inter;
std::vector<size_t> same_device_run_;
std::vector<size_t> synchronized_run;
};
struct Instruction {
OpKernelFunc kernel_func_;
std::map<std::string, std::vector<int>> input_index_;
std::map<std::string, std::vector<int>> output_index_;
std::vector<size_t> gc_check_var_list;
NextInstruction next_instruction_;
std::vector<EventInter> vec_event_list_;
};
struct OpFuncNode {
// int unsed;
std::map<std::string, std::vector<int>> input_index;
std::map<std::string, std::vector<int>> output_index;
OpKernelComputeFunc kernel_func_;
};
} // namespace framework
} // namespace paddle
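VariableScope above pairs a flat var_list with a name2id index, and the Convert()/RunInstruction() code relies on the two staying in sync: an id stored in input_index_/output_index_ must index var_list. A minimal sketch of that invariant, assuming these definitions are available via the header path used elsewhere in this diff (the RegisterVar helper is hypothetical, not part of the commit):

#include <string>

#include "paddle/fluid/framework/new_executor/new_executor_defs.h"

// Hypothetical helper: register a named variable and keep name2id/var_list
// in sync, the same bookkeeping InterpreterCore::BuildVariableScope performs
// per block variable (the real code also calls InitializeVariable with the
// variable's type).
int RegisterVar(paddle::framework::VariableScope* scope,
                const std::string& name) {
  auto it = scope->name2id.find(name);
  if (it != scope->name2id.end()) return it->second;  // already registered
  int id = static_cast<int>(scope->var_list.size());
  scope->name2id[name] = id;
  scope->var_list.push_back(new paddle::framework::Variable());
  return id;
}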
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/new_executor/standalone_executor.h"
namespace paddle {
namespace framework {
StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
const ProgramDesc& startup_prog,
const ProgramDesc& main_prog,
Scope* scope)
: place_(place),
startup_prog_(startup_prog),
main_prog_(main_prog),
outer_scope_(scope) {
paddle::framework::InitDevices();
// init scope
BuildVariableOuterScope(startup_prog, &global_scope_, scope);
if (outer_scope_ != nullptr) {
auto name_list = outer_scope_->LocalVarNames();
for (auto name : name_list) {
auto v = outer_scope_->Var(name);
if (global_scope_.name2id.find(name) == global_scope_.name2id.end()) {
global_scope_.name2id[name] = global_scope_.var_list.size();
}
global_scope_.var_list.push_back(v);
}
}
// run startup program
std::vector<paddle::framework::OpFuncNode> vec_func_list;
std::vector<paddle::framework::OperatorBase*> op_list;
InterpreterCore::BuildOpFuncList(place_, startup_prog, &op_list,
&vec_func_list, &global_scope_);
}
int StandaloneExecutor::Run(const std::vector<std::string>& feed_names,
const std::vector<framework::Tensor>& feed_tensors,
const std::vector<std::string>& fetch_names,
std::vector<framework::Tensor>* fetch_tensors) {
auto core = GetInterpreterCore(feed_names, fetch_names);
core->Run(feed_tensors, fetch_tensors);
return 0;
}
void StandaloneExecutor::BuildVariableOuterScope(
const framework::ProgramDesc& pdesc, VariableScope* var_scope,
Scope* outer_scope) {
auto& global_block = pdesc.Block(0);
for (auto& var : global_block.AllVars()) {
if (var->Name() == framework::kEmptyVarName) {
continue;
}
if (var_scope->name2id.find(var->Name()) == var_scope->name2id.end()) {
var_scope->name2id[var->Name()] = var_scope->var_list.size();
auto v = outer_scope->Var(var->Name());
InitializeVariable(v, var->GetType());
var_scope->var_list.push_back(v);
}
}
}
std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names) {
std::ostringstream oss;
oss << "feed:";
for (auto& feedname : feed_names) {
oss << feedname << ",";
}
oss << "fetch:";
for (auto& fetchname : fetch_names) {
oss << fetchname << ",";
}
auto iter = interpretercores_.find(oss.str());
if (iter == interpretercores_.end()) {
auto core = std::make_shared<InterpreterCore>(
place_, main_prog_, &global_scope_, feed_names, fetch_names);
interpretercores_.emplace(oss.str(), core);
return core;
} else {
return iter->second;
}
}
} // namespace framework
} // namespace paddle
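GetInterpreterCore above keys its cache on the concatenated feed and fetch names, so each distinct feed/fetch signature gets its own InterpreterCore instance. A stripped-down sketch of that keying scheme (the BuildKey helper is illustrative, not part of the diff):

#include <sstream>
#include <string>
#include <vector>

// Mirrors the cache key built in StandaloneExecutor::GetInterpreterCore,
// e.g. BuildKey({"a"}, {"c"}) == "feed:a,fetch:c,".
std::string BuildKey(const std::vector<std::string>& feed_names,
                     const std::vector<std::string>& fetch_names) {
  std::ostringstream oss;
  oss << "feed:";
  for (const auto& name : feed_names) oss << name << ",";
  oss << "fetch:";
  for (const auto& name : fetch_names) oss << name << ",";
  return oss.str();
}

Two runs with the same feeds and fetches therefore reuse one cached InterpreterCore, while a new combination triggers a fresh build.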
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/new_executor/interpretercore.h"
namespace paddle {
namespace framework {
class ExecutorBase {
public:
virtual ~ExecutorBase() {}
virtual int Run(const std::vector<std::string>& feed_names,
const std::vector<framework::Tensor>& feed_tensors,
const std::vector<std::string>& fetch_names,
std::vector<framework::Tensor>* fetch_tensors) = 0;
};
class StandaloneExecutor : public ExecutorBase {
public:
StandaloneExecutor(const platform::Place& place,
const ProgramDesc& startup_prog,
const ProgramDesc& main_prog, Scope* scope);
~StandaloneExecutor() {}
virtual int Run(const std::vector<std::string>& feed_names,
const std::vector<framework::Tensor>& feed_tensors,
const std::vector<std::string>& fetch_names,
std::vector<framework::Tensor>* fetch_tensors);
private:
void BuildVariableOuterScope(const framework::ProgramDesc& pdesc,
VariableScope* var_scope, Scope* outer_scope);
std::shared_ptr<InterpreterCore> GetInterpreterCore(
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names);
const platform::Place& place_;
const ProgramDesc& startup_prog_;
const ProgramDesc& main_prog_;
Scope* outer_scope_;
VariableScope global_scope_;
std::unordered_map<std::string, std::shared_ptr<InterpreterCore>>
interpretercores_;
};
} // namespace framework
} // namespace paddle
@@ -21,68 +21,44 @@
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/new_executor/standalone_executor.h"

paddle::framework::ProgramDesc load_from_file(const std::string& file_name) {
  std::ifstream fin(file_name, std::ios::in | std::ios::binary);
  fin.seekg(0, std::ios::end);
  std::string buffer(fin.tellg(), ' ');
  fin.seekg(0, std::ios::beg);
  fin.read(&buffer[0], buffer.size());
  fin.close();

  paddle::framework::ProgramDesc program_desc(buffer);
  return program_desc;
}

int main() {
  paddle::framework::InitDevices();

  auto place = paddle::platform::CUDAPlace(0);
  auto test_prog = load_from_file("lm_startup_program");
  auto main_prog = load_from_file("lm_main_program");

  paddle::framework::Scope scope;
  paddle::framework::StandaloneExecutor exec(place, test_prog, main_prog,
                                             &scope);

  auto start = std::chrono::steady_clock::now();
  for (size_t i = 0; i < 2320; ++i) {
    if (i % 200 == 0) {
      std::cout << i << std::endl;
    }

    std::vector<paddle::framework::Tensor> vec_out;
    exec.Run({}, {}, {}, &vec_out);
  }
  auto end = std::chrono::steady_clock::now();
  std::chrono::duration<double> diff = end - start;

  std::cout << "time cost " << diff.count() << std::endl;

  return 1;
}
@@ -200,6 +200,7 @@ if(WITH_PYTHON)
  endif(WIN32)

  add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file})

  list(APPEND PYBIND_DEPS interpretercore standalone_executor)
  cc_library(paddle_pybind SHARED
    SRCS ${PYBIND_SRCS}
    DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
...
@@ -42,7 +42,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/new_executor/standalone_executor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
@@ -1945,30 +1945,30 @@ All parameter, weight, gradient are variables in Paddle.
                     fetch_vars);
           });

  py::class_<framework::StandaloneExecutor>(m, "StandaloneExecutor")
      .def(py::init<const platform::Place &, const ProgramDesc &,
                    const ProgramDesc &, Scope *>())
      .def("run",
           [](StandaloneExecutor &self,
              const std::unordered_map<std::string, py::array> &input_dict,
              std::vector<std::string> fetch_names) {
             pybind11::gil_scoped_release release;

             std::vector<framework::Tensor> feed_tensors;
             std::vector<std::string> feed_names;

             for (auto &item : input_dict) {
               framework::LoDTensor t;
               SetTensorFromPyArray<platform::CPUPlace>(
                   &t, item.second, platform::CPUPlace(), false);
               feed_names.push_back(item.first);
               feed_tensors.push_back(t);
             }

             std::vector<framework::Tensor> fetch_tensors;
             self.Run(feed_names, feed_tensors, fetch_names, &fetch_tensors);
             std::vector<py::array> vec_ret;
             for (size_t i = 0; i < fetch_tensors.size(); ++i) {
               vec_ret.push_back(TensorToPyArray(fetch_tensors[i], true));
             }
             return vec_ret;
           });
...
@@ -15,7 +15,7 @@
import unittest
import paddle
from paddle.fluid import core
from paddle.fluid.core import StandaloneExecutor
import numpy as np
@@ -37,19 +37,25 @@ class LinearTestCase(unittest.TestCase):
        startup_program = paddle.fluid.default_startup_program()
        p = core.Place()
        p.set_place(self.place)
        standaloneexecutor = StandaloneExecutor(p, startup_program.desc,
                                                main_program.desc, core.Scope())

        out = standaloneexecutor.run({
            "a": np.ones(
                [2, 2], dtype="float32") * 2
        }, [c.name])

        for i in range(10):
            out = standaloneexecutor.run({
                "a": np.ones(
                    [2, 2], dtype="float32") * i
            }, [c.name])

        for i in range(10):
            out = standaloneexecutor.run({
                "a": np.ones(
                    [2, 2], dtype="float32") * i
            }, [a.name, c.name])


if __name__ == "__main__":
    unittest.main()