Unverified commit 40d4d834, authored by: W wanghuancoder, committed by: GitHub

code refactoring for new executor (#34970)

* code refactoring, test=develop

* refine, test=develop

* refine, test=develop

* refine, test=develop
Parent commit: 1b747de7
......@@ -25,6 +25,7 @@ add_subdirectory(ir)
add_subdirectory(details)
add_subdirectory(fleet)
add_subdirectory(io)
add_subdirectory(new_executor)
#ddim lib
proto_library(framework_proto SRCS framework.proto)
......
cc_library(interpretercore SRCS interpretercore.cc DEPS operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${PYBIND_DEPS} profiler)
cc_library(standalone_executor SRCS standalone_executor.cc DEPS interpretercore operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${PYBIND_DEPS} profiler)
# cc_binary(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler)
......@@ -11,153 +11,257 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <string>
#include <chrono>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/executor_gc_helper.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/new_exec_util.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h"
// USE_OP(fill_constant);
// USE_OP(elementwise_add);
// using namespace std;
#include "paddle/fluid/framework/new_executor/interpretercore.h"
namespace paddle {
namespace framework {
using std::cerr;
using std::endl;
InterpreterCore::InterpreterCore(const platform::Place& place,
const ProgramDesc& main_prog,
VariableScope* global_scope,
const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names)
: place_(place), main_program_(main_prog), global_scope_(global_scope) {
is_build_ = false;
feed_names_ = feed_names;
fetch_names_ = fetch_names;
// add feedop and fetchop to main_program
using OpKernelComputeFunc = std::function<void(const ExecutionContext&)>;
using OpKernelMap =
std::unordered_map<OpKernelType, OpKernelComputeFunc, OpKernelType::Hash>;
// prune
framework::ProgramDesc load_from_file(const std::string& file_name) {
std::ifstream fin(file_name, std::ios::in | std::ios::binary);
fin.seekg(0, std::ios::end);
std::string buffer(fin.tellg(), ' ');
fin.seekg(0, std::ios::beg);
fin.read(&buffer[0], buffer.size());
fin.close();
// optmize graph pass
ProgramDesc program_desc(buffer);
return program_desc;
// convert to run graph
}
struct OpKernelFunc {
OpKernelComputeFunc compute_func_;
OperatorBase* operator_base_;
};
struct VariableMetaInfo {
int var_ref_count_;
};
struct VariableScope {
std::vector<Variable*> var_list;
std::map<std::string, int> name2id;
std::vector<VariableMetaInfo> vec_meta_info_;
};
struct NextInstruction {
std::vector<size_t> direct_run_;
};
struct EventInter {};
struct InstructionInfo {
std::vector<size_t> dependecy_count_;
};
struct EventRun {
EventInter event_inter;
std::vector<size_t> same_device_run_;
std::vector<size_t> synchronized_run;
};
struct Instruction {
OpKernelFunc kernel_func_;
std::map<std::string, std::vector<int>> input_index_;
std::map<std::string, std::vector<int>> output_index_;
std::vector<size_t> gc_check_var_list;
NextInstruction next_instruction_;
std::vector<EventInter> vec_event_list_;
};
struct OpFuncNode {
// int unsed;
std::map<std::string, std::vector<int>> input_index;
std::map<std::string, std::vector<int>> output_index;
OpKernelComputeFunc kernel_func_;
};
int convert(const platform::Place& place) {
if (is_cpu_place(place)) {
return 0;
void InterpreterCore::Run(const std::vector<framework::Tensor>& feed_tensors,
std::vector<framework::Tensor>* fetch_tensors) {
if (is_build_ == false) {
BuildVariableScope(main_program_, global_scope_);
}
if (is_gpu_place(place)) {
return 1;
for (size_t i = 0; i < feed_names_.size(); ++i) {
auto it = global_scope_->name2id.find(feed_names_[i]);
assert(it != global_scope_->name2id.end());
auto feed_tensor =
global_scope_->var_list[it->second]->GetMutable<framework::LoDTensor>();
feed_tensor->ShareDataWith(feed_tensors[i]);
}
if (is_build_ == false) {
BuildOpFuncList(place_, main_program_, &op_list_, &vec_func_list_,
global_scope_);
is_build_ = true;
// convert vec func_list to graph
Convert();
} else {
ExecuteInstructionList(vec_instruction_, *global_scope_, place_);
}
return -1;
for (size_t i = 0; i < fetch_names_.size(); ++i) {
auto it = global_scope_->name2id.find(fetch_names_[i]);
assert(it != global_scope_->name2id.end());
PADDLE_ENFORCE_NE(
it, global_scope_->name2id.end(),
platform::errors::NotFound(
"Can't find (%d) the fetch var (%s) in scope", i, fetch_names_[i]));
auto fetch_tensor =
global_scope_->var_list[it->second]->GetMutable<framework::LoDTensor>();
if (platform::is_gpu_place(fetch_tensor->place())) {
Tensor out;
platform::DeviceContextPool& pool =
platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place_);
dev_ctx->Wait();
TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
dev_ctx->Wait();
fetch_tensors->push_back(out);
} else {
Tensor out;
TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
fetch_tensors->push_back(out);
}
}
}
std::vector<size_t> merge_vec(const std::vector<size_t>& first,
const std::vector<size_t>& second) {
std::vector<size_t> out(first.size() + second.size());
std::merge(first.begin(), first.end(), second.begin(), second.end(),
out.begin());
void InterpreterCore::Convert() {
input_var2op_info_.resize(global_scope_->var_list.size());
vec_instruction_.reserve(vec_func_list_.size());
dependecy_count_.resize(vec_func_list_.size());
vec_meta_info_.resize(global_scope_->var_list.size());
for (size_t i = 0; i < vec_func_list_.size(); ++i) {
Instruction temp_inst;
temp_inst.kernel_func_.compute_func_ = vec_func_list_[i].kernel_func_;
temp_inst.kernel_func_.operator_base_ = op_list_[i];
temp_inst.input_index_ = vec_func_list_[i].input_index;
temp_inst.output_index_ = vec_func_list_[i].output_index;
std::vector<size_t> gc_check_input_list;
for (auto& item : vec_func_list_[i].input_index) {
for (auto id : item.second) {
input_var2op_info_[id].push_back(i);
gc_check_input_list.push_back(id);
}
}
std::sort(gc_check_input_list.begin(), gc_check_input_list.end());
auto last =
std::unique(gc_check_input_list.begin(), gc_check_input_list.end());
gc_check_input_list.erase(last, gc_check_input_list.end());
for (auto var_id : gc_check_input_list) {
vec_meta_info_[var_id].var_ref_count_++;
}
std::vector<size_t>::iterator it;
it = std::unique(out.begin(), out.end());
temp_inst.gc_check_var_list.swap(gc_check_input_list);
out.resize(std::distance(out.begin(), it));
vec_instruction_.push_back(temp_inst);
}
return out;
for (size_t i = 0; i < vec_instruction_.size(); ++i) {
std::vector<size_t> vec_temp;
for (auto& item : vec_instruction_[i].output_index_) {
for (auto id : item.second) {
vec_temp = MergeVector(vec_temp, input_var2op_info_[id]);
}
}
// In Program, op order is a very import information.
// Op can noly add op after it as next as next ops.
std::vector<size_t> filter_next;
filter_next.reserve(vec_temp.size());
for (auto item : vec_temp) {
if (item > i) {
filter_next.push_back(item);
}
}
vec_instruction_[i].next_instruction_.direct_run_ = filter_next;
// checkout ouput
for (auto& item : vec_instruction_[i].output_index_) {
for (auto id : item.second) {
if (input_var2op_info_[id].size() == 0) {
// output var not be used by any kernel
vec_instruction_[i].gc_check_var_list.push_back(id);
vec_meta_info_[id].var_ref_count_++;
}
}
}
for (auto inst_id : filter_next) {
dependecy_count_[inst_id]++;
}
}
}
void build_variable_outer_scope(const framework::ProgramDesc& pdesc,
VariableScope* var_scope, Scope* outer_scope) {
auto& global_block = pdesc.Block(0);
void InterpreterCore::RunInstruction(const Instruction& instr_node,
const VariableScope& var_scope,
const platform::Place& place) {
auto op_base = instr_node.kernel_func_.operator_base_;
// build runtime cost
VariableValueMap ins_map;
for (auto& var_name_item : instr_node.input_index_) {
std::vector<Variable*> input_vars;
input_vars.reserve(var_name_item.second.size());
for (auto& id : var_name_item.second) {
input_vars.emplace_back(var_scope.var_list[id]);
}
ins_map.emplace(var_name_item.first, std::move(input_vars));
}
for (auto& var : global_block.AllVars()) {
if (var->Name() == framework::kEmptyVarName) {
continue;
VariableValueMap outs_map;
for (auto& var_name_item : instr_node.output_index_) {
std::vector<Variable*> out_vars;
out_vars.reserve(var_name_item.second.size());
for (auto& id : var_name_item.second) {
out_vars.emplace_back(var_scope.var_list[id]);
}
auto v = outer_scope->Var(var->Name());
outs_map.emplace(var_name_item.first, std::move(out_vars));
}
if (var_scope->name2id.find(var->Name()) == var_scope->name2id.end()) {
var_scope->name2id[var->Name()] = var_scope->var_list.size();
RuntimeContext runtime_context({}, {});
runtime_context.inputs.swap(ins_map);
runtime_context.outputs.swap(outs_map);
RuntimeInferShapeContext infer_shape_ctx(*op_base, runtime_context);
static_cast<const framework::OperatorWithKernel*>(op_base)->InferShape(
&infer_shape_ctx);
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place);
Scope scope;
auto exec_context =
ExecutionContext(*op_base, scope, *dev_ctx, runtime_context);
instr_node.kernel_func_.compute_func_(exec_context);
}
void InterpreterCore::ExecuteInstructionList(
const std::vector<Instruction>& vec_instr, const VariableScope& var_scope,
const platform::Place& place) {
std::queue<size_t> working_queue;
auto working_dependecy_count = dependecy_count_;
for (size_t i = 0; i < dependecy_count_.size(); ++i) {
if (dependecy_count_[i] == 0) {
working_queue.push(i);
}
}
auto working_var_ref = vec_meta_info_;
size_t run_op_number = 0;
while (!working_queue.empty()) {
auto instr_id = working_queue.front();
working_queue.pop();
auto& instr_node = vec_instr[instr_id];
RunInstruction(instr_node, var_scope, place);
auto& next_instr = instr_node.next_instruction_.direct_run_;
++run_op_number;
for (auto next_i : next_instr) {
--working_dependecy_count[next_i];
if (working_dependecy_count[next_i] == 0) {
working_queue.push(next_i);
}
}
// GC infomation
auto& gc_check_list = instr_node.gc_check_var_list;
for (auto var_id : gc_check_list) {
--working_var_ref[var_id].var_ref_count_;
}
}
InitializeVariable(v, var->GetType());
var_scope->var_list.push_back(v);
for (size_t i = 0; i < working_var_ref.size(); ++i) {
if (working_var_ref[i].var_ref_count_ != 0) {
std::cerr << " var ref is not zero " << i << std::endl;
}
}
}
void build_variable_scope(const framework::ProgramDesc& pdesc,
VariableScope* var_scope) {
std::vector<size_t> InterpreterCore::MergeVector(
    const std::vector<size_t>& first, const std::vector<size_t>& second) {
  // Merge two sorted id sequences into one sorted sequence and drop the
  // adjacent duplicates, i.e. return the sorted union of the inputs.
  std::vector<size_t> merged(first.size() + second.size());
  std::merge(first.begin(), first.end(), second.begin(), second.end(),
             merged.begin());
  merged.erase(std::unique(merged.begin(), merged.end()), merged.end());
  return merged;
}
void InterpreterCore::BuildVariableScope(const framework::ProgramDesc& pdesc,
VariableScope* var_scope) {
auto& global_block = pdesc.Block(0);
for (auto& var : global_block.AllVars()) {
......@@ -167,19 +271,18 @@ void build_variable_scope(const framework::ProgramDesc& pdesc,
if (var_scope->name2id.find(var->Name()) == var_scope->name2id.end()) {
var_scope->name2id[var->Name()] = var_scope->var_list.size();
auto v = new Variable();
InitializeVariable(v, var->GetType());
var_scope->var_list.push_back(v);
}
auto v = new Variable();
InitializeVariable(v, var->GetType());
var_scope->var_list.push_back(v);
}
}
void build_op_func_list(const framework::ProgramDesc& pdesc,
std::vector<OperatorBase*>* op_list,
std::vector<OpFuncNode>* vec_func_list,
VariableScope* var_scope,
const platform::Place& place) {
void InterpreterCore::BuildOpFuncList(const platform::Place& place,
const framework::ProgramDesc& pdesc,
std::vector<OperatorBase*>* op_list,
std::vector<OpFuncNode>* vec_func_list,
VariableScope* var_scope) {
auto& global_block = pdesc.Block(0);
for (auto& op : global_block.AllOps()) {
......@@ -291,7 +394,8 @@ void build_op_func_list(const framework::ProgramDesc& pdesc,
VariableNameMap copy_out_map;
copy_out_map["Out"] = {new_var_name};
AttributeMap attr_map;
attr_map["dst_place_type"] = convert(place);
attr_map["dst_place_type"] =
is_cpu_place(place) ? 0 : is_gpu_place(place) ? 1 : -1;
std::map<std::string, std::vector<int>> copy_ins_name2id;
copy_ins_name2id["X"] = ins_name2id[var_name_item.first];
......@@ -363,267 +467,5 @@ void build_op_func_list(const framework::ProgramDesc& pdesc,
}
}
// NOTE(review): legacy all-in-one InterpreterCore being removed by this
// refactor; the split version lives in new_executor/interpretercore.{h,cc}.
// It lazily builds an instruction graph from a ProgramDesc on the first
// run() and re-executes the cached instructions afterwards.
class InterpreterCore {
 public:
  // Imports the variables already present in `scope`, creates the startup
  // program's variables, and processes the startup program once through
  // build_op_func_list.
  InterpreterCore(const platform::Place& place, const ProgramDesc& prog,
                  const ProgramDesc& startup_prog, Scope* scope)
      : place_(place), prog_(prog), outer_scope_(scope) {
    paddle::framework::InitDevices();

    is_build_ = false;

    if (outer_scope_ != nullptr) {
      auto name_list = outer_scope_->LocalVarNames();
      for (auto name : name_list) {
        auto v = outer_scope_->Var(name);
        if (global_scope.name2id.find(name) == global_scope.name2id.end()) {
          global_scope.name2id[name] = global_scope.var_list.size();
        }

        global_scope.var_list.push_back(v);
      }
    }

    paddle::framework::build_variable_outer_scope(startup_prog, &global_scope,
                                                  outer_scope_);

    std::vector<paddle::framework::OpFuncNode> vec_func_list;
    std::vector<paddle::framework::OperatorBase*> op_list;
    paddle::framework::build_op_func_list(
        startup_prog, &op_list, &vec_func_list, &global_scope, place_);
    // add variable to outer_scope
  }

  // Feeds `vec_tensor` into the variables named by `vec_name`, runs the
  // main program (building the instruction graph on the first call), and
  // appends CPU copies of the variables named by `vec_fetch_name` to
  // `vec_out`.
  void run(const std::vector<std::string>& vec_name,
           const std::vector<framework::Tensor>& vec_tensor,
           const std::vector<std::string>& vec_fetch_name,
           std::vector<framework::Tensor>* vec_out) {
    // First call: create the main program's variables in the scope.
    if (is_build_ == false) {
      paddle::framework::build_variable_scope(prog_, &global_scope);
    }

    // Share the feed tensors' storage with the matching scope variables.
    for (size_t i = 0; i < vec_name.size(); ++i) {
      auto it = global_scope.name2id.find(vec_name[i]);
      assert(it != global_scope.name2id.end());

      auto feed_tensor =
          global_scope.var_list[it->second]->GetMutable<framework::LoDTensor>();
      feed_tensor->ShareDataWith(vec_tensor[i]);
    }

    if (is_build_ == false) {
      paddle::framework::build_op_func_list(prog_, &op_list, &vec_func_list,
                                            &global_scope, place_);
      is_build_ = true;
      // convert vec func_list to graph
      convert();
    } else {
      exec_instruction_list(vec_instruction_, global_scope, place_);
    }

    // Copy each requested fetch variable to a CPU tensor.
    for (size_t i = 0; i < vec_fetch_name.size(); ++i) {
      auto it = global_scope.name2id.find(vec_fetch_name[i]);
      assert(it != global_scope.name2id.end());
      PADDLE_ENFORCE_NE(it, global_scope.name2id.end(),
                        platform::errors::NotFound(
                            "Can't find (%d) the fetch var (%s) in scope", i,
                            vec_fetch_name[i]));

      auto fetch_tensor =
          global_scope.var_list[it->second]->GetMutable<framework::LoDTensor>();

      if (platform::is_gpu_place(fetch_tensor->place())) {
        // Synchronize the device around the D2H copy so the returned
        // tensor is complete.
        Tensor out;
        platform::DeviceContextPool& pool =
            platform::DeviceContextPool::Instance();
        auto* dev_ctx = pool.Get(place_);
        dev_ctx->Wait();
        TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
        dev_ctx->Wait();
        vec_out->push_back(out);
      } else {
        Tensor out;
        TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
        vec_out->push_back(out);
      }
    }
  }

 private:
  // Converts vec_func_list/op_list into vec_instruction_ and pre-computes,
  // for each instruction, its direct successors, its dependency count, and
  // the variable ids whose GC reference counts it must decrement.
  void convert() {
    input_var2op_info_.resize(global_scope.var_list.size());

    vec_instruction_.reserve(vec_func_list.size());
    dependecy_count_.resize(vec_func_list.size());
    global_scope.vec_meta_info_.resize(global_scope.var_list.size());
    for (size_t i = 0; i < vec_func_list.size(); ++i) {
      Instruction temp_inst;
      temp_inst.kernel_func_.compute_func_ = vec_func_list[i].kernel_func_;
      temp_inst.kernel_func_.operator_base_ = op_list[i];
      temp_inst.input_index_ = vec_func_list[i].input_index;
      temp_inst.output_index_ = vec_func_list[i].output_index;

      std::vector<size_t> gc_check_input_list;
      for (auto& item : vec_func_list[i].input_index) {
        for (auto id : item.second) {
          input_var2op_info_[id].push_back(i);
          gc_check_input_list.push_back(id);
        }
      }
      // De-duplicate input ids so each variable is ref-counted once per op.
      std::sort(gc_check_input_list.begin(), gc_check_input_list.end());
      auto last =
          std::unique(gc_check_input_list.begin(), gc_check_input_list.end());
      gc_check_input_list.erase(last, gc_check_input_list.end());
      for (auto var_id : gc_check_input_list) {
        global_scope.vec_meta_info_[var_id].var_ref_count_++;
      }

      temp_inst.gc_check_var_list.swap(gc_check_input_list);

      vec_instruction_.push_back(temp_inst);
    }

    for (size_t i = 0; i < vec_instruction_.size(); ++i) {
      std::vector<size_t> vec_temp;
      for (auto& item : vec_instruction_[i].output_index_) {
        for (auto id : item.second) {
          vec_temp = merge_vec(vec_temp, input_var2op_info_[id]);
        }
      }

      // In a Program, op order is very important information.
      // An op can only add ops that come after it as its next ops.
      std::vector<size_t> filter_next;
      filter_next.reserve(vec_temp.size());
      for (auto item : vec_temp) {
        if (item > i) {
          filter_next.push_back(item);
        }
      }
      vec_instruction_[i].next_instruction_.direct_run_ = filter_next;

      // check output
      for (auto& item : vec_instruction_[i].output_index_) {
        for (auto id : item.second) {
          if (input_var2op_info_[id].size() == 0) {
            // output var not be used by any kernel
            vec_instruction_[i].gc_check_var_list.push_back(id);
            global_scope.vec_meta_info_[id].var_ref_count_++;
          }
        }
      }

      for (auto inst_id : filter_next) {
        dependecy_count_[inst_id]++;
      }
    }
  }

  // Runs one instruction: assembles its RuntimeContext from the variable
  // scope, runs InferShape, and invokes the cached kernel function.
  void run_instr(const Instruction& instr_node, const VariableScope& var_scope,
                 const platform::Place& place) {
    auto op_base = instr_node.kernel_func_.operator_base_;
    // build runtime cost
    VariableValueMap ins_map;
    for (auto& var_name_item : instr_node.input_index_) {
      std::vector<Variable*> input_vars;

      input_vars.reserve(var_name_item.second.size());
      for (auto& id : var_name_item.second) {
        input_vars.emplace_back(var_scope.var_list[id]);
      }
      ins_map.emplace(var_name_item.first, std::move(input_vars));
    }

    VariableValueMap outs_map;
    for (auto& var_name_item : instr_node.output_index_) {
      std::vector<Variable*> out_vars;

      out_vars.reserve(var_name_item.second.size());
      for (auto& id : var_name_item.second) {
        out_vars.emplace_back(var_scope.var_list[id]);
      }
      outs_map.emplace(var_name_item.first, std::move(out_vars));
    }

    RuntimeContext runtime_context({}, {});
    runtime_context.inputs.swap(ins_map);
    runtime_context.outputs.swap(outs_map);

    RuntimeInferShapeContext infer_shape_ctx(*op_base, runtime_context);

    static_cast<const framework::OperatorWithKernel*>(op_base)->InferShape(
        &infer_shape_ctx);

    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto* dev_ctx = pool.Get(place);
    Scope scope;

    auto exec_context =
        ExecutionContext(*op_base, scope, *dev_ctx, runtime_context);

    instr_node.kernel_func_.compute_func_(exec_context);
  }

  // Executes the instruction list in dependency order: seeds the queue with
  // all zero-dependency instructions, releases successors as their counts
  // reach zero, and decrements GC reference counts along the way.
  void exec_instruction_list(const std::vector<Instruction>& vec_instr,
                             const VariableScope& var_scope,
                             const platform::Place& place) {
    std::queue<size_t> working_queue;
    auto working_dependecy_count = dependecy_count_;
    for (size_t i = 0; i < dependecy_count_.size(); ++i) {
      if (dependecy_count_[i] == 0) {
        working_queue.push(i);
      }
    }

    auto working_var_ref = global_scope.vec_meta_info_;

    size_t run_op_number = 0;
    while (!working_queue.empty()) {
      auto instr_id = working_queue.front();
      working_queue.pop();
      auto& instr_node = vec_instr[instr_id];
      run_instr(instr_node, var_scope, place);

      auto& next_instr = instr_node.next_instruction_.direct_run_;
      ++run_op_number;

      for (auto next_i : next_instr) {
        --working_dependecy_count[next_i];
        if (working_dependecy_count[next_i] == 0) {
          working_queue.push(next_i);
        }
      }

      // GC information
      auto& gc_check_list = instr_node.gc_check_var_list;
      for (auto var_id : gc_check_list) {
        --working_var_ref[var_id].var_ref_count_;
      }
    }

    // Sanity check: every variable's reference count should end at zero.
    for (size_t i = 0; i < working_var_ref.size(); ++i) {
      if (working_var_ref[i].var_ref_count_ != 0) {
        cerr << " var ref is not zero " << i << endl;
      }
    }
  }

  const platform::Place& place_;
  const ProgramDesc& prog_;
  paddle::framework::VariableScope global_scope;
  std::vector<paddle::framework::OpFuncNode> vec_func_list;
  std::vector<paddle::framework::OperatorBase*> op_list;

  bool is_build_;  // true once the instruction graph has been built

  std::vector<Instruction> vec_instruction_;

  InstructionInfo instruction_info_;

  std::vector<size_t> dependecy_count_;  // #predecessors per instruction
  std::vector<VariableMetaInfo> ref_coun_info;
  std::vector<std::vector<size_t>> input_var2op_info_;

  Scope* outer_scope_;
};
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/new_executor/interpretercore_util.h"
#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
namespace paddle {
namespace framework {
// Interpreter for one ProgramDesc with fixed feed/fetch name lists.
// The first Run() builds the variable scope and an instruction graph from
// the program; subsequent Run() calls re-execute the cached instructions.
class InterpreterCore {
 public:
  InterpreterCore(const platform::Place& place, const ProgramDesc& main_prog,
                  VariableScope* global_scope,
                  const std::vector<std::string>& feed_names,
                  const std::vector<std::string>& fetch_names);

  // Feeds `feed_tensors` (paired positionally with the feed names given at
  // construction), executes the program, and appends CPU copies of the
  // fetched variables to `fetch_tensors`.
  void Run(const std::vector<framework::Tensor>& feed_tensors,
           std::vector<framework::Tensor>* fetch_tensors);

  // Builds the per-op kernel function list for `pdesc`. Static so that
  // StandaloneExecutor can also use it for the startup program.
  static void BuildOpFuncList(const platform::Place& place,
                              const framework::ProgramDesc& pdesc,
                              std::vector<OperatorBase*>* op_list,
                              std::vector<OpFuncNode>* vec_func_list,
                              VariableScope* var_scope);

 private:
  // Converts vec_func_list_/op_list_ into vec_instruction_, plus the
  // dependency counts and GC bookkeeping used at execution time.
  void Convert();

  // Runs a single instruction (InferShape + kernel invocation).
  void RunInstruction(const Instruction& instr_node,
                      const VariableScope& var_scope,
                      const platform::Place& place);

  // Executes all instructions in dependency order.
  void ExecuteInstructionList(const std::vector<Instruction>& vec_instr,
                              const VariableScope& var_scope,
                              const platform::Place& place);

  // Sorted union of two sorted id vectors, duplicates removed.
  std::vector<size_t> MergeVector(const std::vector<size_t>& first,
                                  const std::vector<size_t>& second);

  // Creates every variable of `pdesc`'s block 0 inside `var_scope`.
  void BuildVariableScope(const framework::ProgramDesc& pdesc,
                          VariableScope* var_scope);

  const platform::Place& place_;
  const ProgramDesc& main_program_;
  VariableScope* global_scope_;
  std::vector<VariableMetaInfo> vec_meta_info_;  // per-variable GC ref counts

  std::vector<paddle::framework::OpFuncNode> vec_func_list_;
  std::vector<paddle::framework::OperatorBase*> op_list_;

  std::vector<Instruction> vec_instruction_;

  InstructionInfo instruction_info_;

  std::vector<size_t> dependecy_count_;  // #predecessors per instruction
  std::vector<VariableMetaInfo> ref_coun_info_;
  std::vector<std::vector<size_t>> input_var2op_info_;

  bool is_build_;  // true once the instruction graph has been built

  std::vector<std::string> feed_names_;
  std::vector<std::string> fetch_names_;
};
} // namespace framework
} // namespace paddle
......@@ -13,7 +13,7 @@
// limitations under the License.
/*************************************************************************
> File Name: new_exec_util.h
> File Name: interpretercore_util.h
> Author: guanshanshan@baidu.com
> Created Time: Fri 23 Jul 2021 06:19:19 AM UTC
************************************************************************/
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace framework {
using OpKernelComputeFunc = std::function<void(const ExecutionContext&)>;
using OpKernelMap =
    std::unordered_map<OpKernelType, OpKernelComputeFunc, OpKernelType::Hash>;

// A cached kernel function together with the operator it was selected for.
struct OpKernelFunc {
  OpKernelComputeFunc compute_func_;
  OperatorBase* operator_base_;
};

// Per-variable bookkeeping (currently only a GC reference count).
struct VariableMetaInfo {
  int var_ref_count_;
};

// Flat variable container: `name2id` maps a variable name to its index in
// `var_list`.
struct VariableScope {
  std::vector<Variable*> var_list;
  std::map<std::string, int> name2id;
};

// Ids of successor instructions that may run directly after this one.
struct NextInstruction {
  std::vector<size_t> direct_run_;
};

// Empty placeholder for a cross-stream/device synchronization event.
struct EventInter {};

struct InstructionInfo {
  std::vector<size_t> dependecy_count_;
};

// NOTE(review): appears unused in this header — grouping of instruction ids
// by how they synchronize with the current one; confirm before relying on it.
struct EventRun {
  EventInter event_inter;
  std::vector<size_t> same_device_run_;
  std::vector<size_t> synchronized_run;
};

// One executable node of the instruction graph: the kernel to invoke, the
// variable ids of its inputs/outputs, the variables to GC-check after it
// runs, and its direct successors.
struct Instruction {
  OpKernelFunc kernel_func_;
  std::map<std::string, std::vector<int>> input_index_;
  std::map<std::string, std::vector<int>> output_index_;
  std::vector<size_t> gc_check_var_list;
  NextInstruction next_instruction_;
  std::vector<EventInter> vec_event_list_;
};

// Intermediate per-op form produced by BuildOpFuncList before it is
// converted into an Instruction.
struct OpFuncNode {
  // int unsed;
  std::map<std::string, std::vector<int>> input_index;
  std::map<std::string, std::vector<int>> output_index;
  OpKernelComputeFunc kernel_func_;
};
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/new_executor/standalone_executor.h"
namespace paddle {
namespace framework {
StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
                                       const ProgramDesc& startup_prog,
                                       const ProgramDesc& main_prog,
                                       Scope* scope)
    : place_(place),
      startup_prog_(startup_prog),
      main_prog_(main_prog),
      outer_scope_(scope) {
  paddle::framework::InitDevices();

  // init scope: create every variable declared by the startup program in the
  // outer scope and register it in global_scope_.
  BuildVariableOuterScope(startup_prog, &global_scope_, scope);

  if (outer_scope_ != nullptr) {
    // Import the variables that already exist in the outer scope. Register a
    // variable only when it is not known yet: the previous code pushed into
    // var_list unconditionally, so every startup variable created above was
    // inserted a second time as an unindexed duplicate slot (name2id ids and
    // var_list indices drifted apart for those entries).
    auto name_list = outer_scope_->LocalVarNames();
    for (auto name : name_list) {
      auto v = outer_scope_->Var(name);
      if (global_scope_.name2id.find(name) == global_scope_.name2id.end()) {
        global_scope_.name2id[name] = global_scope_.var_list.size();
        global_scope_.var_list.push_back(v);
      }
    }
  }

  // run startup program (processed once through BuildOpFuncList; the
  // resulting op/function lists are discarded afterwards).
  std::vector<paddle::framework::OpFuncNode> vec_func_list;
  std::vector<paddle::framework::OperatorBase*> op_list;
  InterpreterCore::BuildOpFuncList(place_, startup_prog, &op_list,
                                   &vec_func_list, &global_scope_);
}
int StandaloneExecutor::Run(const std::vector<std::string>& feed_names,
                            const std::vector<framework::Tensor>& feed_tensors,
                            const std::vector<std::string>& fetch_names,
                            std::vector<framework::Tensor>* fetch_tensors) {
  // Look up (or lazily create) the interpreter matching this feed/fetch
  // combination and delegate the actual execution to it.
  auto interpreter = GetInterpreterCore(feed_names, fetch_names);
  interpreter->Run(feed_tensors, fetch_tensors);
  return 0;
}
void StandaloneExecutor::BuildVariableOuterScope(
    const framework::ProgramDesc& pdesc, VariableScope* var_scope,
    Scope* outer_scope) {
  // For each variable declared in block 0, create it inside `outer_scope`
  // and register it (name -> id) in `var_scope`, skipping the empty-name
  // placeholder and anything already registered.
  auto& global_block = pdesc.Block(0);
  for (auto& var_desc : global_block.AllVars()) {
    const auto& var_name = var_desc->Name();
    if (var_name == framework::kEmptyVarName) {
      continue;
    }
    if (var_scope->name2id.find(var_name) != var_scope->name2id.end()) {
      continue;
    }
    var_scope->name2id[var_name] = var_scope->var_list.size();
    auto* var = outer_scope->Var(var_name);
    InitializeVariable(var, var_desc->GetType());
    var_scope->var_list.push_back(var);
  }
}
std::shared_ptr<InterpreterCore> StandaloneExecutor::GetInterpreterCore(
    const std::vector<std::string>& feed_names,
    const std::vector<std::string>& fetch_names) {
  // Serialize the feed/fetch combination into a cache key, e.g.
  // "feed:a,b,fetch:c," — identical to what the ostringstream version built.
  std::string cache_key("feed:");
  for (const auto& name : feed_names) {
    cache_key += name;
    cache_key += ',';
  }
  cache_key += "fetch:";
  for (const auto& name : fetch_names) {
    cache_key += name;
    cache_key += ',';
  }

  // Reuse a previously built interpreter when one exists for this key.
  auto iter = interpretercores_.find(cache_key);
  if (iter != interpretercores_.end()) {
    return iter->second;
  }

  // Otherwise build one for this combination and cache it.
  auto core = std::make_shared<InterpreterCore>(
      place_, main_prog_, &global_scope_, feed_names, fetch_names);
  interpretercores_.emplace(cache_key, core);
  return core;
}
} // namespace framework
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/new_executor/interpretercore.h"
namespace paddle {
namespace framework {
// Abstract interface for executors that run a program with explicit
// feed (input) and fetch (output) name lists.
class ExecutorBase {
 public:
  virtual ~ExecutorBase() {}
  // Feeds `feed_tensors` into the variables named by `feed_names`, runs the
  // program, and appends the variables named by `fetch_names` to
  // `fetch_tensors`. Returns 0 on success (the only value the current
  // implementation produces).
  virtual int Run(const std::vector<std::string>& feed_names,
                  const std::vector<framework::Tensor>& feed_tensors,
                  const std::vector<std::string>& fetch_names,
                  std::vector<framework::Tensor>* fetch_tensors) = 0;
};
// Executor owning a startup/main program pair. The startup program is
// processed once at construction; Run() executes the main program through a
// cached InterpreterCore keyed by the feed/fetch name combination.
class StandaloneExecutor : public ExecutorBase {
 public:
  StandaloneExecutor(const platform::Place& place,
                     const ProgramDesc& startup_prog,
                     const ProgramDesc& main_prog, Scope* scope);

  ~StandaloneExecutor() {}

  // See ExecutorBase::Run.
  virtual int Run(const std::vector<std::string>& feed_names,
                  const std::vector<framework::Tensor>& feed_tensors,
                  const std::vector<std::string>& fetch_names,
                  std::vector<framework::Tensor>* fetch_tensors);

 private:
  // Creates the variables of `pdesc` in `outer_scope` and registers them in
  // `var_scope`.
  void BuildVariableOuterScope(const framework::ProgramDesc& pdesc,
                               VariableScope* var_scope, Scope* outer_scope);

  // Returns the cached InterpreterCore for this feed/fetch combination,
  // creating and caching it on first use.
  std::shared_ptr<InterpreterCore> GetInterpreterCore(
      const std::vector<std::string>& feed_names,
      const std::vector<std::string>& fetch_names);

  const platform::Place& place_;
  const ProgramDesc& startup_prog_;
  const ProgramDesc& main_prog_;
  Scope* outer_scope_;
  VariableScope global_scope_;

  // Interpreter cache keyed by the serialized feed/fetch name lists.
  std::unordered_map<std::string, std::shared_ptr<InterpreterCore>>
      interpretercores_;
};
} // namespace framework
} // namespace paddle
......@@ -21,68 +21,44 @@
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/executor_gc_helper.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/framework/new_executor/standalone_executor.h"
#include "paddle/fluid/pybind/pybind.h"
paddle::framework::ProgramDesc load_from_file(const std::string& file_name) {
std::ifstream fin(file_name, std::ios::in | std::ios::binary);
fin.seekg(0, std::ios::end);
std::string buffer(fin.tellg(), ' ');
fin.seekg(0, std::ios::beg);
fin.read(&buffer[0], buffer.size());
fin.close();
#include "gperftools/profiler.h"
#include "paddle/fluid/framework/new_exec.h"
#include "paddle/fluid/platform/init.h"
paddle::framework::ProgramDesc program_desc(buffer);
return program_desc;
}
int main() {
paddle::framework::InitDevices();
paddle::framework::VariableScope global_scope;
auto place = paddle::platform::CUDAPlace(0);
auto test_prog = paddle::framework::load_from_file("lm_startup_program");
{
paddle::framework::build_variable_scope(test_prog, &global_scope);
std::vector<paddle::framework::OpFuncNode> vec_func_list;
std::vector<paddle::framework::OperatorBase*> op_list;
paddle::framework::build_op_func_list(test_prog, op_list, vec_func_list,
&global_scope, place);
// paddle::framework::exec_op_func_list( vec_func_list, op_list,
// global_scope, place );
}
auto test_prog = load_from_file("lm_startup_program");
cerr << "run main" << endl;
auto main_prog = paddle::framework::load_from_file("lm_main_program");
auto main_prog = load_from_file("lm_main_program");
paddle::framework::build_variable_scope(main_prog, &global_scope);
std::vector<paddle::framework::OpFuncNode> vec_main_func_list;
std::vector<paddle::framework::OperatorBase*> op_main_list;
paddle::framework::build_op_func_list(
main_prog, op_main_list, vec_main_func_list, &global_scope, place);
paddle::framework::Scope scope;
paddle::framework::InterpreterCore interp_core(place, main_prog, test_prog,
&scope);
paddle::framework::StandaloneExecutor exec(place, test_prog, main_prog,
&scope);
auto start = std::chrono::steady_clock::now();
ProfilerStart("new_executor.prof");
for (size_t i = 0; i < 2320; ++i) {
if (i % 200 == 0) {
cerr << i << endl;
std::cout << i << std::endl;
}
// paddle::framework::exec_op_func_list( vec_main_func_list, op_main_list,
// global_scope, place );
std::vector<paddle::framework::Tensor> vec_out;
interp_core.run({}, {}, {}, vec_out);
exec.Run({}, {}, {}, &vec_out);
}
ProfilerStop();
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> diff = end - start;
cerr << "time cost " << diff.count() << endl;
std::cout << "time cost " << diff.count() << std::endl;
return 1;
}
......@@ -200,6 +200,7 @@ if(WITH_PYTHON)
endif(WIN32)
add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file})
list(APPEND PYBIND_DEPS interpretercore standalone_executor)
cc_library(paddle_pybind SHARED
SRCS ${PYBIND_SRCS}
DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
......
......@@ -42,7 +42,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/new_exec.h"
#include "paddle/fluid/framework/new_executor/standalone_executor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
......@@ -1945,30 +1945,30 @@ All parameter, weight, gradient are variables in Paddle.
fetch_vars);
});
py::class_<framework::InterpreterCore>(m, "InterpreterCore")
py::class_<framework::StandaloneExecutor>(m, "StandaloneExecutor")
.def(py::init<const platform::Place &, const ProgramDesc &,
const ProgramDesc &, Scope *>())
.def("run",
[](InterpreterCore &self,
[](StandaloneExecutor &self,
const std::unordered_map<std::string, py::array> &input_dict,
std::vector<std::string> vec_fetch_name) {
std::vector<std::string> fetch_names) {
pybind11::gil_scoped_release release;
std::vector<framework::Tensor> vec_tensor;
std::vector<std::string> vec_name;
std::vector<framework::Tensor> feed_tensors;
std::vector<std::string> feed_names;
for (auto &item : input_dict) {
framework::LoDTensor t;
SetTensorFromPyArray<platform::CPUPlace>(
&t, item.second, platform::CPUPlace(), false);
vec_name.push_back(item.first);
vec_tensor.push_back(t);
feed_names.push_back(item.first);
feed_tensors.push_back(t);
}
std::vector<framework::Tensor> vec_out;
self.run(vec_name, vec_tensor, vec_fetch_name, &vec_out);
std::vector<framework::Tensor> fetch_tensors;
self.Run(feed_names, feed_tensors, fetch_names, &fetch_tensors);
std::vector<py::array> vec_ret;
for (size_t i = 0; i < vec_out.size(); ++i) {
vec_ret.push_back(TensorToPyArray(vec_out[i], true));
for (size_t i = 0; i < fetch_tensors.size(); ++i) {
vec_ret.push_back(TensorToPyArray(fetch_tensors[i], true));
}
return vec_ret;
});
......
......@@ -15,7 +15,7 @@
import unittest
import paddle
from paddle.fluid import core
from paddle.fluid.core import InterpreterCore
from paddle.fluid.core import StandaloneExecutor
import numpy as np
......@@ -37,19 +37,25 @@ class LinearTestCase(unittest.TestCase):
startup_program = paddle.fluid.default_startup_program()
p = core.Place()
p.set_place(self.place)
inter_core = InterpreterCore(p, main_program.desc, startup_program.desc,
core.Scope())
standaloneexecutor = StandaloneExecutor(p, startup_program.desc,
main_program.desc, core.Scope())
out = inter_core.run({
out = standaloneexecutor.run({
"a": np.ones(
[2, 2], dtype="float32") * 2
}, [c.name])
for i in range(10):
out = inter_core.run({
out = standaloneexecutor.run({
"a": np.ones(
[2, 2], dtype="float32") * i
}, [c.name])
for i in range(10):
out = standaloneexecutor.run({
"a": np.ones(
[2, 2], dtype="float32") * i
}, [a.name, c.name])
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册