Unverified · Commit 754ab705 authored by TeFeng Chen, committed by GitHub

support inplaced variable in cinn_launch (#49912)

* support inplaced variable in cinn_launch

* fix error hint when compiling

* fix inplaced output variable of the subgraph

* skip CinnCompiler check

* use existing definition

* fix namespace reference error

* modify error message

* update cinn tag

* fix namespace

* skip enforce check

* fix unittest attribute throw
Parent 7122760a
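For context before the diff: this change introduces a renaming convention for variables that a subgraph both reads and writes ("inplaced" variables). CINN sees such a variable as two arguments, with the output occurrence suffixed by @InplaceOut, and RedirectVarName strips the suffix back off whenever the Paddle scope is touched. Below is a minimal, self-contained sketch of that logic; the suffix literal "@InplaceOut" (taken from the header comment in this diff) and the free-function form are illustrative assumptions, while the real implementation is the CinnLaunchContext::RedirectVarName method shown further down.

```cpp
#include <iostream>
#include <string>
#include <unordered_set>

// Assumed value of ::cinn::frontend::paddle::InplaceOutSuffix, per the
// header comment in this diff.
const std::string kInplaceOutSuffix = "@InplaceOut";

// Strip the inplace suffix so a renamed output resolves to the original
// Paddle variable; names without the suffix pass through unchanged.
std::string RedirectVarName(
    const std::string& var_name,
    const std::unordered_set<std::string>& inplace_vars) {
  auto pos = var_name.find(kInplaceOutSuffix);
  if (pos == std::string::npos) {
    return var_name;
  }
  std::string original = var_name.substr(0, pos);
  if (!inplace_vars.count(original)) {
    std::cerr << "Warning: " << original
              << " was not marked as inplaced by Paddle\n";
  }
  return original;
}

int main() {
  std::unordered_set<std::string> inplace_vars{"x"};
  // "x" is both an input and an output of the subgraph, so the compiled
  // program carries input "x" and output "x@InplaceOut"; both resolve to
  // the single Paddle scope variable "x".
  std::cout << RedirectVarName("x" + kInplaceOutSuffix, inplace_vars) << "\n";  // x
  std::cout << RedirectVarName("y", inplace_vars) << "\n";                      // y
}
```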
......@@ -17,8 +17,8 @@ if(NOT WITH_CINN)
endif()
if(NOT CINN_GIT_TAG)
# 2023.01.12 commit
set(CINN_GIT_TAG 5d1ae0f4b8e3f7cd5b16dfc76d2161bf77e938ac)
# 2023.01.28 commit
set(CINN_GIT_TAG 1449890f7724babf2a343c6f8073bd28a7bbc683)
endif()
message(STATUS "CINN version: " ${CINN_GIT_TAG})
......
......@@ -26,6 +26,7 @@
#include "cinn/auto_schedule/tuning.h"
#include "cinn/common/target.h"
#include "cinn/common/type.h"
#include "cinn/frontend/op_mapper_registry.h"
#include "cinn/frontend/optimize.h"
#include "cinn/frontend/syntax.h"
#include "cinn/hlir/framework/graph.h"
......@@ -54,6 +55,7 @@ namespace paddle2cinn {
using ::cinn::auto_schedule::AutoTuner;
using ::cinn::common::Target;
using ::cinn::frontend::Optimize;
using ::cinn::frontend::paddle::InplaceOutSuffix;
using ::cinn::hlir::framework::BuildScope;
using ::cinn::hlir::framework::GraphCompiler;
using inference::analysis::Dot;
......@@ -239,11 +241,17 @@ void CinnCompiler::CheckCompiledValid(
const std::map<std::string, const phi::DenseTensor *> &input_tensors,
const CinnCompiledObject &compiled_obj) const {
const auto &input_var_names = graph.Get<std::vector<std::string>>(kInputVars);
const auto &inplace_var_names =
graph.Get<std::unordered_set<std::string>>(kInplaceVarNames);
const auto &output_var_names =
graph.Get<std::vector<std::string>>(kOutputVars);
auto *launch_context = compiled_obj.launch_context.get();
// 1. check that all of the output variables will be assigned by the compiled program
for (auto &&var_name : output_var_names) {
for (auto var_name : output_var_names) {
// inplace variables are renamed with a specified suffix
if (inplace_var_names.count(var_name)) {
var_name += InplaceOutSuffix;
}
PADDLE_ENFORCE_EQ(launch_context->IsVariableUsed(var_name),
true,
platform::errors::PreconditionNotMet(
......
......@@ -59,7 +59,8 @@ class CinnInstructionRunOpKernel : public framework::OpKernel<T> {
auto share_argument_buffer_fn = [launch_context,
&ctx](const std::string& var_name) {
cinn_buffer_t* buffer = launch_context->GetCinnBufferOfVar(var_name);
framework::Variable* var = ctx.scope().GetVar(var_name);
std::string revise_var_name = launch_context->RedirectVarName(var_name);
framework::Variable* var = ctx.scope().GetVar(revise_var_name);
auto* tensor = var->template GetMutable<phi::DenseTensor>();
buffer->memory = reinterpret_cast<uint8_t*>(tensor->mutable_data(
ctx.GetPlace(),
......
......@@ -19,6 +19,7 @@
#include <utility>
#include <vector>
#include "cinn/frontend/op_mapper_registry.h"
#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/hlir/framework/instruction.h"
#include "cinn/hlir/framework/scope.h"
......@@ -50,6 +51,8 @@ using framework::ParallelExecutor;
using framework::Scope;
using CinnInstruction = ::cinn::hlir::framework::Instruction;
using CinnRuntimeProgram = ::cinn::hlir::framework::Program;
using ::cinn::frontend::paddle::InplaceOutSuffix;
using framework::paddle2cinn::kInplaceVarNames;
using framework::paddle2cinn::kMemOptVarInfoFromMainGraph;
using framework::paddle2cinn::kSkipGcVarNames;
using framework::paddle2cinn::Name2VarInfoMap;
......@@ -72,6 +75,8 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph,
graph.Get<std::vector<std::string>>(framework::paddle2cinn::kInputVars);
const auto& output_var_names =
graph.Get<std::vector<std::string>>(framework::paddle2cinn::kOutputVars);
inplace_var_names_ =
graph.Get<std::unordered_set<std::string>>(kInplaceVarNames);
internal_var_names_ =
ExtractInternalVarNames(input_var_names, output_var_names);
// initialize all execution arguments
......@@ -83,7 +88,13 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph,
}
}
for (auto&& var_name : output_var_names) {
AssignExternalVariable(var_name);
if (inplace_var_names_.count(var_name)) {
VLOG(4) << "Inplaced variable:" << var_name << " -> "
<< var_name + InplaceOutSuffix << " as paddle2cinn varmap key";
AssignExternalVariable(var_name + InplaceOutSuffix);
} else {
AssignExternalVariable(var_name);
}
}
for (auto&& var_name : internal_var_names_) {
AssignInternalVariable(var_name);
......@@ -124,14 +135,13 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph,
"Distribution of variables in the graph compiled:"
"input[%lu],internal[%lu],output[%lu],"
"outer_eager_deletion[%lu],skip_eager_deletion[%lu],"
"skip_gc_vars_[%lu],initialized_beforehand[%lu]",
"skip_gc_vars_[%lu]",
input_var_names.size(),
internal_var_names_.size(),
output_var_names.size(),
outer_varinfo.size(),
skip_eager_vars_.size(),
skip_gc_vars_.size(),
initialized_beforehand_vars_.size());
skip_gc_vars_.size());
}
void CinnLaunchContext::BuildVarNameMap(
......@@ -214,8 +224,12 @@ std::unordered_set<std::string> CinnLaunchContext::ExtractInternalVarNames(
[](const auto& name_pair) { return name_pair.first; });
// exclude the input variables and output variables
auto exclude_names_fn = [&remain_var_names](const std::string& var_name) {
auto exclude_names_fn = [this,
&remain_var_names](const std::string& var_name) {
remain_var_names.erase(var_name);
if (inplace_var_names_.count(var_name)) {
remain_var_names.erase(var_name + InplaceOutSuffix);
}
};
std::for_each(
input_var_names.begin(), input_var_names.end(), exclude_names_fn);
......@@ -281,11 +295,12 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) {
platform::errors::InvalidArgument(
"Variable(%s) not applied in cinn", var_name));
auto* cinn_buffer = GetCinnBufferOfVar(var_name);
std::string revise_var_name = RedirectVarName(var_name);
// assign external malloc/free callbacks of cinn_buffer_t
cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
[this, var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor =
cached_scope_->GetVar(var_name)->GetMutable<phi::DenseTensor>();
[this, revise_var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor = cached_scope_->GetVar(revise_var_name)
->GetMutable<phi::DenseTensor>();
tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
buffer->memory = reinterpret_cast<uint8_t*>(tensor->mutable_data(
*cached_place_,
......@@ -307,11 +322,12 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
platform::errors::InvalidArgument(
"Variable(%s) not applied in cinn", var_name));
auto* cinn_buffer = GetCinnBufferOfVar(var_name);
std::string revise_var_name = RedirectVarName(var_name);
// assign external malloc/free callbacks of cinn_buffer_t
cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
[this, var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor =
cached_temp_scope_->Var(var_name)->GetMutable<phi::DenseTensor>();
[this, revise_var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor = cached_temp_scope_->Var(revise_var_name)
->GetMutable<phi::DenseTensor>();
tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
buffer->memory = reinterpret_cast<uint8_t*>(tensor->mutable_data(
*cached_place_,
......@@ -322,8 +338,8 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
// internal variables should release their buffers immediately
// if no instruction uses them
cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
[this, var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor = cached_temp_scope_->GetVar(var_name)
[this, revise_var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor = cached_temp_scope_->GetVar(revise_var_name)
->GetMutable<phi::DenseTensor>();
tensor->clear();
return 0;
......@@ -359,7 +375,6 @@ std::unique_ptr<framework::ProgramDesc> CinnLaunchContext::BuildCompiledProgram(
// are set by values of the corresponding compiled tensors,
// including the in/out variables where the equality between their tensors
// and the CINN compiled ones is verified in the corresponding cinn_launch_op.
std::unordered_set<std::string> has_refer_vars;
for (auto&& arg : cinn_argument_names_) {
const std::string& var_name = cinn2paddle_varmap_.at(arg);
framework::VarDesc* var_desc = block->Var(var_name);
......@@ -370,7 +385,6 @@ std::unique_ptr<framework::ProgramDesc> CinnLaunchContext::BuildCompiledProgram(
auto* ori_desc = res->second;
var_desc->SetPersistable(ori_desc->Persistable());
var_desc->SetIsParameter(ori_desc->IsParameter());
has_refer_vars.insert(var_name);
}
auto cinn_tensor = GetCinnTensorOfVar(var_name);
......@@ -404,13 +418,6 @@ std::unique_ptr<framework::ProgramDesc> CinnLaunchContext::BuildCompiledProgram(
auto* ins = instructions.at(ins_idx).get();
auto in_args = trans_and_pack_args_fn(ins->GetInArgs());
auto out_args = trans_and_pack_args_fn(ins->GetOutArgs());
for (auto&& var_name : in_args) {
if (!has_refer_vars.count(var_name)) {
initialized_beforehand_vars_.emplace_back(var_name);
}
}
has_refer_vars.insert(out_args.begin(), out_args.end());
auto* op_desc = block->AppendOp();
op_desc->SetType("cinn_instruction_run");
op_desc->SetInput(kX, in_args);
......@@ -453,14 +460,6 @@ ParallelExecutor* CinnLaunchContext::InitializePE(const platform::Place& place,
framework::proto::VarType::LOD_TENSOR);
}
for (auto&& var_name : initialized_beforehand_vars_) {
auto* var = scope->GetVar(var_name);
auto* buffer = GetCinnBufferOfVar(var_name);
auto dim = framework::DDim(buffer->dims, buffer->dimensions);
var->GetMutable<phi::DenseTensor>()->Resize(dim);
var->GetMutable<phi::DenseTensor>()->mutable_data(
place, framework::paddle2cinn::TransToPaddleDataType(buffer->type));
}
return parallel_executor_.get();
}
......@@ -493,17 +492,24 @@ framework::InterpreterCore* CinnLaunchContext::InitializeInterpreterCore(
}
UpdateCapturedEnv(*scope, place);
}
for (auto&& var_name : initialized_beforehand_vars_) {
auto* var = scope->GetVar(var_name);
auto* buffer = GetCinnBufferOfVar(var_name);
auto dim = framework::DDim(buffer->dims, buffer->dimensions);
var->GetMutable<phi::DenseTensor>()->Resize(dim);
var->GetMutable<phi::DenseTensor>()->mutable_data(
place, framework::paddle2cinn::TransToPaddleDataType(buffer->type));
}
return interpreter_core_.get();
}
std::string CinnLaunchContext::RedirectVarName(const std::string& var_name) {
auto pos = var_name.find(InplaceOutSuffix);
if (pos == std::string::npos) {
return var_name;
}
std::string remove_suffix_name = var_name.substr(0, pos);
if (!inplace_var_names_.count(remove_suffix_name)) {
LOG(WARNING) << "Variable:" << remove_suffix_name
<< " was not marked as inplaced by Paddle, but CINN does";
}
VLOG(4) << "Inplaced variable:" << var_name << " redirect to "
<< remove_suffix_name;
return remove_suffix_name;
}
cinn_buffer_t* CinnLaunchContext::GetCinnBufferOfVar(
const std::string& var_name) {
auto res = paddle2argument_.find(var_name);
......
......@@ -96,6 +96,9 @@ class CinnLaunchContext {
return skip_eager_vars_;
}
// Redirect the name of a Paddle variable to the original one if it was inplaced
std::string RedirectVarName(const std::string& var_name);
// Return the list of internal variable names
const std::unordered_set<std::string>& GetInternalVarNames() const {
return internal_var_names_;
......@@ -151,11 +154,13 @@ class CinnLaunchContext {
std::unordered_map<std::string, std::string> cinn2paddle_varmap_;
// a list of internal variable names in Paddle
std::unordered_set<std::string> internal_var_names_;
// In CINN, there are two variables (in/out) mapped to the one inplaced
// variable of Paddle. To resolve this conflict, we add an output counterpart
// in Paddle with the name suffixed by @InplaceOut.
// This set stores which Paddle variable names are inplaced.
std::unordered_set<std::string> inplace_var_names_;
// the names of the cinn arguments used in compiled executable program
std::unordered_set<std::string> cinn_argument_names_;
// TODO(CtfGo): remove this list after fixing batch_norm bug
// due to duplicate association in the same variable.
std::vector<std::string> initialized_beforehand_vars_;
// the variable scope compiled from cinn
const std::shared_ptr<CinnScope> cinn_scope_;
......
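To make the in/out conflict described in the header comment above concrete, here is a hypothetical resolution table for a single inplaced variable x; the map below is purely illustrative and not an actual member of CinnLaunchContext.

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  // Keys: names as they appear on the CINN side; values: the Paddle scope
  // variable each one resolves to after RedirectVarName strips the suffix.
  std::map<std::string, std::string> resolution = {
      {"x", "x"},             // input occurrence of the inplaced variable
      {"x@InplaceOut", "x"},  // output counterpart added by this change
  };
  for (const auto& entry : resolution) {
    std::cout << entry.first << " -> " << entry.second << "\n";
  }
}
```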
......@@ -90,6 +90,8 @@ const Graph& InitDefaultSubgraph() {
new std::vector<std::string>({"var5"}));
graph->GetOrInit<Name2VarInfoMap>(
framework::paddle2cinn::kMemOptVarInfoFromMainGraph);
graph->GetOrInit<std::unordered_set<std::string>>(
framework::paddle2cinn::kInplaceVarNames);
});
return *graph.get();
}
......
......@@ -84,6 +84,8 @@ std::unique_ptr<Graph> CreateOnlyElementwiseAddGraph(
new std::vector<std::string>({out_name}));
g->GetOrInit<Name2VarInfoMap>(
framework::paddle2cinn::kMemOptVarInfoFromMainGraph);
g->GetOrInit<std::unordered_set<std::string>>(
framework::paddle2cinn::kInplaceVarNames);
return g;
}
......
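As a usage note on the two test fixtures above: a subgraph advertises its inplaced variables through the kInplaceVarNames attribute, so a test exercising the new path would populate the set after initializing it. A hedged fragment following the same pattern (the variable name "var1" and the insertion are illustrative; GetOrInit and kInplaceVarNames come from the diff):

```cpp
// Fragment, assuming the surrounding test scope above: mark "var1" as
// inplaced so the launch context creates its @InplaceOut counterpart.
auto& inplace_vars = g->GetOrInit<std::unordered_set<std::string>>(
    framework::paddle2cinn::kInplaceVarNames);
inplace_vars.insert("var1");
```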