未验证 提交 4d042a83 编写于 作者: T TeFeng Chen 提交者: GitHub

build a Paddle Graph from CINN compiled program for execution with PE (#39724)

* build a Paddle Graph from CINN compiled program for execution with PE

* update names of some variables

* fix random fail in build_cinn_pass_test and update some comments

* fix compiler error by merging phi pr
上级 df0b4434
......@@ -44,11 +44,6 @@ DECLARE_string(deny_cinn_ops);
namespace paddle {
namespace framework {
namespace ir {
class MemOptVarInfo;
} // namespace ir
namespace paddle2cinn {
using framework::ir::Graph;
......@@ -398,9 +393,7 @@ std::unique_ptr<Graph> CreateNewSubGraph(const GraphNodeSet& cluster,
kNoNeedBufferFeeds, no_need_buffer_feeds.release());
// initialize empty map for kMemOptVarInfoFromMainGraph attribute,
// it will be filled on the share_mem_opt_info_to_subgraph pass
subgraph->GetOrInit<std::unordered_map<
std::string, std::shared_ptr<framework::ir::MemOptVarInfo>>>(
kMemOptVarInfoFromMainGraph);
subgraph->GetOrInit<Name2VarInfoMap>(kMemOptVarInfoFromMainGraph);
return subgraph;
}
......
......@@ -18,6 +18,10 @@ limitations under the License. */
namespace paddle {
namespace framework {
namespace ir {
class MemOptVarInfo;
} // namespace ir
namespace paddle2cinn {
constexpr char kCinnLaunchOp[] = "cinn_launch";
......@@ -27,6 +31,9 @@ constexpr char kInternalVars[] = "InternalVars";
constexpr char kOutputVars[] = "OutputVars";
constexpr char kMemOptVarInfoFromMainGraph[] =
"mem_opt_var_info_from_main_graph";
using Name2VarInfoMap =
std::unordered_map<std::string,
std::shared_ptr<framework::ir::MemOptVarInfo>>;
// A pass named BuildCinnPass, the function of this pass is:
//
......
......@@ -255,7 +255,9 @@ TEST(BuildCinnPassTest, AllOpSupportCinn) {
ASSERT_EQ(
std::unordered_set<Node*>(cinn_op->inputs.begin(), cinn_op->inputs.end()),
std::unordered_set<Node*>({v0, v1, v2, v4}));
ASSERT_EQ(cinn_op->outputs, std::vector<Node*>({v6, v7}));
ASSERT_EQ(std::unordered_set<Node*>(cinn_op->outputs.begin(),
cinn_op->outputs.end()),
std::unordered_set<Node*>({v6, v7}));
ASSERT_EQ(v1->outputs, std::vector<Node*>({cinn_op}));
ASSERT_EQ(v6->inputs, std::vector<Node*>({cinn_op}));
......
......@@ -248,10 +248,10 @@ std::unique_ptr<CinnCompiledObject> CinnCompiler::CompileGraph(
*compiled_obj = {std::move(graph_compiler),
std::move(compiled_res.runtime_program), scope,
symbol.var_model_to_program_map()};
compiled_obj->launch_context =
std::make_unique<operators::details::CinnLaunchContext>(
compiled_obj->paddle2cinn_varmap, compiled_obj->scope);
compiled_obj->cached_index = compiled_num;
compiled_obj->launch_context =
std::make_unique<operators::details::CinnLaunchContext>(graph,
*compiled_obj);
return compiled_obj;
}
......
include(operators)

cc_library(cinn_op_helper SRCS cinn_op_helper.cc DEPS operator device_context)
# cinn_launch_context now builds a Paddle graph from the compiled CINN program
# and runs it with ParallelExecutor, hence the graph/build_strategy/PE deps.
# NOTE: the stale duplicate cc_library() line for the same target was removed;
# defining a target twice is a CMake configure-time error.
cc_library(cinn_launch_context SRCS cinn_launch_context.cc DEPS ddim lod_tensor scope proto_desc graph build_strategy parallel_executor cinn)

SET(CINN_OP_DEPS string_helper cinn cinn_compiler cinn_op_helper cinn_launch_context)
register_operators(DEPS ${CINN_OP_DEPS})
if (WITH_TESTING)
# the test now exercises cinn_instruction_run ops through a ParallelExecutor,
# so it additionally depends on proto_desc/graph/cinn_instruction_run_op.
# NOTE: the stale duplicate cc_test() line for the same target was removed;
# declaring the same test target twice is a CMake configure-time error.
cc_test(cinn_launch_context_test SRCS cinn_launch_context_test.cc DEPS ddim lod_tensor scope proto_desc graph cinn_launch_context cinn_instruction_run_op cinn)
set_tests_properties(cinn_launch_context_test PROPERTIES LABELS "RUN_TYPE=CINN")
SET(CINN_RUN_ENVIRONMENT "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda")
......
......@@ -17,22 +17,39 @@
#include <functional>
#include <utility>
#include <vector>
#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/hlir/framework/instruction.h"
#include "cinn/hlir/framework/scope.h"
#include "cinn/hlir/framework/tensor.h"
#include "cinn/runtime/cinn_runtime.h"
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/paddle2cinn/build_cinn_pass.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/cinn/cinn_op_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/core/ddim.h"
namespace paddle {
namespace operators::details {
using LoDTensor = framework::LoDTensor;
using framework::Scope;
using framework::LoDTensor;
using framework::ParallelExecutor;
using CinnInstruction = ::cinn::hlir::framework::Instruction;
using CinnRuntimeProgram = ::cinn::hlir::framework::Program;
using framework::paddle2cinn::Name2VarInfoMap;
using framework::paddle2cinn::kMemOptVarInfoFromMainGraph;
CinnLaunchContext::CinnLaunchContext(
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap,
const std::shared_ptr<CinnScope>& cinn_scope)
: cinn_scope_(cinn_scope) {
// generate all names of the cinn execution arguments
CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph,
const CinnCompiledObject& compiled_obj)
: cinn_scope_(compiled_obj.scope) {
// collect all names of the CINN execution arguments
auto var_names = cinn_scope_->var_names();
cinn_argument_names_.reserve(var_names.size());
std::transform(
......@@ -40,7 +57,42 @@ CinnLaunchContext::CinnLaunchContext(
std::inserter(cinn_argument_names_, cinn_argument_names_.end()),
[](const auto& name_view) { return std::string(name_view.data()); });
// build name map between the original variables and compiled ones
BuildVarNameMap(paddle2cinn_varmap, cinn_argument_names_);
BuildVarNameMap(compiled_obj.paddle2cinn_varmap, cinn_argument_names_);
const auto& input_var_names =
graph.Get<std::vector<std::string>>(framework::paddle2cinn::kInputVars);
const auto& output_var_names =
graph.Get<std::vector<std::string>>(framework::paddle2cinn::kOutputVars);
internal_var_names_ =
ExtractInternalVarNames(input_var_names, output_var_names);
// check completeness of output variables in compiled result
for (auto&& var_name : output_var_names) {
PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true,
platform::errors::PreconditionNotMet(
"Variable(%s) not applied in CINN", var_name));
}
// initialize all execution arguments
InitializeArguments();
// DEPRECATED(CtfGo): following callback assignment will be deprecated soon
for (auto&& var_name : input_var_names) {
if (IsVariableUsed(var_name)) {
AssignExternalVariable(var_name);
}
}
for (auto&& var_name : output_var_names) {
AssignExternalVariable(var_name);
}
for (auto&& var_name : internal_var_names_) {
AssignInternalVariable(var_name);
}
// Convert the CINN runtime program to a Paddle graph
runtime_graph_ = std::make_unique<framework::ir::Graph>(
BuildCompiledProgram(graph, compiled_obj));
runtime_graph_->SetNotOwned<Name2VarInfoMap>(
kMemOptVarInfoFromMainGraph,
&graph.Get<Name2VarInfoMap>(kMemOptVarInfoFromMainGraph));
}
void CinnLaunchContext::BuildVarNameMap(
......@@ -94,21 +146,15 @@ void CinnLaunchContext::UpdateCapturedEnv(const framework::Scope& scope,
<< std::addressof(place);
}
// Returns whether the execution arguments (cinn_buffer_t objects and the
// name->argument map) have been populated yet.
bool CinnLaunchContext::IsArgumentsInitialized() const {
  // both containers are filled together, so either being empty means
  // initialization has not happened
  return !hold_buffers_.empty() && !name2argument_.empty();
}
// Returns true when the given Paddle variable takes part in the CINN
// execution, i.e. it has been mapped to a CINN argument name.
bool CinnLaunchContext::IsVariableUsed(const std::string& var_name) const {
  return paddle2cinn_varmap_.find(var_name) != paddle2cinn_varmap_.end();
}
CinnTensor CinnLaunchContext::GetCinnTensor(const std::string& arg_name) {
PADDLE_ENFORCE_GT(cinn_argument_names_.count(arg_name), 0,
platform::errors::InvalidArgument(
"Variable(%s) not found in cinn scope.", arg_name));
// Look up the compiled CINN tensor bound to a Paddle variable name.
// The variable must already be mapped to a CINN argument, otherwise
// a NotFound error is raised.
CinnTensor CinnLaunchContext::GetCinnTensorOfVar(const std::string& var_name) {
  PADDLE_ENFORCE_EQ(
      IsVariableUsed(var_name), true,
      platform::errors::NotFound("Variable(%s) not applied in CINN", var_name));
  return cinn_scope_->GetTensor(paddle2cinn_varmap_.at(var_name));
}
......@@ -132,10 +178,13 @@ std::unordered_set<std::string> CinnLaunchContext::ExtractInternalVarNames(
return remain_var_names;
}
void CinnLaunchContext::CheckTensorEquivalent(const std::string& var_name,
const LoDTensor& paddle_tensor,
const CinnTensor& cinn_tensor) {
void CinnLaunchContext::CheckTensorEquivalent(
const std::string& var_name, const framework::LoDTensor& paddle_tensor) {
PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true,
platform::errors::InvalidArgument(
"Variable(%s) not applied in cinn", var_name));
// check dimension
auto cinn_tensor = GetCinnTensorOfVar(var_name);
auto cinn_dims = phi::make_ddim(cinn_tensor->shape().data());
PADDLE_ENFORCE_EQ(paddle_tensor.dims(), cinn_dims,
platform::errors::PreconditionNotMet(
......@@ -146,22 +195,28 @@ void CinnLaunchContext::CheckTensorEquivalent(const std::string& var_name,
// TODO(CtfGo): check the underlying data type after CINN ready
}
void CinnLaunchContext::InitializeArguments() {
for (auto&& arg : cinn_argument_names_) {
auto cinn_buffer = std::make_unique<cinn_buffer_t>();
auto cinn_tensor = GetCinnTensorOfVar(cinn2paddle_varmap_.at(arg));
// assign dimensions with corresponding compiled tensor
cinn_buffer->resize(cinn_tensor->shape().data().data(),
cinn_tensor->shape().data().size());
VLOG(4) << string::Sprintf(
"Append an argument:name(%s),dims(%s),argument size:(%lu)", arg,
framework::DDim(cinn_buffer->dims, cinn_buffer->dimensions).to_str(),
name2argument_.size());
name2argument_.emplace(arg, cinn_buffer.get());
hold_buffers_.emplace_back(std::move(cinn_buffer));
}
}
void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) {
PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true,
platform::errors::InvalidArgument(
"Variable(%s) not applied in cinn", var_name));
const auto& cinn_arg_name = paddle2cinn_varmap_.at(var_name);
const auto& paddle_tensor = cached_scope_->GetVar(var_name)->Get<LoDTensor>();
CinnTensor cinn_tensor = GetCinnTensor(cinn_arg_name);
if (paddle_tensor.IsInitialized()) {
CheckTensorEquivalent(var_name, paddle_tensor, cinn_tensor);
}
auto cinn_buffer = std::make_unique<cinn_buffer_t>();
// assign dimensions and alloc/free callback of cinn_buffer_t
cinn_buffer->resize(cinn_tensor->shape().data().data(),
cinn_tensor->shape().data().size());
auto* cinn_buffer = GetCinnBufferOfVar(var_name);
// assign external malloc/free callbacks of cinn_buffer_t
cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
[this, var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor = cached_scope_->GetVar(var_name)->GetMutable<LoDTensor>();
......@@ -177,22 +232,14 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) {
// Do nothing
return 0;
});
return AppendArgument(cinn_arg_name, std::move(cinn_buffer));
}
void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true,
platform::errors::InvalidArgument(
"Variable(%s) not applied in cinn", var_name));
const auto& cinn_arg_name = paddle2cinn_varmap_.at(var_name);
CinnTensor cinn_tensor = GetCinnTensor(cinn_arg_name);
auto cinn_buffer = std::make_unique<cinn_buffer_t>();
// assign dimensions and alloc/free callback of cinn_buffer_t
cinn_buffer->resize(cinn_tensor->shape().data().data(),
cinn_tensor->shape().data().size());
auto* cinn_buffer = GetCinnBufferOfVar(var_name);
// assign external malloc/free callbacks of cinn_buffer_t
cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
[this, var_name](void* ctx, cinn_buffer_t* buffer) {
auto* tensor =
......@@ -212,30 +259,106 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
tensor->clear();
return 0;
});
return AppendArgument(cinn_arg_name, std::move(cinn_buffer));
}
void CinnLaunchContext::AppendArgument(
const std::string& arg_name, std::unique_ptr<cinn_buffer_t>&& buffer) {
name2argument_.emplace(arg_name, buffer.get());
hold_buffers_.emplace_back(std::move(buffer));
VLOG(4) << string::Sprintf(
"Append an argument:name(%s),dims(%s),argument size:(%lu)", arg_name,
framework::DDim(buffer->dims, buffer->dimensions).to_str(),
name2argument_.size());
// Convert the compiled CINN runtime program into an equivalent Paddle
// ProgramDesc: one VarDesc per execution argument and one
// `cinn_instruction_run` op per CINN instruction, so the whole compiled
// program can be executed by a ParallelExecutor.
framework::ProgramDesc CinnLaunchContext::BuildCompiledProgram(
    const framework::ir::Graph& graph, const CinnCompiledObject& compiled_obj) {
  CinnRuntimeProgram* runtime_program = compiled_obj.runtime_program.get();
  // Step 0: Create an empty program_desc, there will be only one block
  framework::ProgramDesc program_desc;
  auto* block = program_desc.MutableBlock(0);
  const std::vector<std::unique_ptr<CinnInstruction>>& instructions =
      runtime_program->GetRunInstructions();

  // build a map that links the name of a Paddle variable to its VarDesc
  const std::unordered_set<framework::ir::Node*>& nodes = graph.Nodes();
  std::unordered_map<std::string, framework::VarDesc*> original_vardescs;
  for (auto* node : nodes) {
    if (node->IsVar() && node->Var()) {
      original_vardescs.emplace(node->Name(), node->Var());
    }
  }

  // Step 1: Create a VarDesc for each execution argument:
  //   (1) For those variables that are input or output variables of the
  //   original subgraph, there must exist an original VarDesc, so
  //   we copy some useful info(such as IsParameter,Persistable)
  //   to the new VarDesc.
  //   (2) For all variables, the shape, data type of their VarDescs
  //   are set by values of the corresponding compiled tensors,
  //   including the in/out variables where the equality between their tensors
  //   and the CINN compiled ones is verified in corresponding cinn_launch_op.
  for (auto&& arg : cinn_argument_names_) {
    const std::string& var_name = cinn2paddle_varmap_.at(arg);
    framework::VarDesc* var_desc = block->Var(var_name);
    var_desc->SetType(framework::proto::VarType::LOD_TENSOR);

    auto res = original_vardescs.find(var_name);
    if (res != original_vardescs.end()) {
      auto* ori_desc = res->second;
      var_desc->SetPersistable(ori_desc->Persistable());
      var_desc->SetIsParameter(ori_desc->IsParameter());
    }

    auto cinn_tensor = GetCinnTensorOfVar(var_name);
    // TODO(CtfGo): set the corresponding data type after CINN ready,
    //              currently set as FP32 in default
    var_desc->SetDataType(framework::proto::VarType::FP32);
    var_desc->SetShape(std::vector<int64_t>(cinn_tensor->shape().data().begin(),
                                            cinn_tensor->shape().data().end()));
  }

  // transform names of the input or output arguments of a CINN instruction
  // to the corresponding Paddle variable names, and repack them as one vector
  auto trans_and_pack_args_fn =
      [this](const std::vector<std::vector<std::string>>& cinn_args_array) {
        std::vector<std::string> var_names;
        for (auto&& cinn_args : cinn_args_array) {
          for (auto&& arg : cinn_args) {
            auto res = cinn2paddle_varmap_.find(arg);
            PADDLE_ENFORCE_NE(
                res, cinn2paddle_varmap_.end(),
                platform::errors::NotFound("Argument(%s) not found", arg));
            var_names.emplace_back(res->second);
          }
        }
        return var_names;
      };

  // Step 2: create an OpDesc of cinn_instruction_run op for
  // each CINN instruction and append it to the main block.
  // use size_t for the index to avoid a signed/unsigned comparison
  // against instructions.size()
  for (size_t ins_idx = 0; ins_idx < instructions.size(); ++ins_idx) {
    auto* ins = instructions.at(ins_idx).get();
    auto in_args = trans_and_pack_args_fn(ins->GetInArgs());
    auto out_args = trans_and_pack_args_fn(ins->GetOutArgs());

    auto* op_desc = block->AppendOp();
    op_desc->SetType("cinn_instruction_run");
    op_desc->SetInput(kX, in_args);
    op_desc->SetOutput(kOutputs, out_args);
    op_desc->SetAttr(kCachedIndex,
                     {static_cast<int64_t>(compiled_obj.cached_index)});
    op_desc->SetAttr(kInstructionIndex, {static_cast<int64_t>(ins_idx)});
  }

  return program_desc;
}
const std::map<std::string, cinn_pod_value_t>&
CinnLaunchContext::FinalizeArguments() const {
// Check all execution parameters are assigned valued.
std::for_each(cinn_argument_names_.begin(), cinn_argument_names_.end(),
[this](const auto& arg_name) {
PADDLE_ENFORCE_GT(
name2argument_.count(arg_name), 0,
platform::errors::NotFound(
"Argument(%s) is missed for execution", arg_name));
});
return name2argument_;
// Lazily build the ParallelExecutor that runs the converted runtime graph.
// The executor is constructed only on the first call; every call (including
// the first) rebinds its local scope to the given one and refreshes the
// temporary variables.
ParallelExecutor* CinnLaunchContext::InitializePE(const platform::Place& place,
                                                 framework::Scope* scope) {
  if (parallel_executor_ == nullptr) {
    framework::details::ExecutionStrategy exec_strategy;
    framework::details::BuildStrategy build_strategy;
    parallel_executor_ = std::make_unique<ParallelExecutor>(
        place, scope, exec_strategy, build_strategy, runtime_graph_.get());
  }
  // update the scope bound to an OpHandle and rebuild temporary variables
  auto* bound_scope = parallel_executor_->GetLocalScopes().front();
  std::unordered_map<Scope*, Scope*> scope_remap{{bound_scope, scope}};
  parallel_executor_->ResetOpHandleScopeMapOfGraphs(scope_remap);
  parallel_executor_->PrepareVariables(scope);
  return parallel_executor_.get();
}
cinn_buffer_t* CinnLaunchContext::GetCinnBufferOfVar(
......
......@@ -21,7 +21,7 @@
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/ddim.h"
......@@ -35,10 +35,25 @@ class Program;
} // namespace cinn::hlir::framework
namespace paddle {
namespace framework {
class ProgramDesc;
class Scope;
class VarDesc;
namespace ir {
class Graph;
} // namespace ir
namespace paddle2cinn {
class CinnCompiledObject;
} // namespace paddle2cinn
} // namespace framework
namespace operators::details {
using CinnTensor = ::cinn::hlir::framework::Tensor;
using CinnScope = ::cinn::hlir::framework::Scope;
using CinnCompiledObject = framework::paddle2cinn::CinnCompiledObject;
// This class is used to cache some reusable data among repeated
// executions for efficiency and it also provides easy interfaces
......@@ -49,58 +64,71 @@ using CinnScope = ::cinn::hlir::framework::Scope;
// Variable while a CINN variable is called an Argument.
class CinnLaunchContext {
public:
explicit CinnLaunchContext(
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap,
const std::shared_ptr<CinnScope>& cinn_scope);
explicit CinnLaunchContext(const framework::ir::Graph& graph,
const CinnCompiledObject& compiled_obj);
// Initialize a ParallelExecutor to execute the runtime graph,
// it will be constructed in the first call, and just update
// the execution scope in the following usage.
framework::ParallelExecutor* InitializePE(const platform::Place& place,
framework::Scope* scope);
// explicitly update several environment variables captured
// by callback of execution arguments
void UpdateCapturedEnv(const framework::Scope& scope,
const platform::Place& place);
// Return whether execution arguments has been initialized
bool IsArgumentsInitialized() const;
// Return whether a Paddle variable used in cinn execution
bool IsVariableUsed(const std::string& var_name) const;
// Assign tensor buffer to input or output variables
void AssignExternalVariable(const std::string& var_name);
// Assign tensor buffer to internal variables
void AssignInternalVariable(const std::string& var_name);
// Check the equiality in type and dimension between the tensor
// in Paddle and the compiled tensor returned by CINN of a same variable
void CheckTensorEquivalent(const std::string& var_name,
const framework::LoDTensor& paddle_tensor);
// Extract internal variable names from all applied variables
// in execution by excluding the input and output variables
std::unordered_set<std::string> ExtractInternalVarNames(
const std::vector<std::string>& input_var_names,
const std::vector<std::string>& output_var_names);
// Return internal variable names list
const std::unordered_set<std::string>& GetInternalVarNames() const {
return internal_var_names_;
}
// Finalize all execution arguments and return the name->argument map
const std::map<std::string, cinn_pod_value_t>& FinalizeArguments() const;
const std::map<std::string, cinn_pod_value_t>& FinalizeArguments() const {
return name2argument_;
}
// Return the cinn_buffer_t* of a specific variable
cinn_buffer_t* GetCinnBufferOfVar(const std::string& var_name);
private:
// Get CinnTensor with CINN argument name
CinnTensor GetCinnTensor(const std::string& arg_name);
// Get corresponding compiled tensor of a Paddle variable name
CinnTensor GetCinnTensorOfVar(const std::string& var_name);
// Build the name maps of paddle->cinn and cinn->paddle
// in reverse for all variables used in cinn execution
void BuildVarNameMap(
const std::unordered_map<std::string, std::string>& compiled_varmap,
const std::unordered_set<std::string>& argument_names);
// Check whether the tensor in Paddle and the compiled
// tensor returned by CINN of a same variable
// are equivalent in type and dimension
void CheckTensorEquivalent(const std::string& var_name,
const framework::LoDTensor& paddle_tensor,
const CinnTensor& cinn_tensor);
// Extract internal variable names from all applied variables
// in execution by excluding the input and output variables
std::unordered_set<std::string> ExtractInternalVarNames(
const std::vector<std::string>& input_var_names,
const std::vector<std::string>& output_var_names);
// Initialize each execution argument with a cinn_buffer_t
void InitializeArguments();
// Append an argument with (cinn name)->(cinn_buffer_t) pair
void AppendArgument(const std::string& arg_name,
std::unique_ptr<cinn_buffer_t>&& buffer);
// Assign tensor buffer to input or output variables
void AssignExternalVariable(const std::string& var_name);
// Assign tensor buffer to internal variables
void AssignInternalVariable(const std::string& var_name);
// Construct a Paddle ProgramDesc with the CINN runtime
// instructions included in the compiled CINN Program
framework::ProgramDesc BuildCompiledProgram(
const framework::ir::Graph& graph,
const CinnCompiledObject& compiled_obj);
private:
const framework::Scope* cached_scope_ = nullptr;
......@@ -111,16 +139,22 @@ class CinnLaunchContext {
std::unordered_map<std::string, std::string> paddle2cinn_varmap_;
// a name map from cinn execution arguments to paddle variables
std::unordered_map<std::string, std::string> cinn2paddle_varmap_;
// a list of internal variable names in Paddle
std::unordered_set<std::string> internal_var_names_;
// the names of the cinn arguments used in compiled executable program
std::unordered_set<std::string> cinn_argument_names_;
// the variable scope compiled from cinn
const std::shared_ptr<CinnScope> cinn_scope_;
// the ir::Graph object converted from the program compiled by CINN
std::unique_ptr<framework::ir::Graph> runtime_graph_;
// a ParallelExecutor to execute the runtime graph
std::unique_ptr<framework::ParallelExecutor> parallel_executor_;
// because a cinn_pod_value_t does not own a cinn_buffer_t object,
// extra storage is necessary to keep those objects alive, and they
// cannot be released until the runtime program finishes execution.
std::vector<std::unique_ptr<cinn_buffer_t>> hold_buffers_;
// this map saves all execution arguments with their cinn names as key,
// and it is passed to the Execute interface of a cinn runtime program.
std::map<std::string, cinn_pod_value_t> name2argument_;
......
......@@ -13,87 +13,229 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/cinn/cinn_launch_context.h"
#include <memory>
#include <set>
#include <utility>
#include "cinn/common/target.h"
#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/hlir/framework/instruction.h"
#include "cinn/hlir/framework/scope.h"
#include "cinn/hlir/framework/tensor.h"
#include "cinn/runtime/cinn_runtime.h"
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/paddle2cinn/build_cinn_pass.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/cinn/cinn_op_helper.h"
#include "paddle/phi/core/ddim.h"
USE_OP(cinn_instruction_run);
namespace paddle {
namespace operators::details {
using LoDTensor = framework::LoDTensor;
using framework::OpDesc;
using framework::ProgramDesc;
using framework::LoDTensor;
using framework::ir::Graph;
using framework::ParallelExecutor;
using framework::paddle2cinn::Name2VarInfoMap;
using CinnShape = ::cinn::hlir::framework::Shape;
using CinnInstruction = ::cinn::hlir::framework::Instruction;
using CinnRuntimeProgram = ::cinn::hlir::framework::Program;
std::unique_ptr<CinnLaunchContext> CreateDefaultLaunchContext() {
// Build (once) the default subgraph used by the tests, mimicking the output
// of build_cinn_pass: var3 = add(var1, var2); var4 = mul(var1, var2);
// var5 = add(var3, var4), with the input/internal/output variable lists and
// an (empty) MemOptVarInfo map attached as graph attributes.
// NOTE: stale lines from a previous revision (old static locals plus a second
// std::call_once opening an unclosed lambda) were removed — they made the
// function ill-formed.
const Graph& InitDefaultSubgraph() {
  static std::once_flag initialized;
  static std::unique_ptr<Graph> graph;
  std::call_once(initialized, [&]() {
    ProgramDesc program;
    auto* block = program.MutableBlock(0);
    // var1 is persistable and var5 is a parameter, so tests can verify
    // that these flags are copied into the rebuilt program
    auto* var1 = block->Var("var1");
    var1->SetPersistable(true);
    block->Var("var2");
    block->Var("var3");
    block->Var("var4");
    auto* var5 = block->Var("var5");
    var5->SetIsParameter(true);
    auto add_op = std::unique_ptr<OpDesc>(
        new OpDesc("elementwise_add", {{"X", {"var1"}}, {"Y", {"var2"}}},
                   {{"Out", {"var3"}}}, {}));
    block->AppendAllocatedOp(std::move(add_op));
    auto mul_op = std::unique_ptr<OpDesc>(new OpDesc(
        "mul", {{"X", {"var1"}}, {"Y", {"var2"}}}, {{"Out", {"var4"}}}, {}));
    block->AppendAllocatedOp(std::move(mul_op));
    auto res_op = std::unique_ptr<OpDesc>(
        new OpDesc("elementwise_add", {{"X", {"var3"}}, {"Y", {"var4"}}},
                   {{"Out", {"var5"}}}, {}));
    block->AppendAllocatedOp(std::move(res_op));
    graph = std::make_unique<Graph>(program);

    graph->Set<std::vector<std::string>>(
        framework::paddle2cinn::kInputVars,
        new std::vector<std::string>({"var1", "var2"}));
    graph->Set<std::vector<std::string>>(
        framework::paddle2cinn::kInternalVars,
        new std::vector<std::string>({"var3", "var4"}));
    graph->Set<std::vector<std::string>>(
        framework::paddle2cinn::kOutputVars,
        new std::vector<std::string>({"var5"}));
    graph->GetOrInit<Name2VarInfoMap>(
        framework::paddle2cinn::kMemOptVarInfoFromMainGraph);
  });
  return *graph.get();
}
// Build (once) a fake CinnCompiledObject equivalent to what CinnCompiler
// would produce for the default subgraph: a CINN scope holding 5 tensors,
// the paddle->cinn variable name map, and a runtime program of 3
// instructions. Returns a pointer to the cached singleton.
// NOTE: a stale assignment to `paddle2cinn_varmap` from a previous revision
// (referencing the name before its declaration) was removed.
CinnCompiledObject* InitDefaultCompiledObject() {
  static std::once_flag initialized;
  static auto compiled_obj = std::make_unique<CinnCompiledObject>();
  std::call_once(initialized, [result = compiled_obj.get()]() {
    auto& scope = result->scope;
    scope = std::make_shared<CinnScope>();
    scope->Var<CinnTensor>("cinn_var1");
    scope->GetTensor("cinn_var1")->Resize(CinnShape({3, 4}));
    scope->Var<CinnTensor>("cinn_var2");
    scope->GetTensor("cinn_var2")->Resize(CinnShape({6, 7, 8}));
    scope->Var<CinnTensor>("cinn_var3");
    scope->GetTensor("cinn_var3")->Resize(CinnShape({10, 16}));
    scope->Var<CinnTensor>("cinn_var4");
    scope->GetTensor("cinn_var4")->Resize(CinnShape({10, 16}));
    scope->Var<CinnTensor>("cinn_var5");
    scope->GetTensor("cinn_var5")->Resize(CinnShape({10, 16}));

    // input variables: var1, var2; output variable: var5;
    // internal variables: var3 and var4. var3 is kept in the varmap so it
    // is addressed by its Paddle name, while var4 is left out so it is
    // addressed directly by its CINN name (cinn_var4).
    auto& paddle2cinn_varmap = result->paddle2cinn_varmap;
    paddle2cinn_varmap = {{"var1", "cinn_var1"},
                          {"var2", "cinn_var2"},
                          {"var3", "cinn_var3"},
                          {"var5", "cinn_var5"}};

    auto& runtime_program = result->runtime_program;
    std::vector<std::unique_ptr<CinnInstruction>> instructions;
    instructions.emplace_back(new CinnInstruction(
        cinn::common::DefaultHostTarget(), scope.get(),
        {"cinn_var1", "cinn_var2"}, {"cinn_var3"}, "elementwise_add"));
    instructions.emplace_back(
        new CinnInstruction(cinn::common::DefaultHostTarget(), scope.get(),
                            {"cinn_var1", "cinn_var2"}, {"cinn_var4"}, "mul"));
    instructions.emplace_back(new CinnInstruction(
        cinn::common::DefaultHostTarget(), scope.get(),
        {"cinn_var3", "cinn_var4"}, {"cinn_var5"}, "elementwise_add"));
    runtime_program =
        std::make_unique<CinnRuntimeProgram>(scope, std::move(instructions));
    // arbitrary cache index, checked against the kCachedIndex op attribute
    result->cached_index = 110;
  });
  return compiled_obj.get();
}
TEST(CinnLaunchContextTest, TestBasic) {
auto launch_context = CreateDefaultLaunchContext();
// test IsVariableUsed
// Test fixture that rebuilds a CinnLaunchContext before every test case,
// wiring together the default subgraph and the default compiled object.
class CinnLaunchContextTest : public ::testing::Test {
 public:
  // the context under test, reconstructed in SetUp for each case
  std::unique_ptr<CinnLaunchContext> launch_context;
  // borrowed pointer to the cached compiled object (owned by the static
  // singleton inside InitDefaultCompiledObject)
  CinnCompiledObject* compiled_obj;

  void SetUp() override {
    compiled_obj = InitDefaultCompiledObject();
    launch_context = std::make_unique<CinnLaunchContext>(InitDefaultSubgraph(),
                                                         *compiled_obj);
  }
};
// Verify what the constructor builds: both variable name maps (checked via
// IsVariableUsed), the extracted internal variable set, and one initialized
// cinn_buffer_t per execution argument sized by the compiled tensor dims.
// NOTE: stale assertions from a previous revision were removed — they called
// IsArgumentsInitialized(), which no longer exists on CinnLaunchContext.
TEST_F(CinnLaunchContextTest, TestConstructResult) {
  ASSERT_EQ(launch_context->IsVariableUsed("var1"), true);
  ASSERT_EQ(launch_context->IsVariableUsed("var2"), true);
  ASSERT_EQ(launch_context->IsVariableUsed("var3"), true);
  // var4 is absent from paddle2cinn_varmap, so it is reported unused
  ASSERT_EQ(launch_context->IsVariableUsed("var4"), false);
  ASSERT_EQ(launch_context->IsVariableUsed("var5"), true);

  // check result of ExtractInternalVarNames
  ASSERT_EQ(launch_context->GetInternalVarNames(),
            std::unordered_set<std::string>({"var3", "cinn_var4"}));
  // check completeness of arguments list, and also check
  // the two name maps of the paddle->cinn and the reverse one
  // through the IsVariableUsed interface
  auto&& arguments = launch_context->FinalizeArguments();
  ASSERT_EQ(arguments.size(), 5);
  auto check_argument_fn = [&arguments, this](const std::string& var_name,
                                              const std::string& arg_name) {
    ASSERT_EQ(launch_context->IsVariableUsed(var_name), true);
    ASSERT_NO_THROW(launch_context->GetCinnBufferOfVar(var_name));
    ASSERT_GT(arguments.count(arg_name), 0);
    EXPECT_EQ(launch_context->GetCinnBufferOfVar(var_name),
              static_cast<cinn_buffer_t*>(arguments.at(arg_name)));
    // buffer dims must equal the compiled tensor's shape
    auto* buffer = launch_context->GetCinnBufferOfVar(var_name);
    auto&& scope = compiled_obj->scope;
    ASSERT_EQ(framework::DDim(buffer->dims, buffer->dimensions),
              phi::make_ddim(scope->GetTensor(arg_name)->shape().data()));
  };
  check_argument_fn("var1", "cinn_var1");
  check_argument_fn("var2", "cinn_var2");
  check_argument_fn("var3", "cinn_var3");
  check_argument_fn("cinn_var4", "cinn_var4");
  check_argument_fn("var5", "cinn_var5");
}
TEST(CinnLaunchContextTest, TestCheckTensorEquivalent) {
// A Paddle tensor whose dims differ from the compiled shape ((3, 4) for
// var1 in the default compiled object) must be rejected.
// NOTE: a stale line from a previous revision was removed — it called the
// removed helper CreateDefaultLaunchContext() and shadowed the fixture's
// launch_context member.
TEST_F(CinnLaunchContextTest, TestCheckTensorEquivalent) {
  platform::CPUPlace place;
  framework::Scope scope;
  launch_context->UpdateCapturedEnv(scope, place);
  auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();

  // CheckTensorEquivalent: tensor dimension not equivalent
  tensor1->mutable_data<float>(phi::make_ddim({3, 5}), place);
  ASSERT_THROW(launch_context->CheckTensorEquivalent("var1", *tensor1),
               paddle::platform::EnforceNotMet);
}
TEST(CinnLaunchContextTest, TestAssignVariablePreCondition) {
// Check the Paddle program rebuilt from the compiled CINN instructions:
// one VarDesc per argument (with shape and Persistable/IsParameter flags
// copied over) and one cinn_instruction_run op per instruction.
// NOTE: stale lines from the removed TestAssignVariablePreCondition test
// (CreateDefaultLaunchContext call, tensor4 setup, and the two ASSERT_THROW
// precondition checks) were removed.
TEST_F(CinnLaunchContextTest, TestBuildCompiledProgram) {
  platform::CPUPlace place;
  framework::Scope scope;
  ParallelExecutor* pe = nullptr;
  ASSERT_NO_THROW((pe = launch_context->InitializePE(place, &scope)));

  // check details of program build by compiled instructions
  const ProgramDesc& program = pe->Graph().OriginProgram();
  ASSERT_EQ(program.Size(), 1);
  const auto& block = program.Block(0);
  // vars
  std::set<std::string> var_names = block.LocalVarNames();
  ASSERT_EQ(var_names.size(), 5);
  for (auto&& var_name : var_names) {
    auto* var = block.FindVar(var_name);
    ASSERT_NE(var, nullptr);
    // every VarDesc's shape must equal the dims of its execution buffer
    auto* buffer = launch_context->GetCinnBufferOfVar(var_name);
    ASSERT_EQ(framework::DDim(buffer->dims, buffer->dimensions),
              phi::make_ddim(var->GetShape()));
  }
  // flags copied from the original subgraph's VarDescs
  ASSERT_TRUE(block.FindVar("var1")->Persistable());
  ASSERT_FALSE(block.FindVar("var5")->Persistable());
  ASSERT_TRUE(block.FindVar("var5")->IsParameter());
  ASSERT_FALSE(block.FindVar("var1")->IsParameter());
  // ops
  ASSERT_EQ(block.OpSize(), 3);
  auto* op1 = block.Op(0);
  ASSERT_EQ(op1->Type(), "cinn_instruction_run");
  ASSERT_EQ(op1->Input(kX), std::vector<std::string>({"var1", "var2"}));
  ASSERT_EQ(op1->Output(kOutputs), std::vector<std::string>({"var3"}));
  ASSERT_EQ(op1->GetAttrIfExists<int64_t>(kCachedIndex), 110);
  ASSERT_EQ(op1->GetAttrIfExists<int64_t>(kInstructionIndex), 0);
  auto* op3 = block.Op(2);
  ASSERT_EQ(op3->Type(), "cinn_instruction_run");
  ASSERT_EQ(op3->Input(kX), std::vector<std::string>({"var3", "cinn_var4"}));
  ASSERT_EQ(op3->Output(kOutputs), std::vector<std::string>({"var5"}));
  ASSERT_EQ(op3->GetAttrIfExists<int64_t>(kCachedIndex), 110);
  ASSERT_EQ(op3->GetAttrIfExists<int64_t>(kInstructionIndex), 2);
}
TEST(CinnLaunchContextTest, TestAppendArgument) {
platform::CPUPlace cpu_place;
platform::Place place(cpu_place);
// DEPRECATED(CtfGo): following test of callback assignment
// will be deprecated after we switch to pe
TEST_F(CinnLaunchContextTest, TestCallbackAssignment) {
platform::CPUPlace place;
framework::Scope scope;
auto launch_context = CreateDefaultLaunchContext();
launch_context->UpdateCapturedEnv(scope, place);
// assign external variables
......@@ -101,33 +243,8 @@ TEST(CinnLaunchContextTest, TestAppendArgument) {
float* data1 = tensor1->mutable_data<float>(phi::make_ddim({3, 4}), place);
data1[0] = 9.99f;
data1[10] = 19.99f;
ASSERT_NO_THROW(launch_context->AssignExternalVariable("var1"));
auto* tensor3 = scope.Var("var3")->GetMutable<LoDTensor>();
tensor3->mutable_data<float>(phi::make_ddim({10, 16}), place);
ASSERT_NO_THROW(launch_context->AssignExternalVariable("var3"));
// FinalizeArguments missed check
ASSERT_THROW(launch_context->FinalizeArguments(),
paddle::platform::EnforceNotMet);
// test get internal variables
auto internal_variable_names =
launch_context->ExtractInternalVarNames({"var1"}, {"var3"});
ASSERT_EQ(internal_variable_names.size(), 1);
EXPECT_EQ(*internal_variable_names.begin(), "cinn_var2");
auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
tensor2->mutable_data<float>(phi::make_ddim({6, 7, 8}), place);
ASSERT_NO_THROW(launch_context->AssignInternalVariable("cinn_var2"));
// check argument is set correctly and alloc/free callbacks work well
auto name2argument = launch_context->FinalizeArguments();
ASSERT_EQ(name2argument.size(), 3);
ASSERT_EQ(name2argument.count("cinn_var1"), 1);
ASSERT_TRUE(launch_context->IsArgumentsInitialized());
auto* cinn_buffer =
static_cast<cinn_buffer_t*>(name2argument.at("cinn_var1"));
auto* cinn_buffer = launch_context->GetCinnBufferOfVar("var1");
ASSERT_EQ(cinn_buffer->memory, nullptr);
cinn_buffer->external_malloc->operator()(nullptr, cinn_buffer);
ASSERT_NE(cinn_buffer->memory, nullptr);
......
......@@ -105,63 +105,29 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
auto* launch_context = cinn_compiled_object.launch_context.get();
// Step 3. Prepare arguments needed for the compiled executable program.
launch_context->UpdateCapturedEnv(scope, place);
if (!launch_context->IsArgumentsInitialized()) {
VLOG(4) << "CinnLaunchOp prepare arguments";
// 3.1 Prepare input variables: tensors of input variables have
// been initialized before graph compiled, just check the
// equiality between tensors of paddle and cinn.
for (const auto& var_name : input_no_need_buffer_variable_names) {
// the input variable declared as 'no need buffer' can not be used
PADDLE_ENFORCE_EQ(
launch_context->IsVariableUsed(var_name), false,
platform::errors::InvalidArgument(
"Input variable(%s) should not be used by cinn in execution",
var_name));
}
for (const auto& var_name : input_x_variable_names) {
// some input variables don't need for cinn because they are
// eliminated by optimized passes or some cinn operators use
// less variables
if (!launch_context->IsVariableUsed(var_name)) {
VLOG(4) << "Input variable" << var_name << " not used by cinn";
continue;
}
launch_context->AssignExternalVariable(var_name);
}
// 3.2 Prepare output variables: all output variables should
// be initialized and allocated buffer before
// the runtime program start execution, the compilation result
// includes details of their buffer assginment and we use that to
// allocate space in Paddle. For those variables allocated yet,
// like persistable parameters, just check the equiality between
// Paddle allocation and CINN buffer assginment.
auto output_variable_names = ctx.OutputNames(kOutputs);
for (const auto var_name : output_variable_names) {
PADDLE_ENFORCE_EQ(
launch_context->IsVariableUsed(var_name), true,
platform::errors::InvalidArgument(
"Output variable(%s) not used by cinn", var_name));
launch_context->AssignExternalVariable(var_name);
}
// 3.3 Prepare internal or temporary variables: Create a temporary
// scope to keep internal variables within graph or temporary
// variables needed by the compiled runtime program in addition.
// Here we directly use the names from CinnScope as Paddle variable
// names, because they will not be used outside the graph
// and should be destructed after computation finished.
auto internal_variable_names = launch_context->ExtractInternalVarNames(
input_x_variable_names, output_variable_names);
for (const auto& var_name : internal_variable_names) {
launch_context->AssignInternalVariable(var_name);
// 3.1 Input variables: tensors of input variables have
// been initialized before graph compiled, just check the
// equiality between tensors of paddle and cinn.
for (const auto& var_name : input_x_variable_names) {
// some input variables don't need for cinn because they are
// eliminated by optimized passes or some cinn operators use
// less variables
if (!launch_context->IsVariableUsed(var_name)) {
VLOG(4) << "Input variable" << var_name << " not used by cinn";
continue;
}
launch_context->CheckTensorEquivalent(var_name,
*inputs_name2tensor.at(var_name));
}
// 3.2 Output variables: the output variables will be initialized
// and allocated buffer in callbacks which are defined in the
// external_malloc/free interface of cinn_buffer_t
// in their corresponding arguments.
// 3.3 Internal variables: A temporary scope is created in
// UpdateCapturedEnv to keep the internal variables and
// they are also initialized through callbacks
// Step 4. Set CINN runtime FLAGS, such as FLAGS_cinn_cudnn_deterministic.
details::SetCinnRuntimeFlags();
......
......@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/paddle2cinn/build_cinn_pass.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/phi/core/ddim.h"
......@@ -31,6 +32,7 @@ using LoDTensor = framework::LoDTensor;
using Variable = framework::Variable;
using Graph = framework::ir::Graph;
using Node = framework::ir::Node;
using framework::paddle2cinn::Name2VarInfoMap;
std::unique_ptr<Graph> CreateOnlyElementwiseAddGraph(
const std::string& x_name, const std::string& y_name,
......@@ -71,6 +73,16 @@ std::unique_ptr<Graph> CreateOnlyElementwiseAddGraph(
y_node->inputs = {feed_op_node_y};
y_node->outputs = {elementwise_add_node};
out_node->inputs = {elementwise_add_node};
// set necessary attributes
g->Set<std::vector<std::string>>(
framework::paddle2cinn::kInputVars,
new std::vector<std::string>({x_name, y_name}));
g->Set<std::vector<std::string>>(framework::paddle2cinn::kInternalVars,
new std::vector<std::string>({}));
g->Set<std::vector<std::string>>(framework::paddle2cinn::kOutputVars,
new std::vector<std::string>({out_name}));
g->GetOrInit<Name2VarInfoMap>(
framework::paddle2cinn::kMemOptVarInfoFromMainGraph);
return g;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册