diff --git a/paddle/fluid/operators/cinn/CMakeLists.txt b/paddle/fluid/operators/cinn/CMakeLists.txt index b80916616a18b7521d6ae32711ca247fdfd3e403..a2fc080faadcf9c24ccc703524cd71da92ce7cdb 100644 --- a/paddle/fluid/operators/cinn/CMakeLists.txt +++ b/paddle/fluid/operators/cinn/CMakeLists.txt @@ -10,6 +10,10 @@ if (WITH_TESTING) cc_test(cinn_launch_context_test SRCS cinn_launch_context_test.cc DEPS ddim lod_tensor scope cinn_launch_context) set_tests_properties(cinn_launch_context_test PROPERTIES LABELS "RUN_TYPE=CINN") + SET(CINN_RUN_ENVIRONMENT "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda") cc_test(cinn_launch_op_test SRCS cinn_launch_op_test.cc DEPS cinn_compiler cinn_launch_op elementwise_add_op) - set_tests_properties(cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda") + set_tests_properties(cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT "${CINN_RUN_ENVIRONMENT}") + + cc_test(cinn_instruction_run_op_test SRCS cinn_instruction_run_op_test.cc DEPS cinn_compiler cinn_launch_op cinn_instruction_run_op elementwise_add_op) + set_tests_properties(cinn_instruction_run_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT "${CINN_RUN_ENVIRONMENT}") endif() diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..7c4bdc09a569e455b20febef278003ada923dd79 --- /dev/null +++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc @@ -0,0 +1,90 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "gtest/gtest.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/operators/cinn/test_helper.h" +#include "paddle/fluid/platform/cpu_helper.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/init.h" + +USE_OP(cinn_launch); +USE_OP(cinn_instruction_run); +USE_OP_ITSELF(elementwise_add); + +namespace paddle::operators { + +using framework::paddle2cinn::CinnCompiler; + +TEST(CinnInstructionOpTest, TestWithElementwiseAdd) { + paddle::framework::InitDevices(); + platform::SetNumThreads(1); + // cache test graph into CinnCompiler + const std::string& test_op_out_name = "cinn_instruction_run_op_out"; + const std::string& add_op_out_name = "add_op_out"; + auto compilation_key = CinnCompiler::GetInstance()->AddGraph( + CreateOnlyElementwiseAddGraph("x", "y", test_op_out_name)); + + // create a cinn_launch_op and run firstly to launch the compilation + // of the above graph and cache the compiled object in CinnCompiler + auto cinn_launch_op = paddle::framework::OpRegistry::CreateOp( + "cinn_launch", {{"X", {"x", "y"}}}, {{"Out", {test_op_out_name}}}, + {{"compilation_key", compilation_key}}); + + // create cinn_instruction_run_op and elementwise_add op + auto cinn_instruction_run_op = paddle::framework::OpRegistry::CreateOp( + "cinn_instruction_run", {{"X", {"x", "y"}}}, + {{"Out", {test_op_out_name}}}, + {{"cached_index", 0}, {"instruction_index", 1}}); + auto elementwise_add_op = 
paddle::framework::OpRegistry::CreateOp( + "elementwise_add", {{"X", {"x"}}, {"Y", {"y"}}}, + {{"Out", {add_op_out_name}}}, {{}}); + + // check case: a compiled object not cached before cinn_launch_op run, + // so a cinn_instruction_run_op will throw an error + framework::Scope scope; + platform::CPUPlace place; + InitVariablesWithRandomValue({"x", "y"}, {10, 20}, place, &scope); + scope.Var(test_op_out_name)->GetMutable(); + ASSERT_THROW(cinn_instruction_run_op->Run(scope, place), + paddle::platform::EnforceNotMet); + cinn_launch_op->Run(scope, place); + + // Run ops and check the computation results + auto run_and_check_fn = [&](const platform::Place& place) { + framework::Scope scope; + InitVariablesWithRandomValue({"x", "y"}, {10, 20}, place, &scope); + scope.Var(test_op_out_name)->GetMutable(); + scope.Var(add_op_out_name)->GetMutable(); + cinn_instruction_run_op->Run(scope, place); + elementwise_add_op->Run(scope, place); + CompareOpResult(scope.GetVar(test_op_out_name), + scope.GetVar(add_op_out_name)); + }; + + // CPU + run_and_check_fn(platform::CPUPlace()); + run_and_check_fn(platform::CPUPlace()); +#ifdef PADDLE_WITH_CUDA + // GPU + run_and_check_fn(platform::CUDAPlace()); + run_and_check_fn(platform::CUDAPlace()); +#endif +} + +} // namespace paddle::operators diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc index 282a8f69e4ec5c194bf5226132ced33ad02ac676..2ad958328ce465ca66311478e568db91395405bc 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc @@ -13,26 +13,47 @@ // limitations under the License. 
#include "paddle/fluid/operators/cinn/cinn_launch_context.h" +#include #include +#include #include +#include "cinn/hlir/framework/scope.h" +#include "cinn/hlir/framework/tensor.h" +#include "cinn/runtime/cinn_runtime.h" +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/string/printf.h" namespace paddle { -namespace operators { -namespace details { +namespace operators::details { + +using LoDTensor = framework::LoDTensor; CinnLaunchContext::CinnLaunchContext( const std::unordered_map& paddle2cinn_varmap, const std::shared_ptr& cinn_scope) - : paddle2cinn_varmap_(paddle2cinn_varmap), cinn_scope_(cinn_scope) { - // generate all names of cinn used variables + : cinn_scope_(cinn_scope) { + // generate all names of the cinn execution arguments auto var_names = cinn_scope_->var_names(); - cinn_variable_names_.reserve(var_names.size()); + cinn_argument_names_.reserve(var_names.size()); std::transform( var_names.begin(), var_names.end(), - std::inserter(cinn_variable_names_, cinn_variable_names_.end()), + std::inserter(cinn_argument_names_, cinn_argument_names_.end()), [](const auto& name_view) { return std::string(name_view.data()); }); - // build the variable name map of cinn2paddle - for (const auto& x : paddle2cinn_varmap_) { + // build name map between the original variables and compiled ones + BuildVarNameMap(paddle2cinn_varmap, cinn_argument_names_); +} + +void CinnLaunchContext::BuildVarNameMap( + const std::unordered_map& compiled_varmap, + const std::unordered_set& argument_names) { + for (const auto& x : compiled_varmap) { + if (!argument_names.count(x.second)) { + // exclude variables not used + continue; + } + // copy to local paddle2cinn map + paddle2cinn_varmap_.emplace(x.first, x.second); + // add an entry to local cinn2paddle map reversely auto res = cinn2paddle_varmap_.emplace(x.second, x.first); PADDLE_ENFORCE_EQ( res.second, true, @@ -40,15 +61,21 @@ CinnLaunchContext::CinnLaunchContext( "Cinn variable(%s) maps to more than one paddle 
variable(%s,%s)", x.second, res.first->second, x.first)); } - // supplement the relations of the remain variables not appearing in above - // map, - // they are internal variables and here we use the name from cinn compiled. - for (const auto& var_name : cinn_variable_names_) { + // supplement the relations of the remaining variables + // not appearing in above map, which are internal variables + // and here we use the names from cinn compiled. + for (const auto& var_name : argument_names) { if (!cinn2paddle_varmap_.count(var_name)) { cinn2paddle_varmap_.emplace(var_name, var_name); paddle2cinn_varmap_.emplace(var_name, var_name); } } + + PADDLE_ENFORCE_EQ( + paddle2cinn_varmap_.size(), cinn2paddle_varmap_.size(), + platform::errors::PreconditionNotMet( + "Size of variables is not equal, paddle[%ld] vs cinn[%ld]", + paddle2cinn_varmap_.size(), cinn2paddle_varmap_.size())); } void CinnLaunchContext::UpdateCapturedEnv(const framework::Scope& scope, @@ -74,56 +101,61 @@ bool CinnLaunchContext::IsArgumentsInitialized() const { return true; } -bool CinnLaunchContext::IsVariableUsed( - const std::string& paddle_var_name) const { - return paddle2cinn_varmap_.count(paddle_var_name) > 0 && - cinn_variable_names_.count(paddle2cinn_varmap_.at(paddle_var_name)) > - 0; +bool CinnLaunchContext::IsVariableUsed(const std::string& var_name) const { + return paddle2cinn_varmap_.count(var_name) > 0; } -CinnTensor CinnLaunchContext::GetCinnTensor(const std::string& var_name) { - PADDLE_ENFORCE_GT(cinn_variable_names_.count(var_name), 0, - platform::errors::NotFound( - "Variable(%s) not found in cinn scope.", var_name)); - return cinn_scope_->GetTensor(var_name); +CinnTensor CinnLaunchContext::GetCinnTensor(const std::string& arg_name) { + PADDLE_ENFORCE_GT(cinn_argument_names_.count(arg_name), 0, + platform::errors::InvalidArgument( + "Variable(%s) not found in cinn scope.", arg_name)); + return cinn_scope_->GetTensor(arg_name); } -std::unordered_set 
CinnLaunchContext::GetInternalVariableNames() { - std::unordered_set all_parameters(cinn_variable_names_); - std::for_each(name2argument_.begin(), name2argument_.end(), - [&all_parameters](const auto& name2arg) { - all_parameters.erase(name2arg.first); - }); - return all_parameters; +std::unordered_set CinnLaunchContext::ExtractInternalVarNames( + const std::vector& input_var_names, + const std::vector& output_var_names) { + std::unordered_set remain_var_names; + remain_var_names.reserve(paddle2cinn_varmap_.size()); + std::transform(paddle2cinn_varmap_.begin(), paddle2cinn_varmap_.end(), + std::inserter(remain_var_names, remain_var_names.end()), + [](const auto& name_pair) { return name_pair.first; }); + + // exclude the input variables and output variables + auto exclude_names_fn = [&remain_var_names](const std::string& var_name) { + remain_var_names.erase(var_name); + }; + std::for_each(input_var_names.begin(), input_var_names.end(), + exclude_names_fn); + std::for_each(output_var_names.begin(), output_var_names.end(), + exclude_names_fn); + return remain_var_names; } -void CinnLaunchContext::CheckTensorEquivalent( - const std::string& paddle_var_name, const LoDTensor& paddle_tensor, - const CinnTensor& cinn_tensor) { +void CinnLaunchContext::CheckTensorEquivalent(const std::string& var_name, + const LoDTensor& paddle_tensor, + const CinnTensor& cinn_tensor) { // check dimension auto cinn_dims = framework::make_ddim(cinn_tensor->shape().data()); PADDLE_ENFORCE_EQ(paddle_tensor.dims(), cinn_dims, platform::errors::PreconditionNotMet( "Tensors' shape in variable(%s) are not equivalent, " "paddle's shape = [%s], but cinn's shape = [%s].", - paddle_var_name, paddle_tensor.dims(), cinn_dims)); + var_name, paddle_tensor.dims(), cinn_dims)); // TODO(CtfGo): check the underlying data type after CINN ready } -void CinnLaunchContext::AssignExternalVariable( - const std::string& paddle_var_name) { - PADDLE_ENFORCE_EQ( - IsVariableUsed(paddle_var_name), true, - 
platform::errors::InvalidArgument("Paddle variable(%s) not used by cinn", - paddle_var_name)); - - const auto& cinn_var_name = paddle2cinn_varmap_.at(paddle_var_name); - const auto& paddle_tensor = - cached_scope_->GetVar(paddle_var_name)->Get(); - CinnTensor cinn_tensor = GetCinnTensor(cinn_var_name); +void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) { + PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true, + platform::errors::InvalidArgument( + "Variable(%s) not applied in cinn", var_name)); + const auto& cinn_arg_name = paddle2cinn_varmap_.at(var_name); + + const auto& paddle_tensor = cached_scope_->GetVar(var_name)->Get(); + CinnTensor cinn_tensor = GetCinnTensor(cinn_arg_name); if (paddle_tensor.IsInitialized()) { - CheckTensorEquivalent(paddle_var_name, paddle_tensor, cinn_tensor); + CheckTensorEquivalent(var_name, paddle_tensor, cinn_tensor); } auto cinn_buffer = std::make_unique(); @@ -131,9 +163,8 @@ void CinnLaunchContext::AssignExternalVariable( cinn_buffer->resize(cinn_tensor->shape().data().data(), cinn_tensor->shape().data().size()); cinn_buffer->external_malloc = new std::function( - [this, paddle_var_name](void* ctx, cinn_buffer_t* buffer) { - auto* tensor = - cached_scope_->GetVar(paddle_var_name)->GetMutable(); + [this, var_name](void* ctx, cinn_buffer_t* buffer) { + auto* tensor = cached_scope_->GetVar(var_name)->GetMutable(); tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions)); buffer->memory = reinterpret_cast( tensor->mutable_data(*cached_place_)); @@ -147,25 +178,25 @@ void CinnLaunchContext::AssignExternalVariable( return 0; }); - return SetArgument(cinn_var_name, std::move(cinn_buffer)); + return AppendArgument(cinn_arg_name, std::move(cinn_buffer)); } -void CinnLaunchContext::AssignInternalVariable( - const std::string& cinn_var_name) { - PADDLE_ENFORCE_GT( - cinn_variable_names_.count(cinn_var_name), 0, - platform::errors::InvalidArgument("Variable(%s) not found in cinn socpe.", - cinn_var_name)); - 
CinnTensor cinn_tensor = GetCinnTensor(cinn_var_name); +void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) { + PADDLE_ENFORCE_EQ(IsVariableUsed(var_name), true, + platform::errors::InvalidArgument( + "Variable(%s) not applied in cinn", var_name)); + const auto& cinn_arg_name = paddle2cinn_varmap_.at(var_name); + + CinnTensor cinn_tensor = GetCinnTensor(cinn_arg_name); auto cinn_buffer = std::make_unique(); // assign dimensions and alloc/free callback of cinn_buffer_t cinn_buffer->resize(cinn_tensor->shape().data().data(), cinn_tensor->shape().data().size()); cinn_buffer->external_malloc = new std::function( - [this, cinn_var_name](void* ctx, cinn_buffer_t* buffer) { + [this, var_name](void* ctx, cinn_buffer_t* buffer) { auto* tensor = - cached_temp_scope_->Var(cinn_var_name)->GetMutable(); + cached_temp_scope_->Var(var_name)->GetMutable(); tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions)); buffer->memory = reinterpret_cast( tensor->mutable_data(*cached_place_)); @@ -175,53 +206,52 @@ void CinnLaunchContext::AssignInternalVariable( // internal variables should release its buffer immediately // if no instruction use it cinn_buffer->external_free = new std::function( - [this, cinn_var_name](void* ctx, cinn_buffer_t* buffer) { + [this, var_name](void* ctx, cinn_buffer_t* buffer) { auto* tensor = - cached_temp_scope_->GetVar(cinn_var_name)->GetMutable(); + cached_temp_scope_->GetVar(var_name)->GetMutable(); tensor->clear(); return 0; }); - return SetArgument(cinn_var_name, std::move(cinn_buffer)); + return AppendArgument(cinn_arg_name, std::move(cinn_buffer)); } -void CinnLaunchContext::SetArgument(const std::string& cinn_var_name, - std::unique_ptr&& buffer) { - VLOG(4) << "SetArgument-" << name2argument_.size() << ": name(" - << cinn_var_name << "), dims(" - << framework::DDim(buffer->dims, buffer->dimensions) << ")."; - - name2argument_.emplace(cinn_var_name, buffer.get()); +void CinnLaunchContext::AppendArgument( + const 
std::string& arg_name, std::unique_ptr&& buffer) { + name2argument_.emplace(arg_name, buffer.get()); + // log before ownership is transferred: `buffer` is null after the move + VLOG(4) << string::Sprintf( + "Append an argument:name(%s),dims(%s),argument size:(%lu)", arg_name, + framework::DDim(buffer->dims, buffer->dimensions).to_str(), + name2argument_.size()); hold_buffers_.emplace_back(std::move(buffer)); } const std::map& CinnLaunchContext::FinalizeArguments() const { // Check all execution parameters are assigned valued. - std::for_each(cinn_variable_names_.begin(), cinn_variable_names_.end(), - [this](const auto& var_name) { - PADDLE_ENFORCE_GT(name2argument_.count(var_name), 0, - platform::errors::InvalidArgument( - "Variable(%s) is missed for launching " - "compiled program execution", - var_name)); + std::for_each(cinn_argument_names_.begin(), cinn_argument_names_.end(), + [this](const auto& arg_name) { + PADDLE_ENFORCE_GT( + name2argument_.count(arg_name), 0, + platform::errors::NotFound( + "Argument(%s) is missed for execution", arg_name)); }); return name2argument_; } cinn_buffer_t* CinnLaunchContext::GetCinnBufferOfVar( - const std::string& paddle_var_name) { - auto res = paddle2cinn_varmap_.find(paddle_var_name); + const std::string& var_name) { + auto it = paddle2cinn_varmap_.find(var_name); PADDLE_ENFORCE_NE( - res, paddle2cinn_varmap_.end(), + it, paddle2cinn_varmap_.end(), platform::errors::InvalidArgument( - "Variable(%s) not found in compilation result", paddle_var_name)); - auto it = name2argument_.find(res->second); - PADDLE_ENFORCE_NE(it, name2argument_.end(), - platform::errors::InvalidArgument( - "Argument(%s) not be initialized", res->second)); - return static_cast(it->second); + "Variable(%s) not found in compilation result", var_name)); + auto res = name2argument_.find(it->second); + PADDLE_ENFORCE_NE(res, name2argument_.end(), + platform::errors::NotFound( + "Argument(%s) not be initialized", it->second)); + return static_cast(res->second); } -} // 
namespace details -} // namespace operators +} // namespace 
operators::details } // namespace paddle diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.h b/paddle/fluid/operators/cinn/cinn_launch_context.h index 71ddeb35420b52c12787cb3873fbe5b7d4f7b8c1..52c90175a7b0d045c2077bbc0e4fae12c575fdd4 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.h +++ b/paddle/fluid/operators/cinn/cinn_launch_context.h @@ -19,21 +19,33 @@ #include #include #include -#include "cinn/hlir/framework/scope.h" -#include "cinn/hlir/framework/tensor.h" -#include "cinn/runtime/cinn_runtime.h" -#include "paddle/fluid/framework/ddim.h" +#include #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/place.h" + +// forward type declarations +struct cinn_buffer_t; +struct cinn_pod_value_t; +namespace cinn::hlir::framework { +class Tensor; +class Scope; +class Program; +} // namespace cinn::hlir::framework namespace paddle { -namespace operators { -namespace details { +namespace operators::details { -using LoDTensor = framework::LoDTensor; using CinnTensor = ::cinn::hlir::framework::Tensor; using CinnScope = ::cinn::hlir::framework::Scope; +// This class is used to cache some reusable data among repeated +// executions for efficiency and it also provides easy interfaces +// to get details of the compilation result. +// An object of this class is constructed and saved in the +// compilation cache once a graph is compiled by CINN. +// Generally speaking, here, a variable refers to a Paddle +// Variable while a CINN variable is called an Argument. 
class CinnLaunchContext { public: explicit CinnLaunchContext( @@ -48,55 +60,63 @@ class CinnLaunchContext { // Return whether execution arguments has been initialized bool IsArgumentsInitialized() const; - // Return whether a Paddle variable used on compiled kernels - bool IsVariableUsed(const std::string& paddle_var_name) const; + // Return whether a Paddle variable used in cinn execution + bool IsVariableUsed(const std::string& var_name) const; // Assign tensor buffer to input or output variables - void AssignExternalVariable(const std::string& paddle_var_name); + void AssignExternalVariable(const std::string& var_name); // Assign tensor buffer to internal variables - void AssignInternalVariable(const std::string& cinn_var_name); + void AssignInternalVariable(const std::string& var_name); - // Extract internal variable names from CinnScope - // by excluding used input and output variables - std::unordered_set GetInternalVariableNames(); + // Extract internal variable names from all applied variables + // in execution by excluding the input and output variables + std::unordered_set ExtractInternalVarNames( + const std::vector& input_var_names, + const std::vector& output_var_names); - // Finalize all execution arguments and return them + // Finalize all execution arguments and return the name->argument map const std::map& FinalizeArguments() const; - cinn_buffer_t* GetCinnBufferOfVar(const std::string& paddle_var_name); + // Return the cinn_buffer_t* of a specific variable + cinn_buffer_t* GetCinnBufferOfVar(const std::string& var_name); private: - // Get CinnTensor with CINN variable name - CinnTensor GetCinnTensor(const std::string& var_name); - - // Check whether tensors from Paddle and CINN of the same variable + // Get CinnTensor with CINN argument name + CinnTensor GetCinnTensor(const std::string& arg_name); + // Build the name maps of paddle->cinn and cinn->paddle + // in reverse for all variables used in cinn execution + void BuildVarNameMap( + const 
std::unordered_map& compiled_varmap, + const std::unordered_set& argument_names); + + // Check whether the tensor in Paddle and the compiled + // tensor returned by CINN of the same variable // are equivalent in type and dimension void CheckTensorEquivalent(const std::string& var_name, - const LoDTensor& paddle_tensor, + const framework::LoDTensor& paddle_tensor, const CinnTensor& cinn_tensor); - // Set an argument with (cinn name)->(cinn_buffer_t) pair - void SetArgument(const std::string& cinn_var_name, - std::unique_ptr&& buffer); + // Append an argument with (cinn name)->(cinn_buffer_t) pair + void AppendArgument(const std::string& arg_name, + std::unique_ptr&& buffer); private: const framework::Scope* cached_scope_ = nullptr; const platform::Place* cached_place_ = nullptr; std::unique_ptr cached_temp_scope_ = nullptr; - // a variable name map from paddle to cinn + // a name map from paddle variables to cinn execution arguments std::unordered_map paddle2cinn_varmap_; - // a variable name map from cinn to paddle + // a name map from cinn execution arguments to paddle variables std::unordered_map cinn2paddle_varmap_; - // the variable scope of cinn + // the names of the cinn arguments used in compiled executable program + std::unordered_set cinn_argument_names_; + // the variable scope compiled from cinn const std::shared_ptr cinn_scope_; - // all names of cinn variables used by compiled executable program - std::unordered_set cinn_variable_names_; - - // because a cinn_pod_value_t does not own the cinn_buffer_t object, - // an extra stroage is necessary to keep the object and it can + // because a cinn_pod_value_t does not own a cinn_buffer_t object, + // an extra storage is necessary to keep those objects and they can // not be released until the runtime program finish execution. 
std::vector> hold_buffers_; @@ -105,6 +125,5 @@ class CinnLaunchContext { std::map name2argument_; }; -} // namespace details -} // namespace operators +} // namespace operators::details } // namespace paddle diff --git a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc index da7640c3c0f682e7b5364543f7c06d1c6e463e94..a97636a4e9f98e66b5fe6c363573af6e41302090 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc @@ -13,14 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cinn/cinn_launch_context.h" +#include "cinn/hlir/framework/scope.h" +#include "cinn/hlir/framework/tensor.h" +#include "cinn/runtime/cinn_runtime.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/scope.h" namespace paddle { -namespace operators { -namespace details { +namespace operators::details { +using LoDTensor = framework::LoDTensor; using CinnShape = ::cinn::hlir::framework::Shape; std::unique_ptr CreateDefaultLaunchContext() { @@ -86,7 +89,7 @@ TEST(CinnLaunchContextTest, TestAssignVariablePreCondition) { paddle::platform::EnforceNotMet); } -TEST(CinnLaunchContextTest, TestSetArgument) { +TEST(CinnLaunchContextTest, TestAppendArgument) { platform::CPUPlace cpu_place; platform::Place place(cpu_place); framework::Scope scope; @@ -109,7 +112,8 @@ TEST(CinnLaunchContextTest, TestSetArgument) { ASSERT_THROW(launch_context->FinalizeArguments(), paddle::platform::EnforceNotMet); // test get internal variables - auto internal_variable_names = launch_context->GetInternalVariableNames(); + auto internal_variable_names = + launch_context->ExtractInternalVarNames({"var1"}, {"var3"}); ASSERT_EQ(internal_variable_names.size(), 1); EXPECT_EQ(*internal_variable_names.begin(), "cinn_var2"); @@ -134,6 +138,5 @@ 
TEST(CinnLaunchContextTest, TestSetArgument) { EXPECT_FLOAT_EQ(shadow_data[10], 19.99f); } -} // namespace details -} // namespace operators +} // namespace operators::details } // namespace paddle diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h index bd9b30f559bdb5e6af3081125d9278ad21046cd7..1db9f2f25e270fa61309f3d2e2522b37c73992f4 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn/cinn_launch_op.h @@ -155,7 +155,8 @@ class CinnLaunchOpKernel : public framework::OpKernel { // Here we directly use the names from CinnScope as Paddle variable // names, because they will not be used outside the graph // and should be destructed after computation finished. - auto internal_variable_names = launch_context->GetInternalVariableNames(); + auto internal_variable_names = launch_context->ExtractInternalVarNames( + input_x_variable_names, output_variable_names); for (const auto& var_name : internal_variable_names) { launch_context->AssignInternalVariable(var_name); } diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc index b4cd91ea8a4bce6f8a2bbeb01d15f03cb5053de7..fb3b4d99a19ded872cec342d7dd3bf2f345c330b 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc @@ -18,154 +18,54 @@ limitations under the License. 
*/ #include #include #include "gtest/gtest.h" -#include "paddle/fluid/framework/ddim.h" -#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/operators/cinn/test_helper.h" #include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/init.h" USE_OP(cinn_launch); USE_OP_ITSELF(elementwise_add); -namespace paddle { -namespace operators { +namespace paddle::operators { -using framework::ir::Graph; -using framework::ir::Node; +using framework::paddle2cinn::CinnCompiler; -std::unique_ptr CreateOnlyElementwiseAddGraph( - const std::string& x_name, const std::string& y_name, - const std::string& out_name) { - auto g = std::make_unique(framework::ProgramDesc()); - framework::OpDesc feed_op_x, feed_op_y; - feed_op_x.SetType("feed"); - feed_op_x.SetOutput("Out", {x_name}); - feed_op_y.SetType("feed"); - feed_op_y.SetOutput("Out", {y_name}); - - framework::VarDesc x_var(x_name); - framework::VarDesc y_var(y_name); - framework::VarDesc out_var(out_name); - - framework::OpDesc elementwise_add_op; - elementwise_add_op.SetType("add"); - elementwise_add_op.SetInput("X", {x_name}); - elementwise_add_op.SetInput("Y", {y_name}); - elementwise_add_op.SetOutput("Out", {out_name}); - - auto* feed_op_node_x = g->CreateOpNode(&feed_op_x); - auto* feed_op_node_y = g->CreateOpNode(&feed_op_y); - auto* elementwise_add_node = g->CreateOpNode(&elementwise_add_op); - auto* x_node = g->CreateVarNode(&x_var); - auto* y_node = g->CreateVarNode(&y_var); - auto* out_node = g->CreateVarNode(&out_var); - - // fill op node - feed_op_node_x->outputs = {x_node}; - feed_op_node_y->outputs = {y_node}; - elementwise_add_node->inputs = {x_node, y_node}; - elementwise_add_node->outputs = {out_node}; - - // fill variable node - x_node->inputs = {feed_op_node_x}; - x_node->outputs = 
{elementwise_add_node};
-  y_node->inputs = {feed_op_node_y};
-  y_node->outputs = {elementwise_add_node};
-  out_node->inputs = {elementwise_add_node};
-  return g;
-}
-
-void CreateInputVariablesWithRandomData(
-    const std::vector<std::string>& variable_names,
-    const framework::DDim& common_ddim, framework::Scope* scope) {
-  std::random_device seed;
-  std::default_random_engine engine(seed());
-  std::uniform_real_distribution<float> dist(0.f, 2.f);
-
-  for (const auto& var_name : variable_names) {
-    auto* tensor = scope->Var(var_name)->GetMutable<LoDTensor>();
-    auto* data = tensor->mutable_data<float>(common_ddim, platform::CPUPlace());
-    for (auto i = 0; i < tensor->numel(); ++i) {
-      data[i] = dist(engine);
-    }
-  }
-}
-
-void CopyInputDataToPlace(const framework::Scope& scope,
-                          const platform::Place& dst_place,
-                          framework::Scope* dst_scope) {
-  for (const auto& var_name : scope.LocalVarNames()) {
-    const auto& src_tensor = scope.GetVar(var_name)->Get<LoDTensor>();
-    auto* dst_tensor = dst_scope->Var(var_name)->GetMutable<LoDTensor>();
-    paddle::framework::TensorCopySync(src_tensor, dst_place, dst_tensor);
-  }
-}
-
-TEST(CinnLaunchOpTest, TestElementwiseAddPass) {
+TEST(CinnLaunchOpTest, TestWithElementwiseAdd) {
   paddle::framework::InitDevices();
   platform::SetNumThreads(1);
   // cache test graph into CinnCompiler
-  const auto& test_out_name = "test_out";
-  const auto& expected_out_name = "expected_out";
+  const std::string& test_op_out_name = "cinn_launch_op_out";
+  const std::string& add_op_out_name = "add_op_out";
   auto compilation_key = CinnCompiler::GetInstance()->AddGraph(
-      CreateOnlyElementwiseAddGraph("test_x", "test_y", test_out_name));
+      CreateOnlyElementwiseAddGraph("x", "y", test_op_out_name));
+  // create cinn_launch_op and elementwise_add op
   auto cinn_launch_op = paddle::framework::OpRegistry::CreateOp(
-      "cinn_launch", {{"X", {"test_x", "test_y"}}}, {{"Out", {test_out_name}}},
+      "cinn_launch", {{"X", {"x", "y"}}}, {{"Out", {test_op_out_name}}},
       {{"compilation_key", compilation_key}});
   auto elementwise_add_op = paddle::framework::OpRegistry::CreateOp(
-      "elementwise_add", {{"X", {"test_x"}}, {"Y", {"test_y"}}},
-      {{"Out", {expected_out_name}}}, {{}});
-  // prepare input data
-  framework::Scope init_scope;
-  CreateInputVariablesWithRandomData({"test_x", "test_y"}, {10, 20},
-                                     &init_scope);
+      "elementwise_add", {{"X", {"x"}}, {"Y", {"y"}}},
+      {{"Out", {add_op_out_name}}}, {{}});
+  // Run ops and check the computation results
   auto run_and_check_fn = [&](const platform::Place& place) {
     framework::Scope scope;
-    CopyInputDataToPlace(init_scope, place, &scope);
-    scope.Var(test_out_name)->GetMutable<LoDTensor>();
-    scope.Var(expected_out_name)->GetMutable<LoDTensor>();
-
-    platform::Place run_place(place);
-    cinn_launch_op->Run(scope, run_place);
-    elementwise_add_op->Run(scope, run_place);
-
-    LoDTensor test_out, expected_out;
-    paddle::framework::TensorCopySync(
-        scope.Var(test_out_name)->Get<LoDTensor>(), platform::CPUPlace(),
-        &test_out);
-    paddle::framework::TensorCopySync(
-        scope.Var(expected_out_name)->Get<LoDTensor>(), platform::CPUPlace(),
-        &expected_out);
-
-    ASSERT_TRUE(test_out.IsInitialized());
-    ASSERT_TRUE(expected_out.IsInitialized());
-    ASSERT_EQ(test_out.dims(), expected_out.dims());
-    const auto* test_data = test_out.data<float>();
-    const auto* excepted_data = expected_out.data<float>();
-    for (auto i = 0; i < expected_out.numel(); ++i) {
-      EXPECT_FLOAT_EQ(test_data[i], excepted_data[i]);
-    }
+    InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope);
+    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(add_op_out_name)->GetMutable<LoDTensor>();
+    cinn_launch_op->Run(scope, place);
+    elementwise_add_op->Run(scope, place);
+    CompareOpResult<float>(scope.GetVar(test_op_out_name),
+                           scope.GetVar(add_op_out_name));
   };
-  LOG(INFO) << "Check compute result on cpu";
+  // CPU
   run_and_check_fn(platform::CPUPlace());
   run_and_check_fn(platform::CPUPlace());
-
 #ifdef PADDLE_WITH_CUDA
-  // create an new elementwise_add op
-  // because the above one cached the cpu kernel
-  LOG(INFO) << "Check compute result on gpu";
-  cinn_launch_op = paddle::framework::OpRegistry::CreateOp(
-      "cinn_launch", {{"X", {"test_x", "test_y"}}}, {{"Out", {test_out_name}}},
-      {{"compilation_key", compilation_key}});
-  elementwise_add_op = paddle::framework::OpRegistry::CreateOp(
-      "elementwise_add", {{"X", {"test_x"}}, {"Y", {"test_y"}}},
-      {{"Out", {expected_out_name}}}, {{}});
+  // GPU
   run_and_check_fn(platform::CUDAPlace());
   run_and_check_fn(platform::CUDAPlace());
 #endif
@@ -175,8 +75,6 @@ namespace details {
 // Testing helper function used on CinnLaunchOpKernel in the following:
 // firstly build test data, then check both expected and illegal situations
 
-using CinnShape = ::cinn::hlir::framework::Shape;
-
 TEST(CinnLaunchOpHelperTest, TestPlaceToCinnTarget) {
   ASSERT_EQ(PlaceToCinnTarget(platform::CPUPlace()),
             ::cinn::common::DefaultHostTarget());
@@ -187,5 +85,4 @@ TEST(CinnLaunchOpHelperTest, TestPlaceToCinnTarget) {
 }
 
 }  // namespace details
-}  // namespace operators
-}  // namespace paddle
+}  // namespace paddle::operators
diff --git a/paddle/fluid/operators/cinn/test_helper.h b/paddle/fluid/operators/cinn/test_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..7b8abcc33d59dcb97950ea8aa43f078b70be0799
--- /dev/null
+++ b/paddle/fluid/operators/cinn/test_helper.h
@@ -0,0 +1,137 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/ddim.h"
+#include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle::operators {
+
+using LoDTensor = framework::LoDTensor;
+using Variable = framework::Variable;
+using Graph = framework::ir::Graph;
+using Node = framework::ir::Node;
+
+// Builds a minimal test graph computing `out = add(x, y)`, where each of the
+// two inputs is produced by its own `feed` op node.
+// Declared `inline` because this header may be included by several test
+// translation units; a plain definition here would violate the ODR.
+inline std::unique_ptr<Graph> CreateOnlyElementwiseAddGraph(
+    const std::string& x_name, const std::string& y_name,
+    const std::string& out_name) {
+  auto g = std::make_unique<Graph>(framework::ProgramDesc());
+  framework::OpDesc feed_op_x, feed_op_y;
+  feed_op_x.SetType("feed");
+  feed_op_x.SetOutput("Out", {x_name});
+  feed_op_y.SetType("feed");
+  feed_op_y.SetOutput("Out", {y_name});
+
+  framework::VarDesc x_var(x_name);
+  framework::VarDesc y_var(y_name);
+  framework::VarDesc out_var(out_name);
+
+  framework::OpDesc elementwise_add_op;
+  elementwise_add_op.SetType("add");
+  elementwise_add_op.SetInput("X", {x_name});
+  elementwise_add_op.SetInput("Y", {y_name});
+  elementwise_add_op.SetOutput("Out", {out_name});
+
+  auto* feed_op_node_x = g->CreateOpNode(&feed_op_x);
+  auto* feed_op_node_y = g->CreateOpNode(&feed_op_y);
+  auto* elementwise_add_node = g->CreateOpNode(&elementwise_add_op);
+  auto* x_node = g->CreateVarNode(&x_var);
+  auto* y_node = g->CreateVarNode(&y_var);
+  auto* out_node = g->CreateVarNode(&out_var);
+
+  // fill op node
+  feed_op_node_x->outputs = {x_node};
+  feed_op_node_y->outputs = {y_node};
+  elementwise_add_node->inputs = {x_node, y_node};
+  elementwise_add_node->outputs = {out_node};
+
+  // fill variable node
+  x_node->inputs = {feed_op_node_x};
+  x_node->outputs = {elementwise_add_node};
+  y_node->inputs = {feed_op_node_y};
+  y_node->outputs = {elementwise_add_node};
+  out_node->inputs = {elementwise_add_node};
+  return g;
+}
+
+// Creates one LoDTensor variable per name in `scope`, fills each with its own
+// random values of shape `common_ddim`, and copies the data to `place`.
+// Values are drawn from [0, 100) as float and converted to DataType.
+template <typename DataType>
+void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
+                                  const framework::DDim& common_ddim,
+                                  const platform::Place& place,
+                                  framework::Scope* scope) {
+  std::random_device seed;
+  std::default_random_engine engine(seed());
+  std::uniform_real_distribution<float> dist(0, 100);
+
+  // Staging buffer on the host: refilled with fresh values for every
+  // variable and then copied to the destination place.
+  LoDTensor tmp_tensor;
+  auto* tmp_data =
+      tmp_tensor.mutable_data<DataType>(common_ddim, platform::CPUPlace());
+  const int64_t element_count = tmp_tensor.numel();
+  for (const auto& var_name : var_names) {
+    auto* tensor = scope->Var(var_name)->GetMutable<LoDTensor>();
+    // Bound the loop by the staging buffer, not by `tensor`: the destination
+    // has not been allocated yet, so its numel() does not describe
+    // `common_ddim` and the buffer would be copied without being filled.
+    for (int64_t i = 0; i < element_count; ++i) {
+      tmp_data[i] = static_cast<DataType>(dist(engine));
+    }
+    paddle::framework::TensorCopySync(tmp_tensor, place, tensor);
+  }
+}
+
+// Copies the LoDTensor held by each variable to the CPU and checks that both
+// are initialized, have equal dims, and match element-wise under EXPECT_EQ
+// (exact equality). A fatal assertion returns from this helper only.
+template <typename DataType>
+void CompareOpResult(Variable* test_out, Variable* expected_out) {
+  // Fail clearly on a null variable instead of dereferencing it.
+  ASSERT_TRUE(test_out != nullptr);
+  ASSERT_TRUE(expected_out != nullptr);
+
+  LoDTensor test_tensor, expected_tensor;
+  paddle::framework::TensorCopySync(test_out->Get<LoDTensor>(),
+                                    platform::CPUPlace(), &test_tensor);
+  paddle::framework::TensorCopySync(expected_out->Get<LoDTensor>(),
+                                    platform::CPUPlace(), &expected_tensor);
+
+  ASSERT_TRUE(test_tensor.IsInitialized());
+  ASSERT_TRUE(expected_tensor.IsInitialized());
+  ASSERT_EQ(test_tensor.dims(), expected_tensor.dims());
+  const auto* test_data = test_tensor.data<DataType>();
+  const auto* expected_data = expected_tensor.data<DataType>();
+  for (int64_t i = 0; i < expected_tensor.numel(); ++i) {
+    EXPECT_EQ(test_data[i], expected_data[i]);
+  }
+}
+
+}  // namespace paddle::operators