Unverified commit 0590277a authored by CtfGo, committed by GitHub

Improve CinnLaunchOpKernel implementation (#36936)

1. No functional change; simplify the CinnLaunchOpKernel implementation: variable information is now obtained through the standard ExecutionContext interface instead of directly from the Scope, which simplifies the kernel logic and, accordingly, the helper functions. The former cinn_launch_op_helper was rather redundant, so unnecessary interfaces were removed and the remaining ones were moved into cinn_launch_op.cc.
2. Fix the output-existence check in CinnLaunchOp InferShape: HasOutput -> HasOutputs.
3. Add detailed comments and debug information to ease troubleshooting and code maintenance.
Parent 3121f889
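As a quick illustration of point 1, here is a minimal before/after sketch of the variable-retrieval change, assembled from the diff below; it assumes the kernel's ExecutionContext ctx and the kX input name constant, so it is a fragment rather than a self-contained program:

// Before: look up each input variable in the Scope by name.
//   auto* var_ptr = ctx.scope().FindVar(var_name);
//   const auto* tensor = &var_ptr->Get<framework::LoDTensor>();
// After: fetch names and tensors through the ExecutionContext
// standard interface, then zip them into a name->tensor map.
auto input_variable_names = ctx.InputNames(kX);
const auto& input_tensors = ctx.MultiInput<LoDTensor>(kX);
std::map<std::string, const LoDTensor*> inputs_name2tensor;
std::transform(input_variable_names.begin(), input_variable_names.end(),
               input_tensors.begin(),
               std::inserter(inputs_name2tensor, inputs_name2tensor.end()),
               [](const std::string& name, const LoDTensor* tensor) {
                 return std::make_pair(name, tensor);
               });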
@@ -169,12 +169,8 @@ if (WITH_ASCEND_CL)
endif()
if (WITH_CINN)
cc_library(cinn_launch_op_helper SRCS cinn_launch_op_helper.cc DEPS operator cinn)
cc_test(cinn_launch_op_helper_test SRCS cinn_launch_op_helper_test.cc DEPS cinn_launch_op_helper)
op_library(cinn_launch_op SRCS cinn_launch_op.cc cinn_launch_op.cu.cc DEPS cinn_compiler cinn_launch_op_helper cinn ${OP_HEADER_DEPS})
if (WITH_GPU)
nv_test(cinn_launch_op_test SRCS cinn_launch_op_test.cc DEPS cinn_compiler cinn_launch_op elementwise_add_op)
endif()
op_library(cinn_launch_op SRCS cinn_launch_op.cc cinn_launch_op.cu.cc DEPS transform_desc cinn_compiler cinn ${OP_HEADER_DEPS})
cc_test(cinn_launch_op_test SRCS cinn_launch_op_test.cc DEPS cinn_compiler cinn_launch_op elementwise_add_op)
endif()
# FIXME(typhoonzero): operator deps may not needed.
......
@@ -13,17 +13,177 @@
// limitations under the License.
#include "paddle/fluid/operators/cinn_launch_op.h"
#include "cinn/frontend/var_type_utils.h"
#include "paddle/fluid/string/string_helper.h"
namespace paddle {
namespace operators {
namespace details {
const ::cinn::common::Target& PlaceToCinnTarget(const platform::Place& place) {
if (platform::is_cpu_place(place)) {
return ::cinn::common::DefaultHostTarget();
} else if (platform::is_gpu_place(place)) {
return ::cinn::common::DefaultNVGPUTarget();
}
PADDLE_THROW(platform::errors::InvalidArgument(
"CINN is not supported on current place:%s", place));
return ::cinn::common::UnkTarget();
}
void DebugCinnCompiledResult(const CinnCompiledObject& result) {
if (!VLOG_IS_ON(4)) {
return;
}
const auto& cinn_runtime_program = result.runtime_program;
const auto& cinn_scope = *(result.scope);
const auto& paddle2cinn_varmap = result.paddle2cinn_varmap;
VLOG(4) << "Compiled runtime_program instrunction size:["
<< cinn_runtime_program->size() << "]";
std::vector<std::string> infos;
auto cinn_var_names = cinn_scope.var_names();
infos.reserve(cinn_var_names.size());
std::transform(cinn_var_names.begin(), cinn_var_names.end(),
std::back_inserter(infos),
[](const auto& name_view) { return name_view.data(); });
VLOG(4) << "Compiled scope variable names:["
<< string::join_strings(infos, ',') << "]";
infos.clear();
infos.reserve(paddle2cinn_varmap.size());
std::transform(paddle2cinn_varmap.begin(), paddle2cinn_varmap.end(),
std::back_inserter(infos), [](const auto& paddle2cinn) {
return paddle2cinn.first + "->" + paddle2cinn.second;
});
VLOG(4) << "Compiled paddle2cinn_varmap:[" << string::join_strings(infos, ',')
<< "]";
}
std::vector<std::string> MapPaddleVariablesToCinn(
const std::vector<std::string>& paddle_names,
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap) {
std::vector<std::string> result;
result.reserve(paddle_names.size());
std::transform(
paddle_names.begin(), paddle_names.end(), std::back_inserter(result),
[&paddle2cinn_varmap](const std::string& pd_name) {
PADDLE_ENFORCE_GT(paddle2cinn_varmap.count(pd_name), 0,
platform::errors::NotFound(
"Not found the corresponding cinn variable "
"of paddle variable(%s) in compilation result.",
pd_name));
return paddle2cinn_varmap.at(pd_name);
});
return result;
}
std::vector<CinnTensor> GetCinnTensorsFromCompiledScope(
const std::vector<std::string>& cinn_names, const CinnScope& cinn_scope) {
std::vector<CinnTensor> result;
result.reserve(cinn_names.size());
std::transform(cinn_names.begin(), cinn_names.end(),
std::back_inserter(result),
[&cinn_scope](const std::string& var_name) {
PADDLE_ENFORCE_NOT_NULL(
cinn_scope.FindVar(var_name),
platform::errors::NotFound(
"Variable(%s) not found in cinn scope.", var_name));
return cinn_scope.GetTensor(var_name);
});
return result;
}
void CheckTensorEquivalent(const std::string& paddle_name,
const LoDTensor* paddle_tensor,
const CinnTensor& cinn_tensor) {
PADDLE_ENFORCE_EQ(
paddle_tensor->IsInitialized(), true,
platform::errors::InvalidArgument(
"The tensor in variable(%s) is not initialized.", paddle_name));
// check dimension
auto cinn_dims = framework::make_ddim(cinn_tensor->shape().data());
PADDLE_ENFORCE_EQ(paddle_tensor->dims(), cinn_dims,
platform::errors::InvalidArgument(
"The tensor dimension in variable(%s) "
"is not equivalent, paddle is [%s] "
"but cinn is [%s].",
paddle_name, paddle_tensor->dims(), cinn_dims));
// TODO(CtfGo): check the underlying data type after CINN ready
}
void TensorMutableDataWithCinnInfo(const platform::Place& place,
const CinnTensor& cinn_tensor,
LoDTensor* paddle_tensor) {
// TODO(CtfGo): support mutable corresponding c++ type after CINN ready
paddle_tensor->mutable_data<float>(
framework::make_ddim(cinn_tensor->shape().data()), place);
}
std::vector<std::string> SeperateTempVar(
const CinnScope& cinn_scope,
const std::vector<std::string>& input_cinn_names,
const std::vector<std::string>& output_cinn_names) {
auto cinn_var_names = cinn_scope.var_names();
std::unordered_set<std::string> all_cinn_names;
all_cinn_names.reserve(cinn_var_names.size());
std::transform(
cinn_var_names.begin(), cinn_var_names.end(),
std::inserter(all_cinn_names, all_cinn_names.end()),
[](const auto& name_view) { return std::string(name_view.data()); });
auto exclude_fn = [&all_cinn_names](const auto& cinn_name) {
PADDLE_ENFORCE_EQ(all_cinn_names.erase(cinn_name), 1,
platform::errors::NotFound(
"Variable(%s) not found in cinn scope", cinn_name));
};
std::for_each(input_cinn_names.begin(), input_cinn_names.end(), exclude_fn);
std::for_each(output_cinn_names.begin(), output_cinn_names.end(), exclude_fn);
return {all_cinn_names.begin(), all_cinn_names.end()};
}
std::unique_ptr<cinn_buffer_t> ShareTensorWithCinnBuffer(LoDTensor* tensor) {
// convert paddle dimensions array to cinn format
std::vector<cinn_dimension_t> cinn_dims(tensor->dims().size());
for (auto i = 0; i < tensor->dims().size(); ++i) {
cinn_dims[i] = static_cast<cinn_dimension_t>(tensor->dims().at(i));
}
auto cinn_buffer = std::make_unique<cinn_buffer_t>();
// assign size and memory
cinn_buffer->resize(cinn_dims.data(), cinn_dims.size());
cinn_buffer->memory = reinterpret_cast<uint8_t*>(tensor->data<float>());
return cinn_buffer;
}
void CheckArgumentsNotMissed(
const CinnScope& cinn_scope,
const std::map<std::string, cinn_pod_value_t>& name2argument) {
auto cinn_var_names = cinn_scope.var_names();
std::for_each(cinn_var_names.begin(), cinn_var_names.end(),
[&name2argument](const auto& name_view) {
PADDLE_ENFORCE_GT(
name2argument.count(name_view.data()), 0,
platform::errors::InvalidArgument(
"Parameter(%s) is not assgined.", name_view.data()));
});
}
} // namespace details
class CinnLaunchOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInputs(kX), "Input", kX, "CinnLaunchOp");
OP_INOUT_CHECK(ctx->HasOutput(kOutputs), "Output", kOutputs,
OP_INOUT_CHECK(ctx->HasOutputs(kOutputs), "Output", kOutputs,
"CinnLaunchOp");
}
@@ -76,7 +76,7 @@ In addition, there is an attribute named 'compilation_key' should be
set necessarily to get corresponding ir::Graph object of the graph
or its computation result.
It accomplishs the computation of graph following several steps:
It accomplishes the computation of graph following several steps:
1. Fetch ir::Graph object from CinnCompiler using kCompilationKey
2. Compile the graph to a compiled object, and insert it to the
global cache so that we can directly query it from this cache next time
......
@@ -24,8 +24,6 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"
#include "paddle/fluid/operators/cinn_launch_op_helper.h"
#include "paddle/fluid/string/string_helper.h"
namespace paddle {
namespace operators {
@@ -35,15 +33,63 @@ static constexpr char kOutputs[] = "Out";
static constexpr char kCompilationKey[] = "compilation_key";
using LoDTensor = framework::LoDTensor;
using Name2ConstTensor = std::map<std::string, const LoDTensor*>;
using CinnTensor = cinn::hlir::framework::Tensor;
using Name2CinnTensor = std::unordered_map<std::string, CinnTensor>;
using framework::paddle2cinn::CinnCompiler;
using CinnTensor = ::cinn::hlir::framework::Tensor;
using CinnScope = ::cinn::hlir::framework::Scope;
using CinnCompiler = framework::paddle2cinn::CinnCompiler;
using CinnCompiledObject = framework::paddle2cinn::CinnCompiledObject;
namespace details {
// Transform Paddle place to CINN target
const ::cinn::common::Target& PlaceToCinnTarget(const platform::Place& place);
// Print detailed compilation result of graph for debug
void DebugCinnCompiledResult(const CinnCompiledObject& result);
// Transform names of Paddle variables to CINN ones
std::vector<std::string> MapPaddleVariablesToCinn(
const std::vector<std::string>& paddle_names,
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap);
// Get CinnTensors from the CinnScope by variable names
std::vector<CinnTensor> GetCinnTensorsFromCompiledScope(
const std::vector<std::string>& cinn_names, const CinnScope& cinn_scope);
// Check whether the Paddle and CINN tensors of the same variable
// are equivalent in type and dimension
void CheckTensorEquivalent(const std::string& paddle_name,
const LoDTensor* paddle_tensor,
const CinnTensor& cinn_tensor);
// Allocate a buffer for a Paddle tensor with the assignment information from CINN
void TensorMutableDataWithCinnInfo(const platform::Place& place,
const CinnTensor& cinn_tensor,
LoDTensor* paddle_tensor);
// Extract temporary variable names from CinnScope by excluding
// input and output variables
std::vector<std::string> SeperateTempVar(
const CinnScope& cinn_scope,
const std::vector<std::string>& input_cinn_names,
const std::vector<std::string>& output_cinn_names);
// Share the buffer of a Paddle tensor with CINN by packing its memory
// address into a cinn_buffer_t object
std::unique_ptr<cinn_buffer_t> ShareTensorWithCinnBuffer(LoDTensor* tensor);
// Check that all execution arguments are present
void CheckArgumentsNotMissed(
const CinnScope& cinn_scope,
const std::map<std::string, cinn_pod_value_t>& name2argument);
} // namespace details
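Taken together, the helpers declared above form a small pipeline. The following condensed sketch, distilled from the kernel below, shows how they are chained for output variables; it assumes the kernel-local names (output_variable_names, paddle2cinn_varmap, cinn_scope, scope, place, name2argument, hold_buffers) and is not a standalone program:

auto output_cinn_names = details::MapPaddleVariablesToCinn(
    output_variable_names, paddle2cinn_varmap);
auto output_cinn_tensors =
    details::GetCinnTensorsFromCompiledScope(output_cinn_names, cinn_scope);
for (size_t i = 0; i < output_variable_names.size(); ++i) {
  auto* tensor =
      scope.GetVar(output_variable_names[i])->GetMutable<LoDTensor>();
  if (tensor->IsInitialized()) {
    // already allocated (e.g. a persistable parameter): verify shape only
    details::CheckTensorEquivalent(output_variable_names[i], tensor,
                                   output_cinn_tensors[i]);
  } else {
    // allocate the Paddle buffer according to the CINN shape information
    details::TensorMutableDataWithCinnInfo(place, output_cinn_tensors[i],
                                           tensor);
  }
  // share the Paddle buffer into a cinn_buffer_t and keep it alive
  auto buffer = details::ShareTensorWithCinnBuffer(tensor);
  name2argument.emplace(output_cinn_names[i], buffer.get());
  hold_buffers.emplace_back(std::move(buffer));
}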
template <typename DeviceContext, typename T>
class CinnLaunchOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const auto& scope = ctx.scope();
const auto& place = ctx.GetPlace();
// Step 1. Find graph object and prepare input
PADDLE_ENFORCE_EQ(ctx.HasAttr(kCompilationKey), true,
platform::errors::NotFound(
@@ -51,62 +51,125 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
kCompilationKey));
const auto& compilation_key =
ctx.template Attr<std::string>(kCompilationKey);
VLOG(2) << "CinnLaunchOp compilation_key:" << compilation_key;
VLOG(4) << "CinnLaunchOp attribute(" << kCompilationKey << ") "
<< "value:" << compilation_key;
const auto& graph = CinnCompiler::GetInstance()->FindGraph(compilation_key);
auto input_variable_names = ctx.InputNames(kX);
Name2ConstTensor input_tensors =
details::GetConstTensors(ctx.scope(), input_variable_names);
const auto& input_tensors = ctx.MultiInput<LoDTensor>(kX);
std::map<std::string, const LoDTensor*> inputs_name2tensor;
std::transform(input_variable_names.begin(), input_variable_names.end(),
input_tensors.begin(),
std::inserter(inputs_name2tensor, inputs_name2tensor.end()),
[](const std::string& name, const LoDTensor* tensor) {
return std::make_pair(name, tensor);
});
// Step 2. Get compilation result of the graph
auto target = details::PlaceToCinnTarget(ctx.GetPlace());
auto target = details::PlaceToCinnTarget(place);
const auto& cinn_compiled_object =
CinnCompiler::GetInstance()->Compile(graph, input_tensors, target);
VLOG(2) << "CinnLaunchOp compile graph done on " << ctx.GetPlace();
CinnCompiler::GetInstance()->Compile(graph, inputs_name2tensor, target);
details::DebugCinnCompiledResult(cinn_compiled_object);
const auto& cinn_runtime_program = cinn_compiled_object.runtime_program;
const auto& compiled_scope = *(cinn_compiled_object.scope.get());
const auto& cinn_scope = *(cinn_compiled_object.scope);
const auto& paddle2cinn_varmap = cinn_compiled_object.paddle2cinn_varmap;
// Step 3. Initialize all variables of the compilation runtime program
// in paddle, and pack them into execution arguments
VLOG(2) << "CinnLaunchOp prepare execution arguments";
// Step 3. Initialize all variables needed by the cinn compiled runtime
// program's execution, and share the buffers of their tensors into
// cinn buffers through the execution arguments passed in.
VLOG(4) << "CinnLaunchOp initialize variables and prepare arguments";
std::map<std::string, cinn_pod_value_t> name2argument;
// because a cinn_pod_value_t does not own the cinn_buffer_t object,
// extra storage is necessary to keep the object alive, and it cannot
// be released until the runtime program finishes execution
// (a standalone sketch of this ownership idiom follows the kernel below).
std::vector<std::unique_ptr<cinn_buffer_t>> hold_buffers;
// prepare input variables
Name2CinnTensor input_compiled_tensors = details::GetCompiledTensors(
input_variable_names, compiled_scope, paddle2cinn_varmap);
details::CheckTensorEquivalent(input_tensors, input_compiled_tensors);
details::AppendExecutionArguments(ctx.scope(), input_variable_names,
paddle2cinn_varmap, &name2argument,
&hold_buffers);
// prepare output variables
// 3.1 Prepare input variables: since the tensors of input variables
// have been initialized before the graph is compiled, just check
// the equality between the paddle and cinn tensors.
auto input_cinn_names = details::MapPaddleVariablesToCinn(
input_variable_names, paddle2cinn_varmap);
auto input_cinn_tensors =
details::GetCinnTensorsFromCompiledScope(input_cinn_names, cinn_scope);
for (auto i = 0; i < input_variable_names.size(); ++i) {
const auto& var_name = input_variable_names.at(i);
const auto& cinn_name = input_cinn_names.at(i);
auto* tensor = scope.GetVar(var_name)->GetMutable<LoDTensor>();
details::CheckTensorEquivalent(var_name, tensor,
input_cinn_tensors.at(i));
VLOG(4) << "Prepare input argument-" << i << ":"
<< "name(" << var_name << "->" << cinn_name << "), "
<< "tensor(type:" << tensor->type() << ","
<< "dims:" << tensor->dims() << ").";
auto buffer = details::ShareTensorWithCinnBuffer(tensor);
name2argument.emplace(input_cinn_names.at(i), buffer.get());
hold_buffers.emplace_back(std::move(buffer));
}
// 3.2 Prepare output variables: all output variables should be
// initialized and have buffers allocated before the runtime program
// starts execution. The compilation result includes the details of
// their buffer assignment, which is used for Paddle tensor allocation.
// For variables already allocated, such as persistable parameters,
// just check the equality between the Paddle allocation and the
// CINN buffer assignment.
auto output_variable_names = ctx.OutputNames(kOutputs);
Name2CinnTensor output_compiled_tensors = details::GetCompiledTensors(
output_variable_names, compiled_scope, paddle2cinn_varmap);
details::InitializeOutputVar(ctx.scope(), ctx.GetPlace(),
output_compiled_tensors);
Name2ConstTensor output_tensors =
details::GetConstTensors(ctx.scope(), output_variable_names);
details::CheckTensorEquivalent(output_tensors, output_compiled_tensors);
details::AppendExecutionArguments(ctx.scope(), output_variable_names,
paddle2cinn_varmap, &name2argument,
&hold_buffers);
// prepare temporary variables
auto temp_variable_names =
details::SeperateTempVar(compiled_scope, paddle2cinn_varmap,
input_variable_names, output_variable_names);
auto temp_scope = ctx.scope().NewTmpScope();
auto output_cinn_names = details::MapPaddleVariablesToCinn(
output_variable_names, paddle2cinn_varmap);
auto output_cinn_tensors =
details::GetCinnTensorsFromCompiledScope(output_cinn_names, cinn_scope);
for (auto i = 0; i < output_variable_names.size(); ++i) {
const auto& var_name = output_variable_names.at(i);
const auto& cinn_name = output_cinn_names.at(i);
auto* tensor = scope.GetVar(var_name)->GetMutable<LoDTensor>();
if (tensor->IsInitialized()) {
details::CheckTensorEquivalent(var_name, tensor,
output_cinn_tensors.at(i));
} else {
details::TensorMutableDataWithCinnInfo(place, output_cinn_tensors.at(i),
tensor);
}
VLOG(4) << "Prepare outnput argument-" << i << ":"
<< "name(" << var_name << "->" << cinn_name << "), "
<< "tensor(type:" << tensor->type() << ","
<< "dims:" << tensor->dims() << ").";
auto buffer = details::ShareTensorWithCinnBuffer(tensor);
name2argument.emplace(output_cinn_names.at(i), buffer.get());
hold_buffers.emplace_back(std::move(buffer));
}
// 3.3 Prepare temporary variables: create a temporary scope to hold
// the temporary variables needed by the compiled runtime program;
// in addition, they directly use the names from the CinnScope.
auto temp_variable_names = details::SeperateTempVar(
cinn_scope, input_cinn_names, output_cinn_names);
auto temp_scope = scope.NewTmpScope();
if (!temp_variable_names.empty()) {
details::InitializeTempVar(temp_variable_names, compiled_scope,
ctx.GetPlace(), temp_scope.get());
details::AppendExecutionArguments(*temp_scope, temp_variable_names,
paddle2cinn_varmap, &name2argument,
&hold_buffers);
auto temp_cinn_tensors = details::GetCinnTensorsFromCompiledScope(
temp_variable_names, cinn_scope);
for (auto i = 0; i < temp_variable_names.size(); ++i) {
const auto& var_name = temp_variable_names.at(i);
auto* tensor = temp_scope->Var(var_name)->GetMutable<LoDTensor>();
details::TensorMutableDataWithCinnInfo(place, temp_cinn_tensors.at(i),
tensor);
VLOG(4) << "Prepare temporary argument-" << i << ":"
<< "name(" << var_name << "->" << var_name << "), "
<< "tensor(type:" << tensor->type() << ","
<< "dims:" << tensor->dims() << ").";
auto buffer = details::ShareTensorWithCinnBuffer(tensor);
name2argument.emplace(var_name, buffer.get());
hold_buffers.emplace_back(std::move(buffer));
}
}
// Step 4. Launch CINN to execute the compilation runtime program
// Step 4. Launch CINN to execute the compiled runtime program
details::CheckArgumentsNotMissed(cinn_scope, name2argument);
cinn_runtime_program->Execute(&name2argument);
VLOG(2) << "CinnLaunchOp launch runtime_program execution done.";
VLOG(4) << "CinnLaunchOp launch execution done.";
}
};
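The hold_buffers pattern in Step 3 deserves a standalone illustration: a cinn_pod_value_t only views a cinn_buffer_t, so the kernel must retain ownership elsewhere until execution completes. Below is a minimal, self-contained sketch of that ownership idiom using placeholder types (Buffer and ArgumentView are stand-ins for illustration, not CINN APIs):

#include <map>
#include <memory>
#include <string>
#include <vector>

struct Buffer { unsigned char* memory = nullptr; };  // stand-in for cinn_buffer_t
using ArgumentView = Buffer*;  // stand-in for the non-owning cinn_pod_value_t

int main() {
  std::map<std::string, ArgumentView> name2argument;
  // The views in name2argument do not own the buffers, so an extra
  // container must keep them alive until execution finishes.
  std::vector<std::unique_ptr<Buffer>> hold_buffers;

  auto buffer = std::make_unique<Buffer>();
  name2argument.emplace("cinn_var1", buffer.get());  // pass a raw, non-owning view
  hold_buffers.emplace_back(std::move(buffer));      // retain ownership here

  // An Execute(&name2argument) call would run at this point; the buffers
  // remain valid because hold_buffers outlives the execution.
  return 0;
}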
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/cinn_launch_op_helper.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace operators {
namespace details {
using LoDTensor = framework::LoDTensor;
using Scope = framework::Scope;
using Name2ConstTensor = std::map<std::string, const LoDTensor*>;
using CinnTensor = cinn::hlir::framework::Tensor;
using CinnScope = cinn::hlir::framework::Scope;
using Name2CinnTensor = std::unordered_map<std::string, CinnTensor>;
const cinn::common::Target& PlaceToCinnTarget(const platform::Place& place) {
if (platform::is_cpu_place(place)) {
return cinn::common::DefaultHostTarget();
} else if (platform::is_gpu_place(place)) {
return cinn::common::DefaultNVGPUTarget();
}
PADDLE_THROW(platform::errors::InvalidArgument(
"CINN is not supported on current place:%s", place));
return cinn::common::UnkTarget();
}
Name2ConstTensor GetConstTensors(
const Scope& scope, const std::vector<std::string>& variable_names) {
Name2ConstTensor name2tensor;
for (const auto& var_name : variable_names) {
auto* var_ptr = scope.FindVar(var_name);
PADDLE_ENFORCE_NOT_NULL(
var_ptr, platform::errors::NotFound("Variable(%s) not found in Scope.",
var_name));
PADDLE_ENFORCE_EQ(var_ptr->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument(
"Variable(%s) is not LoDTensor that is "
"the only supported by compiler now.",
var_name));
name2tensor.emplace(var_name, &var_ptr->Get<framework::LoDTensor>());
}
return name2tensor;
}
Name2CinnTensor GetCompiledTensors(
const std::vector<std::string>& paddle_var_names,
const CinnScope& compiled_scope,
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap) {
Name2CinnTensor name2tensor;
for (const auto& pd_name : paddle_var_names) {
PADDLE_ENFORCE_GT(paddle2cinn_varmap.count(pd_name), 0,
platform::errors::NotFound(
"the corresponding compiled one of variable(%s) "
"not found in compilation result.",
pd_name));
const auto& cinn_name = paddle2cinn_varmap.at(pd_name);
PADDLE_ENFORCE_NOT_NULL(
compiled_scope.FindVar(cinn_name),
platform::errors::NotFound("Variable(%s) not found in compiled scope.",
pd_name));
name2tensor.emplace(pd_name, compiled_scope.GetTensor(cinn_name));
}
return name2tensor;
}
void CheckTensorEquivalent(const Name2ConstTensor& paddle_tensors,
const Name2CinnTensor& compiled_tensors) {
for (const auto& name2tensor : paddle_tensors) {
const auto& pd_name = name2tensor.first;
const auto* paddle_tensor = name2tensor.second;
PADDLE_ENFORCE_EQ(
paddle_tensor->IsInitialized(), true,
platform::errors::InvalidArgument(
"The tensor in variable(%s) is not initialized.", pd_name));
PADDLE_ENFORCE_GT(compiled_tensors.count(pd_name), 0,
platform::errors::NotFound(
"the corresponding compiled tensor of variable(%s) "
"not found in compilation result.",
pd_name));
const auto& cinn_tensor = compiled_tensors.at(pd_name);
auto compiled_dim = framework::make_ddim(cinn_tensor->shape().data());
PADDLE_ENFORCE_EQ(paddle_tensor->dims(), compiled_dim,
platform::errors::InvalidArgument(
"The tensor dimension in variable(%s) "
"is not equivalent, paddle is [%s] "
"but compiled result is [%s].",
pd_name, paddle_tensor->dims(), compiled_dim));
// TODO(CtfGo): check the underlying data type is equivalent
}
}
void InitializeOutputVar(const Scope& scope, const platform::Place& place,
const Name2CinnTensor& compiled_tensors) {
for (const auto& name2tensor : compiled_tensors) {
const auto& pd_name = name2tensor.first;
const auto& cinn_tensor = name2tensor.second;
auto* var_ptr = scope.FindVar(pd_name);
PADDLE_ENFORCE_NOT_NULL(
var_ptr, platform::errors::NotFound("Variable(%s) not found in scope.",
pd_name));
auto* paddle_tensor = var_ptr->GetMutable<LoDTensor>();
if (!paddle_tensor->IsInitialized()) {
// TODO(CtfGo): support mutable corresponding c++ type with the
// compilation type
paddle_tensor->mutable_data<float>(
framework::make_ddim(cinn_tensor->shape().data()), place);
VLOG(2) << "Variable(" << pd_name
<< ") is initialized using compilation result, type:"
<< paddle_tensor->type() << ", dims:" << paddle_tensor->dims();
}
}
}
std::vector<std::string> SeperateTempVar(
const CinnScope& compiled_scope,
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap,
const std::vector<std::string>& input_var_names,
const std::vector<std::string>& output_var_names) {
std::unordered_set<std::string> all_paddle_names, all_cinn_names;
for_each(paddle2cinn_varmap.begin(), paddle2cinn_varmap.end(),
[&all_paddle_names](const auto& name_pd2cinn) {
all_paddle_names.insert(name_pd2cinn.first);
});
auto cinn_names_view = compiled_scope.var_names();
for_each(cinn_names_view.begin(), cinn_names_view.end(),
[&all_cinn_names](const auto& str_view) {
all_cinn_names.emplace(str_view.data(), str_view.size());
});
auto exclude_fn = [&](const auto& pd_name) {
PADDLE_ENFORCE_EQ(all_paddle_names.erase(pd_name), 1,
platform::errors::NotFound(
"The corresponding compiled one of variable(%s) "
"not found in compilation result.",
pd_name));
PADDLE_ENFORCE_EQ(all_cinn_names.erase(paddle2cinn_varmap.at(pd_name)), 1,
platform::errors::NotFound(
"Variable(%s) not found in compiled scope", pd_name));
};
for_each(input_var_names.begin(), input_var_names.end(), exclude_fn);
for_each(output_var_names.begin(), output_var_names.end(), exclude_fn);
if (all_cinn_names.empty()) {
VLOG(2) << "No temporary variable is needed during "
"execution in cinn runtime program";
return {};
}
return {all_cinn_names.begin(), all_cinn_names.end()};
}
void InitializeTempVar(const std::vector<std::string>& variable_names,
const CinnScope& compiled_scope,
const platform::Place& place, Scope* temp_scope) {
for (const auto& var_name : variable_names) {
PADDLE_ENFORCE_NOT_NULL(
compiled_scope.FindVar(var_name),
platform::errors::NotFound(
"Temporary variable(%s) not found in compiled scope", var_name));
const auto& cinn_tensor = compiled_scope.GetTensor(var_name);
// use the same variable name defined by CINN
auto* var_ptr = temp_scope->Var(var_name);
auto* paddle_tensor = var_ptr->GetMutable<LoDTensor>();
auto compiled_ddim = framework::make_ddim(cinn_tensor->shape().data());
// TODO(CtfGo): support mutable corresponding c++ type
paddle_tensor->mutable_data<float>(compiled_ddim, place);
VLOG(2) << "Add temporary variable(" << var_name << "), dimension is ["
<< compiled_ddim << "]";
}
}
void SharePaddleTensorWithCinnBuffer(LoDTensor* paddle_tensor,
cinn_buffer_t* cinn_buffer) {
std::vector<cinn_dimension_t> cinn_dims(paddle_tensor->dims().size());
for (auto i = 0; i < cinn_dims.size(); ++i) {
cinn_dims[i] = static_cast<cinn_dimension_t>(paddle_tensor->dims().at(i));
}
cinn_buffer->resize(cinn_dims.data(), cinn_dims.size());
cinn_buffer->memory =
reinterpret_cast<uint8_t*>(paddle_tensor->data<float>());
}
void AppendExecutionArguments(
const Scope& scope, const std::vector<std::string>& variable_names,
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap,
std::map<std::string, cinn_pod_value_t>* name2argument,
std::vector<std::unique_ptr<cinn_buffer_t>>* hold_buffers) {
for (const auto& pd_name : variable_names) {
auto* var_ptr = scope.FindVar(pd_name);
PADDLE_ENFORCE_NOT_NULL(
var_ptr, platform::errors::NotFound("Variable(%s) not found in Scope.",
pd_name));
auto* paddle_tensor = var_ptr->GetMutable<LoDTensor>();
// if a paddle variable is not found in the map,
// it is an extra temporary variable that was added,
// so its paddle name is the same as the cinn one
const auto& cinn_name = paddle2cinn_varmap.count(pd_name)
? paddle2cinn_varmap.at(pd_name)
: pd_name;
std::unique_ptr<cinn_buffer_t> buffer_ptr(new cinn_buffer_t());
SharePaddleTensorWithCinnBuffer(paddle_tensor, buffer_ptr.get());
name2argument->emplace(cinn_name, buffer_ptr.get());
hold_buffers->emplace_back(std::move(buffer_ptr));
}
}
} // namespace details
} // namespace operators
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_map>
#include "cinn/common/target.h"
#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/hlir/framework/scope.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
namespace paddle {
namespace operators {
namespace details {
const cinn::common::Target& PlaceToCinnTarget(const platform::Place& place);
// Get the underlying tensor of a variable,
// result: paddle name --> const LoDTensor*
std::map<std::string, const framework::LoDTensor*> GetConstTensors(
const framework::Scope& scope,
const std::vector<std::string>& variable_names);
// Get the compiled tensor of a paddle variable,
// result: paddle name --> CinnTensor
std::unordered_map<std::string, cinn::hlir::framework::Tensor>
GetCompiledTensors(
const std::vector<std::string>& paddle_var_names,
const cinn::hlir::framework::Scope& compiled_scope,
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap);
// Check an original tensor of Paddle is equivalent
// to the compiled tensor from CINN
void CheckTensorEquivalent(
/*paddle name -> const LoDTensor**/
const std::map<std::string, const framework::LoDTensor*>& paddle_tensors,
/*paddle name -> CinnTensor*/
const std::unordered_map<std::string, cinn::hlir::framework::Tensor>&
compiled_tensors);
// Initialize output variables with the compilation result from CINN
void InitializeOutputVar(
const framework::Scope& scope, const platform::Place& place,
/*paddle name -> CinnTensor*/
const std::unordered_map<std::string, cinn::hlir::framework::Tensor>&
compiled_tensors);
// Extract extra temporary variables by
// excluding input/output variables from compiled scope
std::vector<std::string> SeperateTempVar(
const cinn::hlir::framework::Scope& compiled_scope,
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap,
const std::vector<std::string>& input_var_names,
const std::vector<std::string>& output_var_names);
// Initialize temporary variables in a temp scope,
// using the definition in compiled_scope
void InitializeTempVar(const std::vector<std::string>& variable_names,
const cinn::hlir::framework::Scope& compiled_scope,
const platform::Place& place,
framework::Scope* temp_scope);
// Share paddle tensor to a cinn one through cinn_buffer_t object
void SharePaddleTensorWithCinnBuffer(framework::LoDTensor* paddle_tensor,
cinn_buffer_t* cinn_buffer);
// Pack tensors of all variables as execution arguments,
// which will be passed into compilation runtime program to execute
void AppendExecutionArguments(
const framework::Scope& scope,
const std::vector<std::string>& variable_names,
const std::unordered_map<std::string, std::string>& paddle2cinn_varmap,
std::map<std::string, cinn_pod_value_t>* name2argument,
std::vector<std::unique_ptr<cinn_buffer_t>>* hold_buffers);
} // namespace details
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/cinn_launch_op_helper.h"
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
namespace details {
using LoDTensor = framework::LoDTensor;
using Scope = framework::Scope;
using CinnShape = cinn::hlir::framework::Shape;
using CinnTensor = cinn::hlir::framework::Tensor;
using CinnScope = cinn::hlir::framework::Scope;
TEST(CinnLaunchOpHelperTest, TestPlaceToCinnTarget) {
ASSERT_EQ(PlaceToCinnTarget(platform::CPUPlace()),
cinn::common::DefaultHostTarget());
ASSERT_EQ(PlaceToCinnTarget(platform::CUDAPlace(0)),
cinn::common::DefaultNVGPUTarget());
}
TEST(CinnLaunchOpHelperTest, TestGetConstTensors) {
// build test data
Scope scope;
auto* var1 = scope.Var("lodtensor_var_1");
var1->GetMutable<LoDTensor>();
auto* var2 = scope.Var("lodtensor_var_2");
var2->GetMutable<LoDTensor>();
auto* var3 = scope.Var("selectedrows_var_1");
var3->GetMutable<framework::SelectedRows>();
// get expected result with legal input
auto name2tensor =
GetConstTensors(scope, {"lodtensor_var_1", "lodtensor_var_2"});
ASSERT_EQ(name2tensor.size(), 2);
EXPECT_EQ(name2tensor.at("lodtensor_var_1"), &var1->Get<LoDTensor>());
EXPECT_EQ(name2tensor.at("lodtensor_var_2"), &var2->Get<LoDTensor>());
}
TEST(CinnLaunchOpHelperTest, TestGetCompiledTensors) {
// build test data
std::unordered_map<std::string, std::string> paddle2cinn_varmap(
{{"pd_var1", "cinn_var1"},
{"pd_var2", "cinn_var2"},
{"pd_var3", "cinn_var3"}});
CinnScope compiled_scope;
compiled_scope.Var<CinnTensor>("cinn_var1");
compiled_scope.Var<CinnTensor>("cinn_var2");
// get expected result with legal input
auto name2tensor = GetCompiledTensors({"pd_var1", "pd_var2"}, compiled_scope,
paddle2cinn_varmap);
ASSERT_EQ(name2tensor.size(), 2);
EXPECT_EQ(name2tensor.at("pd_var1").get(),
compiled_scope.GetTensor("cinn_var1").get());
EXPECT_EQ(name2tensor.at("pd_var2").get(),
compiled_scope.GetTensor("cinn_var2").get());
}
TEST(CinnLaunchOpHelperTest, TestCheckTensorEquivalent) {
// build test data
platform::CPUPlace place;
Scope scope;
CinnScope compiled_scope;
auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
auto dims1 = std::vector<int>({2, 3});
tensor1->mutable_data<float>(framework::make_ddim(dims1), place);
auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
auto dims2 = std::vector<int>({5, 6, 7});
tensor2->mutable_data<float>(framework::make_ddim(dims2), place);
auto* tensor3 = scope.Var("var3")->GetMutable<LoDTensor>();
tensor3->mutable_data<float>(framework::make_ddim({10, 20}), place);
auto* tensor4 = scope.Var("var4")->GetMutable<LoDTensor>();
tensor4->mutable_data<float>(framework::make_ddim({2, 4, 6}), place);
compiled_scope.Var<CinnTensor>("var1");
compiled_scope.Var<CinnTensor>("var2");
compiled_scope.Var<CinnTensor>("var3");
auto compiled_tensor1 = compiled_scope.GetTensor("var1");
compiled_tensor1->Resize(CinnShape(dims1));
auto compiled_tensor2 = compiled_scope.GetTensor("var2");
compiled_tensor2->Resize(CinnShape(dims2));
auto compiled_tensor3 = compiled_scope.GetTensor("var3");
compiled_tensor3->Resize(CinnShape({10}));
// expected equality
CheckTensorEquivalent(
{{"var1", tensor1}, {"var2", tensor2}},
{{"var1", compiled_tensor1}, {"var2", compiled_tensor2}});
}
TEST(CinnLaunchOpHelperTest, TestInitializeOutputVar) {
// build test data
platform::CPUPlace place;
Scope scope;
scope.Var("var1");
scope.Var("var2");
CinnScope compiled_scope;
compiled_scope.Var<CinnTensor>("var1");
compiled_scope.Var<CinnTensor>("var2");
compiled_scope.Var<CinnTensor>("var3");
auto compiled_tensor1 = compiled_scope.GetTensor("var1");
compiled_tensor1->Resize(CinnShape({2, 3}));
auto compiled_tensor2 = compiled_scope.GetTensor("var2");
compiled_tensor2->Resize(CinnShape({5, 6, 7}));
auto compiled_tensor3 = compiled_scope.GetTensor("var3");
compiled_tensor3->Resize(CinnShape({10}));
// expected result
InitializeOutputVar(scope, place,
{{"var1", compiled_tensor1}, {"var2", compiled_tensor2}});
auto* var1 = scope.FindVar("var1");
ASSERT_TRUE(var1->IsType<LoDTensor>());
EXPECT_TRUE(var1->Get<LoDTensor>().IsInitialized());
EXPECT_EQ(var1->Get<LoDTensor>().dims(), framework::make_ddim({2, 3}));
auto* var2 = scope.FindVar("var2");
ASSERT_TRUE(var2->IsType<LoDTensor>());
EXPECT_TRUE(var2->Get<LoDTensor>().IsInitialized());
EXPECT_EQ(var2->Get<LoDTensor>().dims(), framework::make_ddim({5, 6, 7}));
}
TEST(CinnLaunchOpHelperTest, TestSeperateTempVar) {
CinnScope compiled_scope;
compiled_scope.Var<CinnTensor>("cinn_temp_var1");
compiled_scope.Var<CinnTensor>("cinn_input_var1");
compiled_scope.Var<CinnTensor>("cinn_input_var2");
compiled_scope.Var<CinnTensor>("cinn_temp_var2");
compiled_scope.Var<CinnTensor>("cinn_output_var1");
auto variable_names =
SeperateTempVar(compiled_scope, {{"input_var1", "cinn_input_var1"},
{"input_var2", "cinn_input_var2"},
{"output_var1", "cinn_output_var1"}},
{"input_var1", "input_var2"}, {"output_var1"});
ASSERT_EQ(variable_names.size(), 2);
}
TEST(CinnLaunchOpHelperTest, TestInitializeTempVar) {
// build test data
Scope temp_scope;
platform::CPUPlace place;
CinnScope compiled_scope;
compiled_scope.Var<CinnTensor>("temp_var1");
compiled_scope.Var<CinnTensor>("temp_var2");
compiled_scope.Var<CinnTensor>("var3");
auto compiled_tensor1 = compiled_scope.GetTensor("temp_var1");
compiled_tensor1->Resize(CinnShape({2, 3}));
auto compiled_tensor2 = compiled_scope.GetTensor("temp_var2");
compiled_tensor2->Resize(CinnShape({5, 6, 7}));
auto compiled_tensor3 = compiled_scope.GetTensor("var3");
compiled_tensor3->Resize(CinnShape({10}));
// expected result
InitializeTempVar({"temp_var1", "temp_var2"}, compiled_scope, place,
&temp_scope);
ASSERT_EQ(temp_scope.LocalVarNames().size(), 2);
auto* temp_var1 = temp_scope.FindVar("temp_var1");
ASSERT_NE(temp_var1, nullptr);
EXPECT_TRUE(temp_var1->IsType<LoDTensor>());
EXPECT_TRUE(temp_var1->Get<LoDTensor>().IsInitialized());
EXPECT_EQ(temp_var1->Get<LoDTensor>().dims(), framework::make_ddim({2, 3}));
auto* temp_var2 = temp_scope.FindVar("temp_var2");
ASSERT_NE(temp_var2, nullptr);
EXPECT_TRUE(temp_var2->IsType<LoDTensor>());
EXPECT_TRUE(temp_var2->Get<LoDTensor>().IsInitialized());
EXPECT_EQ(temp_var2->Get<LoDTensor>().dims(),
framework::make_ddim({5, 6, 7}));
}
TEST(CinnLaunchOpHelperTest, TestSharePaddleTensorWithCinnBuffer) {
// build test data
Scope scope;
platform::CPUPlace place;
auto* var1 = scope.Var("var1");
auto* tensor1 = var1->GetMutable<LoDTensor>();
tensor1->mutable_data<float>(framework::make_ddim({5, 6}), place);
auto* data1 = tensor1->data<float>();
data1[0] = 9.99;
data1[10] = 19.99;
ASSERT_EQ(tensor1->numel(), 30);
ASSERT_EQ(tensor1->dims().size(), 2);
// expected result
cinn_buffer_t cinn_buffer;
SharePaddleTensorWithCinnBuffer(tensor1, &cinn_buffer);
ASSERT_NE(cinn_buffer.memory, nullptr);
ASSERT_EQ(cinn_buffer.num_elements(), 30);
auto* shadow_data = reinterpret_cast<float*>(cinn_buffer.memory);
EXPECT_FLOAT_EQ(shadow_data[0], 9.99);
EXPECT_FLOAT_EQ(shadow_data[10], 19.99);
}
TEST(CinnLaunchOpHelperTest, TestAppendExecutionArguments) {
// build test data
Scope scope;
platform::CPUPlace place;
auto* var1 = scope.Var("var1");
auto* tensor1 = var1->GetMutable<LoDTensor>();
tensor1->mutable_data<float>(framework::make_ddim({5, 6}), place);
auto* var2 = scope.Var("temp_var2");
auto* tensor2 = var2->GetMutable<LoDTensor>();
tensor2->mutable_data<float>(framework::make_ddim({10}), place);
// expected result
std::map<std::string, cinn_pod_value_t> name2argument;
std::vector<std::unique_ptr<cinn_buffer_t>> hold_buffers;
AppendExecutionArguments(scope, {"var1", "temp_var2"},
{{"var1", "cinn_var1"}}, &name2argument,
&hold_buffers);
ASSERT_EQ(name2argument.size(), 2);
ASSERT_EQ(hold_buffers.size(), 2);
EXPECT_NE(name2argument.count("cinn_var1"), 0);
EXPECT_NE(name2argument.count("temp_var2"), 0);
EXPECT_EQ(static_cast<cinn_buffer_t*>(name2argument.at("cinn_var1")),
hold_buffers.front().get());
EXPECT_EQ(static_cast<cinn_buffer_t*>(name2argument.at("temp_var2")),
hold_buffers.back().get());
}
} // namespace details
} // namespace operators
} // namespace paddle
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/cinn_launch_op.h"
#include <stdlib.h>
#include <random>
#include <string>
@@ -31,10 +32,8 @@ USE_OP(elementwise_add);
namespace paddle {
namespace operators {
using framework::LoDTensor;
using framework::ir::Graph;
using framework::ir::Node;
using framework::paddle2cinn::CinnCompiler;
std::unique_ptr<Graph> CreateOnlyElementwiseAddGraph(
const std::string& x_name, const std::string& y_name,
@@ -159,6 +158,7 @@ TEST(CinnLaunchOpTest, TestElementwiseAddPass) {
run_and_check_fn(platform::CPUPlace());
run_and_check_fn(platform::CPUPlace());
#ifdef PADDLE_WITH_CUDA
// create a new elementwise_add op
// because the above one cached the cpu kernel
LOG(INFO) << "Check compute result on gpu";
@@ -170,7 +170,135 @@ TEST(CinnLaunchOpTest, TestElementwiseAddPass) {
{{"Out", {expected_out_name}}}, {{}});
run_and_check_fn(platform::CUDAPlace());
run_and_check_fn(platform::CUDAPlace());
#endif
}
namespace details {
// The following tests cover the helper functions used by CinnLaunchOpKernel:
// first build test data, then check both expected and illegal situations
using CinnShape = ::cinn::hlir::framework::Shape;
TEST(CinnLaunchOpHelperTest, TestPlaceToCinnTarget) {
ASSERT_EQ(PlaceToCinnTarget(platform::CPUPlace()),
::cinn::common::DefaultHostTarget());
ASSERT_EQ(PlaceToCinnTarget(platform::CUDAPlace(0)),
::cinn::common::DefaultNVGPUTarget());
ASSERT_THROW(PlaceToCinnTarget(platform::XPUPlace()),
paddle::platform::EnforceNotMet);
}
TEST(CinnLaunchOpHelperTest, TestMapPaddleVariablesToCinn) {
std::unordered_map<std::string, std::string> varmap(
{{"var1", "cinn_var1"}, {"var2", "cinn_var2"}, {"var3", "cinn_var3"}});
auto cinn_names = MapPaddleVariablesToCinn({"var1", "var3"}, varmap);
ASSERT_EQ(cinn_names.size(), 2);
EXPECT_EQ(cinn_names, std::vector<std::string>({"cinn_var1", "cinn_var3"}));
ASSERT_THROW(MapPaddleVariablesToCinn({"var1", "not_exist"}, varmap),
paddle::platform::EnforceNotMet);
}
TEST(CinnLaunchOpHelperTest, TestGetCinnTensorsFromCompiledScope) {
CinnScope cinn_scope;
cinn_scope.Var<CinnTensor>("cinn_var1");
cinn_scope.Var<CinnTensor>("cinn_var2");
cinn_scope.Var<CinnTensor>("cinn_var3");
auto cinn_tensors =
GetCinnTensorsFromCompiledScope({"cinn_var1", "cinn_var3"}, cinn_scope);
ASSERT_EQ(cinn_tensors.size(), 2);
ASSERT_EQ(cinn_tensors.front().get(),
cinn_scope.GetTensor("cinn_var1").get());
ASSERT_EQ(cinn_tensors.back().get(), cinn_scope.GetTensor("cinn_var3").get());
ASSERT_THROW(
GetCinnTensorsFromCompiledScope({"cinn_var1", "not_exist"}, cinn_scope),
paddle::platform::EnforceNotMet);
}
TEST(CinnLaunchOpHelperTest, TestCheckTensorEquivalent) {
platform::CPUPlace place;
framework::Scope scope;
auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
tensor1->mutable_data<float>(framework::make_ddim({5, 8}), place);
CinnScope cinn_scope;
cinn_scope.Var<CinnTensor>("cinn_var1");
auto cinn_tensor1 = cinn_scope.GetTensor("cinn_var1");
cinn_tensor1->Resize(CinnShape({5, 8}));
cinn_tensor1->set_type(::cinn::common::type_of<float>());
ASSERT_NO_THROW(CheckTensorEquivalent("var1", tensor1, cinn_tensor1));
auto tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
ASSERT_THROW(CheckTensorEquivalent("var2", tensor2, cinn_tensor1),
paddle::platform::EnforceNotMet);
cinn_tensor1->Resize(CinnShape({5, 7}));
ASSERT_THROW(CheckTensorEquivalent("var1", tensor1, cinn_tensor1),
paddle::platform::EnforceNotMet);
}
TEST(CinnLaunchOpHelperTest, TestTensorMutableDataWithCinnInfo) {
platform::CPUPlace place;
framework::Scope scope;
auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
CinnScope cinn_scope;
cinn_scope.Var<CinnTensor>("cinn_var1");
auto cinn_tensor1 = cinn_scope.GetTensor("cinn_var1");
cinn_tensor1->Resize(CinnShape({5, 8}));
ASSERT_NO_THROW(TensorMutableDataWithCinnInfo(place, cinn_tensor1, tensor1));
ASSERT_TRUE(tensor1->IsInitialized());
ASSERT_EQ(tensor1->dims(), framework::make_ddim({5, 8}));
}
TEST(CinnLaunchOpHelperTest, TestSeperateTempVar) {
CinnScope cinn_scope;
cinn_scope.Var<CinnTensor>("cinn_var1");
cinn_scope.Var<CinnTensor>("cinn_var2");
cinn_scope.Var<CinnTensor>("cinn_var3");
cinn_scope.Var<CinnTensor>("cinn_var4");
auto temp_names =
SeperateTempVar(cinn_scope, {"cinn_var1", "cinn_var2"}, {"cinn_var4"});
ASSERT_EQ(temp_names.size(), 1);
EXPECT_EQ(temp_names.front(), "cinn_var3");
ASSERT_THROW(
SeperateTempVar(cinn_scope, {"cinn_var1", "not_exist"}, {"cinn_var4"}),
paddle::platform::EnforceNotMet);
}
TEST(CinnLaunchOpHelperTest, TestShareTensorWithCinnBuffer) {
platform::CPUPlace place;
framework::Scope scope;
auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
tensor1->mutable_data<float>(framework::make_ddim({5, 6}), place);
auto* data1 = tensor1->data<float>();
data1[0] = 9.99f;
data1[10] = 19.99f;
auto cinn_buffer = ShareTensorWithCinnBuffer(tensor1);
ASSERT_NE(cinn_buffer->memory, nullptr);
ASSERT_EQ(cinn_buffer->num_elements(), 30);
auto* shadow_data = reinterpret_cast<float*>(cinn_buffer->memory);
EXPECT_FLOAT_EQ(shadow_data[0], 9.99f);
EXPECT_FLOAT_EQ(shadow_data[10], 19.99f);
}
TEST(CinnLaunchOpHelperTest, TestCheckArgumentsNotMissed) {
CinnScope cinn_scope;
cinn_scope.Var<CinnTensor>("cinn_var1");
cinn_scope.Var<CinnTensor>("cinn_var2");
std::map<std::string, cinn_pod_value_t> name2argument(
{{"cinn_var1", cinn_pod_value_t()}, {"cinn_var2", cinn_pod_value_t()}});
ASSERT_NO_THROW(CheckArgumentsNotMissed(cinn_scope, name2argument));
name2argument.erase("cinn_var2");
ASSERT_THROW(CheckArgumentsNotMissed(cinn_scope, name2argument),
paddle::platform::EnforceNotMet);
}
} // namespace details
} // namespace operators
} // namespace paddle