Unverified Commit 0dc6efaa, authored by Aurelius84 and committed by GitHub

[NewIR]Support Build(GroupPtr) Logic in NewIRCompiler and Add UT (#56960)

* [NewIR]Support Build(GroupOps) in NewIRCompiler and Add UT

* fix unittest
Parent: 7daffbf8
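At a high level, this change makes newir::Group self-initializing (its kernel function name is now derived in the constructor) and exposes NewIRCompiler::Build(groups) as a public entry point, so callers can compile an explicit set of op groups. A minimal usage sketch, distilled from the new TEST(NewIRCompiler, CompileGroupOps) added below (it assumes the same headers and NVGPU target as that test; BuildProgram() is the test helper defined in this diff):

    // Build an ::ir::Program plus its op groups, then compile and run it.
    auto prog_info = BuildProgram();  // std::tuple<std::shared_ptr<::ir::Program>, std::vector<GroupPtr>>
    std::shared_ptr<::ir::Program> program = std::get<0>(prog_info);
    std::vector<GroupPtr> groups = std::get<1>(prog_info);

    auto target = cinn::common::DefaultNVGPUTarget();
    auto scope = cinn::hlir::framework::BuildScope(target, *program);

    cinn::hlir::framework::NewIRCompiler ir_compiler(*program, target, scope);
    auto runtime_program = ir_compiler.Build(groups);  // the new public overload
    runtime_program->Execute();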
@@ -16,6 +16,7 @@
 #include <string>
 #include <vector>
+#include "paddle/cinn/hlir/framework/new_ir/utils.h"
 #include "paddle/cinn/hlir/framework/op.h"
 #include "paddle/ir/core/operation.h"
@@ -30,20 +31,26 @@ struct Group {
  public:
   explicit Group(const std::vector<::ir::Operation*>& group_ops)
       : ops(group_ops) {
-    op_pattern_kind = OpPatternKind::kElementWise;
-    fn_name = "fn_";
-    for (auto& op : group_ops) {
-      fn_name += "_" + op->name();
-    }
+    Initialize();
+  }
+
+  explicit Group(std::initializer_list<::ir::Operation*> group_ops)
+      : ops(group_ops) {
+    Initialize();
   }
-  int group_id;
-  std::string fn_name;
-  OpPatternKind op_pattern_kind;
   std::vector<::ir::Operation*> ops;
   std::vector<std::string> input_names;
   std::vector<std::string> output_names;
+  int group_id;
+  // FIXME(Aurelius84): This should be refactored with CinnGroupOp
+  OpPatternKind op_pattern_kind;
+  std::string fn_name;
+
+ private:
+  void Initialize() {
+    op_pattern_kind = OpPatternKind::kElementWise;
+    fn_name = CompatibleInfo::GroupOpsName(ops);
+  }
 };
} // namespace newir
......
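With the refactor above, fn_name and op_pattern_kind are filled in by Initialize() at construction time instead of being assigned by each caller. A short sketch of the resulting usage (op_a and op_b are hypothetical ::ir::Operation* values, not names from this diff):

    std::vector<::ir::Operation*> ops = {op_a, op_b};
    auto group = std::make_shared<cinn::hlir::framework::newir::Group>(ops);
    // group->fn_name is already populated via CompatibleInfo::GroupOpsName(ops),
    // e.g. "fn_<op-name>_<unique-id>..." (suffixes come from
    // Context::Global().NewName), so no manual assignment is needed.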
@@ -43,7 +43,7 @@ ir::Tensor GetTensor(const ::ir::Value& value) {
   auto type_info = value.type().dyn_cast<paddle::dialect::DenseTensorType>();
   auto in_shape = phi::vectorize<int>(type_info.dims());
   auto dtype = type_info.dtype();
-  std::string input_id = CompatibleInfo::InputName(value);
+  std::string input_id = CompatibleInfo::ValueName(value);
   return lang::CreatePlaceHolder(
       in_shape, utils::ConvertIRType(dtype), input_id);
 }
@@ -56,15 +56,16 @@ std::vector<ir::Tensor> CollectInputTensor(
   for (auto& operand : op->operands()) {
     CHECK(operand);
     auto in_value = operand.source();
-    ir::Tensor tensor;
+    VLOG(4) << "input tensor name: " << CompatibleInfo::ValueName(in_value);
+    // NOTE(Aurelius84): We always need to create a placeholder for the input
+    // tensor.
+    ir::Tensor tensor = details::GetTensor(in_value);
     if (!tensor_map->count(in_value)) {
-      tensor = details::GetTensor(in_value);
       // record tensor.
       (*tensor_map)[in_value] = tensor;
       // record func input args
-      if (func_args != nullptr) func_args->push_back(tensor);
-    } else {
-      tensor = tensor_map->at(in_value);
+      if (func_args != nullptr) {
+        func_args->push_back(tensor);
+      }
     }
     tensors.push_back(tensor);
   }
@@ -76,7 +77,7 @@ void CollectOutputInfo(const ::ir::Operation* op,
                        std::vector<std::vector<int>>* out_shapes) {
   auto op_results = op->results();
   for (auto& out_value : op_results) {
-    std::string output_id = CompatibleInfo::OutputName(out_value);
+    std::string output_id = CompatibleInfo::ValueName(out_value);
     // group->output_names.push_back(output_id);
     auto type_info =
         out_value.type().dyn_cast<paddle::dialect::DenseTensorType>();
@@ -265,11 +266,11 @@ std::vector<ir::LoweredFunc> OpLowererImpl::PostProcess(
       // output arg tensors
       group_func_arg_tensors->push_back(tensor);
       // output args
+      group->output_names.push_back(tensor->name);
       group_func_args.emplace_back(tensor->buffer, ir::Argument::IO::kOutput);
       arg_name_set.insert(tensor->buffer->name);
     }
   }
-
   if (!done_op_schedule) {
     std::unordered_set<std::string> args_set;
     for (auto arg : group_func_args) {
@@ -329,6 +330,8 @@ std::vector<ir::Expr> OpLowererImpl::LowerOps(
     std::vector<ir::Tensor> op_func_arg_tensors =
         details::CollectInputTensor(op, group_func_arg_tensors, tensor_map);
+    VLOG(4) << "input size:" << op_func_arg_tensors.size();
+
     std::string cinn_op_name = CompatibleInfo::OpName(*op);
     const hlir::framework::Operator* cinn_op = Operator::Get(cinn_op_name);
     auto op_impl = OpStrategy::SelectImpl(strategy[cinn_op](
@@ -348,6 +351,9 @@ std::vector<ir::Expr> OpLowererImpl::LowerOps(
     }
   }
 
+  VLOG(4) << "group_func_arg_tensors.size(): "
+          << group_func_arg_tensors->size();
+
   return func_bodies;
 }
@@ -364,7 +370,7 @@ std::vector<ir::LoweredFunc> OpLowererImpl::DoOpLower(
   // set tensor name = operand hash name
   auto op_results = op->results();
   for (const auto& result : op_results) {
-    std::string output_id = CompatibleInfo::OutputName(result);
+    std::string output_id = CompatibleInfo::ValueName(result);
     cinn_inputs.push_back(common::CINNValue(output_id));
   }
@@ -400,6 +406,8 @@ std::vector<ir::LoweredFunc> OpLowererImpl::DoOpLower(
     }
   }
 
+  VLOG(4) << "op_func_arg_tensors.size(): " << op_func_arg_tensors->size();
+
   // 2.Do lower
   std::string lower_fn_name = CompatibleInfo::OpFuncName(*op);
   std::vector<ir::LoweredFunc> funcs = lang::LowerVec(lower_fn_name,
......
@@ -36,13 +36,8 @@ std::string CompatibleInfo::OpName(const ::ir::Operation& op) {
   return cinn_op_name;
 }
 
-std::string CompatibleInfo::InputName(const ::ir::Value& value) {
-  return CompatibleInfo::kInputPrefix +
-         std::to_string(std::hash<::ir::Value>()(value));
-}
-
-std::string CompatibleInfo::OutputName(const ::ir::Value& value) {
-  return CompatibleInfo::kOutputPrefix +
+std::string CompatibleInfo::ValueName(const ::ir::Value& value) {
+  return CompatibleInfo::kNamePrefix +
          std::to_string(std::hash<::ir::Value>()(value));
 }
@@ -55,10 +50,10 @@ std::string CompatibleInfo::OpFuncName(const ::ir::Operation& op) {
 std::string CompatibleInfo::GroupOpsName(
     const std::vector<::ir::Operation*>& ops) {
-  std::string name = "fn_";
+  std::string name = "fn";
   for (auto* op : ops) {
     std::string op_name = OpName(*op);
-    name += cinn::common::Context::Global().NewName(op_name);
+    name += "_" + cinn::common::Context::Global().NewName(op_name);
   }
   return name;
 }
@@ -69,7 +64,7 @@ std::vector<std::string> CompatibleInfo::InputNames(const ::ir::Operation& op,
   std::unordered_set<std::string> repeat;
   for (int i = 0; i < op.num_operands(); ++i) {
     auto value = op.operand_source(i);
-    std::string name = CompatibleInfo::InputName(value);
+    std::string name = CompatibleInfo::ValueName(value);
     if (!allow_duplicate && repeat.count(name)) {
       continue;
     }
@@ -84,7 +79,7 @@ std::vector<std::string> CompatibleInfo::OutputNames(
   std::vector<std::string> names;
   for (int i = 0; i < op.num_results(); ++i) {
     auto value = op.result(i);
-    std::string name = CompatibleInfo::OutputName(value);
+    std::string name = CompatibleInfo::ValueName(value);
     names.push_back(std::move(name));
   }
   return names;
......
@@ -24,17 +24,14 @@ namespace framework {
 namespace newir {
 
 struct CompatibleInfo {
-  static constexpr char* kInputPrefix = "input_";
-  static constexpr char* kOutputPrefix = "output_";
+  static constexpr char* kNamePrefix = "var_";
   // TODO(Aurelius): Need to add name-mapping logic in the REGISTER_CINN_OP
   // macro, or attempt to unify op names between Paddle and CINN.
   static const std::unordered_map<std::string, std::string> OP_NAMES;
 
   static std::string OpName(const ::ir::Operation& op);
 
-  static std::string InputName(const ::ir::Value& value);
-  static std::string OutputName(const ::ir::Value& value);
+  static std::string ValueName(const ::ir::Value& value);
 
   static std::string OpFuncName(const ::ir::Operation& op);
......
@@ -35,7 +35,6 @@ std::unique_ptr<Program> NewIRCompiler::Build() {
        ++it) {
     std::vector<::ir::Operation*> ops = {*it};
     groups.push_back(std::make_shared<newir::Group>(ops));
-    groups.back()->fn_name = CompatibleInfo::GroupOpsName(groups.back()->ops);
   }
   VLOG(4) << "Groups size: " << groups.size();
   return std::move(Build(groups));
@@ -103,15 +102,12 @@ std::vector<std::unique_ptr<Instruction>> NewIRCompiler::BuildInstructions(
     const std::vector<newir::GroupPtr>& groups) {
   std::vector<std::unique_ptr<Instruction>> instructions;
   for (int idx = 0; idx < groups.size(); ++idx) {
-    // TODO(Aurelius84): only support single op in groups
-    auto& op = *(groups[idx]->ops[0]);
     auto& fn_name = groups[idx]->fn_name;
-    auto instr = std::unique_ptr<Instruction>(
-        new Instruction(target_,
-                        scope_.get(),
-                        CompatibleInfo::InputNames(op),
-                        CompatibleInfo::OutputNames(op),
-                        fn_name));
+    auto instr =
+        std::unique_ptr<Instruction>(new Instruction(target_,
+                                                     scope_.get(),
+                                                     groups[idx]->input_names,
+                                                     groups[idx]->output_names,
+                                                     fn_name));
     VLOG(1) << "Lookup kernel name: " << fn_name;
     auto* fn_ptr = compiler_->Lookup(fn_name);
@@ -119,7 +115,7 @@ std::vector<std::unique_ptr<Instruction>> NewIRCompiler::BuildInstructions(
     instr->SetLoweredFunc(reinterpret_cast<void*>(fn_ptr), fn_name);
     // As some instructions, like reduce, will generate more than one kernel,
     // try to find the remaining kernels, if they exist.
-    // SetSubKernels(instr.get(), op_func_name);
+    // SetSubKernels(instr.get(), fn_name);
     instr->Finalize();
     instructions.push_back(std::move(instr));
   }
@@ -131,16 +127,15 @@ std::shared_ptr<Scope> BuildScope(const Target& target,
   std::unordered_set<::ir::Value> visited;
   auto scope = std::make_shared<Scope>();
 
-  auto create_var = [&](const std::string& name_prefix, ::ir::Value value) {
+  auto create_var = [&](::ir::Value value) {
     if (visited.count(value) > 0) return;
     visited.emplace(value);
-    std::string name =
-        name_prefix + std::to_string(std::hash<::ir::Value>()(value));
+    std::string name = CompatibleInfo::ValueName(value);
     auto type_info = value.type().dyn_cast<paddle::dialect::DenseTensorType>();
     auto* var = scope->Var<Tensor>(name);
     auto& tensor = absl::get<Tensor>(*var);
-
     // NOTE: can be replaced with phi::vectorize?
     std::vector<Shape::dim_t> shape;
     for (auto i = 0; i < type_info.dims().size(); ++i) {
       shape.push_back(Shape::dim_t(type_info.dims()[i]));
@@ -150,14 +145,12 @@ std::shared_ptr<Scope> BuildScope(const Target& target,
   };
 
   for (auto it = program.block()->begin(); it != program.block()->end(); ++it) {
-    for (auto i = 0; i < (*it)->num_operands(); ++i) {
-      auto in_value = (*it)->operand_source(i);
-      create_var(CompatibleInfo::kInputPrefix, in_value);
+    for (auto& operand : (*it)->operands()) {
+      create_var(operand.source());
     }
-    for (auto i = 0; i < (*it)->num_results(); ++i) {
-      auto out_value = (*it)->result(i);
-      create_var(CompatibleInfo::kOutputPrefix, out_value);
+    for (auto& result : (*it)->results()) {
+      create_var(result);
     }
   }
   return scope;
......
@@ -40,11 +40,11 @@ class NewIRCompiler final {
   std::unique_ptr<Program> Build();
 
+  std::unique_ptr<Program> Build(const std::vector<newir::GroupPtr>& groups);
+
  private:
   CINN_DISALLOW_COPY_AND_ASSIGN(NewIRCompiler);
 
-  std::unique_ptr<Program> Build(const std::vector<newir::GroupPtr>& groups);
-
   std::vector<ir::LoweredFunc> GetOpFunc(const ::ir::Operation& op, int idx);
 
   void ProcessFunction(const std::vector<ir::LoweredFunc>& lowered_funcs);
......
@@ -17,6 +17,7 @@
 #include <memory>
 #include <sstream>
 #include <string>
+#include <tuple>
 #include <unordered_map>
 
 #include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_dialect.h"
@@ -31,10 +32,15 @@
 #include "paddle/cinn/hlir/framework/convert_to_dialect.h"
 #include "paddle/cinn/hlir/framework/new_ir_compiler.h"
 
-std::unique_ptr<::ir::Program> BuildProgram() {
+using cinn::hlir::framework::newir::Group;
+using cinn::hlir::framework::newir::GroupPtr;
+
+using ProgramInfo =
+    std::tuple<std::shared_ptr<::ir::Program>, std::vector<GroupPtr>>;
+
+ProgramInfo BuildProgram() {
   ::ir::IrContext* ctx = ::ir::IrContext::Instance();
   ctx->GetOrRegisterDialect<paddle::dialect::PaddleDialect>();
-  auto program = std::make_unique<::ir::Program>(ctx);
+  auto program = std::make_shared<::ir::Program>(ctx);
   ::ir::Builder builder = ::ir::Builder(ctx, program->block());
 
   const float value_one = 1.0;  // relu(tan(1.0)) ≈ 1.56
@@ -51,17 +57,30 @@ std::unique_ptr<::ir::Program> BuildProgram() {
                                  phi::DataType::FLOAT32,
                                  phi::GPUPlace());
 
-  auto tanh_op_x = builder.Build<paddle::dialect::TanOp>(full_op_x->result(0));
-  auto relu_op_x = builder.Build<paddle::dialect::ReluOp>(tanh_op_x->result(0));
-  auto tanh_op_y = builder.Build<paddle::dialect::TanOp>(full_op_y->result(0));
-  auto relu_op_y = builder.Build<paddle::dialect::ReluOp>(tanh_op_y->result(0));
-  return std::move(program);
+  auto tan_op_x = builder.Build<paddle::dialect::TanOp>(full_op_x->result(0));
+  auto relu_op_x = builder.Build<paddle::dialect::ReluOp>(tan_op_x->result(0));
+  auto tan_op_y = builder.Build<paddle::dialect::TanOp>(relu_op_x->result(0));
+  auto relu_op_y = builder.Build<paddle::dialect::ReluOp>(tan_op_y->result(0));
+
+  std::vector<GroupPtr> groups;
+  groups.emplace_back(
+      std::make_shared<Group>(std::initializer_list<::ir::Operation*>(
+          {full_op_x.operation()})));  // For coverage
+  groups.emplace_back(std::make_shared<Group>(
+      std::initializer_list<::ir::Operation*>({full_op_y.operation()})));
+  groups.emplace_back(std::make_shared<Group>(
+      std::vector<::ir::Operation*>({tan_op_x.operation(),
+                                     relu_op_x.operation(),
+                                     tan_op_y.operation(),
+                                     relu_op_y.operation()})));
+
+  return {program, groups};
 }
 TEST(NewIRCompiler, CompilerAndRun) {
   // Step 1: Construct ir::Program
-  std::unique_ptr<::ir::Program> program = BuildProgram();
+  auto prog_info = BuildProgram();
+  std::shared_ptr<::ir::Program> program = std::get<0>(prog_info);
   EXPECT_EQ(program->block()->size(), 6u);
   LOG(INFO) << program->block()->size();
@@ -89,9 +108,42 @@ TEST(NewIRCompiler, CompilerAndRun) {
   }
 }
 
+TEST(NewIRCompiler, CompileGroupOps) {
+  // Step 1: Construct ir::Program
+  auto prog_info = BuildProgram();
+  std::shared_ptr<::ir::Program> program = std::get<0>(prog_info);
+  std::vector<GroupPtr> groups = std::get<1>(prog_info);
+  EXPECT_EQ(program->block()->size(), 6u);
+  LOG(INFO) << program->block()->size();
+
+  std::stringstream ss;
+  program->Print(ss);
+  LOG(INFO) << ss.str();
+
+  // Step 2: Compile the new ir::Program into a runtime Program
+  auto target = cinn::common::DefaultNVGPUTarget();
+  auto scope = cinn::hlir::framework::BuildScope(target, *program);
+  ASSERT_EQ(scope->var_names().size(), 6);
+
+  cinn::hlir::framework::NewIRCompiler ir_compiler(*program, target, scope);
+  auto runtime_program = ir_compiler.Build(groups);
+
+  // Step 3: Execute the runtime instructions and check the Scope.
+  ASSERT_NO_THROW(runtime_program->Execute());
+
+  for (auto& var_name : scope->var_names()) {
+    std::string name = {var_name.begin(), var_name.end()};
+    std::vector<float> data =
+        cinn::GetTensorData<float>(scope->GetTensor(name), target);
+    for (int i = 0; i < 1; ++i) {
+      LOG_FIRST_N(INFO, 10) << "data: " << data[i];
+    }
+  }
+}
+
 TEST(RuntimeDialect, CompilerAndRun) {
   // Step 1: Construct ir::Program
-  std::unique_ptr<::ir::Program> program = BuildProgram();
+  auto prog_info = BuildProgram();
+  std::shared_ptr<::ir::Program> program = std::get<0>(prog_info);
   EXPECT_EQ(program->block()->size(), 6u);
 
   // Step 2: Compile the new ir::Program into a runtime Program
@@ -103,7 +155,7 @@ TEST(RuntimeDialect, CompilerAndRun) {
   auto runtime_program = ir_compiler.Build();
 
   // Step 3: Convert into cinn::dialect::RuntimeDialect
-  std::unique_ptr<::ir::Program> ir_runtime_program =
+  std::shared_ptr<::ir::Program> ir_runtime_program =
       cinn::hlir::framework::ConvertToRuntimeDialect(*runtime_program);
 
   // Step 4: Run cinn::dialect::RuntimeDialect
......