Unverified · Commit 1e323137 · authored by H hong, committed by GitHub

Support feed op new ir (#54840)

* add fetch kernel

* support fetch var in new ir

* fix bug

* polish code

* change array equal to np.testing

* support feed in new ir

* fix bug

* try to hack combine op

* add scope guard

* revert atan2 op

* polish code
Parent 5d9af9db
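For context, a minimal sketch of the user-facing path this commit enables, modeled on the TestFeedOp case added at the bottom of the diff. It is not part of the commit: the FLAGS_enable_new_ir_in_executor flag is real (see the StandaloneExecutor hunk below), but exposing it through paddle.set_flags is an assumption; setting the FLAGS_enable_new_ir_in_executor=1 environment variable before launch would be the fallback.

# Sketch only, assuming the flag is settable via paddle.set_flags.
import numpy as np
import paddle

paddle.enable_static()
paddle.set_flags({"FLAGS_enable_new_ir_in_executor": True})  # assumption

place = paddle.CPUPlace()
exe = paddle.static.Executor(place)

main_program = paddle.static.Program()
with paddle.static.scope_guard(paddle.static.Scope()):
    with paddle.static.program_guard(main_program):
        x = paddle.static.data("x", [2, 2], dtype="float32")
        y = paddle.static.data("y", [2, 2], dtype="float32")
        z = x + y

        np_a = np.random.rand(2, 2).astype("float32")
        np_b = np.random.rand(2, 2).astype("float32")
        # The feed dict is what exercises the new pd.feed lowering: each
        # fed name becomes a pd.feed op whose "col" attribute indexes the
        # scope's FeedList variable (see the BuildScope hunk below).
        out = exe.run(
            main_program,
            feed={"x": np_a, "y": np_b},
            fetch_list=[z.name],
        )

np.testing.assert_array_equal(out[0], np_a + np_b)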
@@ -952,8 +952,8 @@ void BuildOpFuncList(
         auto op_name = attr_map.at("op_name").dyn_cast<::ir::StrAttribute>().data();
-        if (op_name == "builtin.combine") {
-          VLOG(6) << "skip process pd.fetch op";
+        if (op_name == "builtin.combine" || op_name == "pd.feed") {
+          VLOG(6) << "skip process " << op_name;
           continue;
         }
......
@@ -192,7 +192,7 @@ FetchList NewIRInterpreter::Run(const std::vector<std::string>& feed_names,
       local_scope_,
       value_2_var_name_map_,
       execution_config_);
-  SetFeedVarsInplaceSkip(feed_names);
+  // SetFeedVarsInplaceSkip(feed_names);
   // convert vec func_list to graph
   Convert(&op_func_nodes);
   UpdateSyncOpNum();
......
@@ -69,7 +69,6 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
   if (FLAGS_enable_new_ir_in_executor) {
     VLOG(6) << "begin to translate" << std::endl;
     auto base_program = paddle::TranslateLegacyProgramToProgram(*program);
-
     auto kernel_program =
         paddle::dialect::PdOpLowerToKernelPass(base_program.get());
     interpretercores_.emplace_back(std::make_shared<InterpreterCore>(
......
@@ -2,6 +2,7 @@
 inputs: []
 attrs:
 - {typename: str, name: name}
+- {typename: int, name: col}
 outputs:
 - {typename: Tensor, name: out, optional: false, intermediate: false}
 no_need_buffer: null
......
@@ -35,6 +35,9 @@ phi::KernelKey GetKernelKey(
     ir::Operation* op,
     const phi::Place& place,
     const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair) {
+  if (op->name() == "pd.feed") {
+    return {phi::Backend::CPU, phi::DataLayout::ANY, phi::DataType::FLOAT32};
+  }
   phi::Backend kernel_backend = phi::Backend::UNDEFINED;
   phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
   phi::DataType kernel_data_type = phi::DataType::UNDEFINED;
@@ -110,7 +113,9 @@ phi::KernelKey GetKernelKey(
       continue;
     }
     auto input_tmp = op->operand(i).source();
+
     auto new_input_tmp = map_value_pair.at(input_tmp);
+
     auto input_type = new_input_tmp.type();
     dialect::AllocatedDenseTensorType type;
     if (input_type.isa<dialect::AllocatedDenseTensorType>()) {
@@ -181,7 +186,8 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
     std::vector<ir::Type> op_output_types;
     if ((*it)->num_results() > 0) {
-      auto result_type = (*it)->result(0).type();
+      for (size_t i = 0; i < (*it)->num_results(); ++i) {
+        auto result_type = (*it)->result(i).type();
       if (result_type.isa<dialect::DenseTensorType>()) {
         auto allocated_dense_tensor_dtype =
             paddle::dialect::AllocatedDenseTensorType::get(
@@ -209,6 +215,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
         op_output_types.push_back(t1);
       }
     }
+    }

     // constuct input
     std::vector<ir::OpResult> vec_inputs;
@@ -249,7 +256,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
     // only deal with single output
     if ((*it)->num_results() > 0) {
-      map_value_pair[(*it)->result(0)] = op1->result(0);
+      for (size_t i = 0; i < (*it)->num_results(); ++i) {
+        map_value_pair[(*it)->result(i)] = op1->result(i);
+      }
     }

     program->block()->push_back(op1);
......
@@ -66,6 +66,27 @@ void BuildScope(ir::Block* block,
       continue;
     }

+    if (op_name == "pd.feed") {
+      auto ptr = (*it)->result(0);
+
+      std::string name = "inner_var_" + std::to_string(count++);
+      name_map->emplace(ptr, name);
+      auto var = scope->Var(name);
+      // TODO(phlrain): need to update here, support StringTensor
+      auto out_tensor = var->GetMutable<phi::DenseTensor>();
+      name_map->emplace(ptr, name);
+
+      auto feed_var = scope->Var("feed");
+      int index =
+          (*it)->attributes().at("col").dyn_cast<ir::Int32Attribute>().data();
+      auto feed_list = feed_var->Get<paddle::framework::FeedList>();
+      auto& in_tensor = (PADDLE_GET(phi::DenseTensor, feed_list.at(index)));
+
+      out_tensor->ShareDataWith(in_tensor);
+
+      continue;
+    }
+
     if (op_name == "builtin.combine") {
       auto out_value = (*it)->result(0);
@@ -162,12 +183,12 @@ void BuildInferMetaContext(
   auto runtime_info = std::get<3>(op_yaml_info);

   // int input_index = 0;
   std::vector<std::string> vec_param_list = runtime_info.infer_meta_param;

   for (size_t input_index = 0; input_index < vec_param_list.size();
        input_index++) {
     auto& t = vec_param_list[input_index];
     if (input_index_map.count(t)) {
       // get information from input
       ir::Value ptr = op->operand(input_index_map[t]).source();
@@ -197,7 +218,7 @@ void BuildInferMetaContext(
       if (var->IsType<phi::DenseTensor>()) {
         const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
         ctx->EmplaceBackInput(const_cast<phi::TensorBase*>(tensor_in));
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
         paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>
             inputs;
         auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
@@ -206,6 +227,9 @@ void BuildInferMetaContext(
         }
         ctx->EmplaceBackInputs(std::move(inputs));
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
       }
     }
   }
@@ -238,8 +262,7 @@ void BuildInferMetaContext(
     }
   }

-  // update here, support fetch list for now
-  // [todo update here]
+  // TODO(phlrain): use var type instead of op name
   if (op->attributes().count("op_name") &&
       (op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
        "pd.fetch")) {
@@ -249,10 +272,12 @@ void BuildInferMetaContext(
     auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
     ctx->EmplaceBackOutput(out_tensor);
   } else {
-    ir::Value out_ptr = op->result(0);
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
     auto name = name_map.at(out_ptr);
     ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
   }
+  }
 }

 void BuildPhiKernelContext(
@@ -293,10 +318,14 @@ void BuildPhiKernelContext(
       // get information from input
       ir::Value ptr = op->operand(input_index_map[t]).source();
       auto in_var_name = name_map.at(ptr);

       if (input_map != nullptr) {
         // only deal with single input for now, [todo] need support multi input
         // like concat
+        // TODO(phlrain): OpFuncNode need input_index and output_index,
+        // construct input_index and output_here, should remove input_index and
+        // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+        // index, len("inner_var_") = 10
         size_t tmp_id = std::atol(in_var_name.substr(4, 100).c_str());
         (*input_map)[std::to_string(input_index_map.at(t))].push_back(tmp_id);
       }
@@ -331,7 +360,7 @@ void BuildPhiKernelContext(
       if (var->IsType<phi::DenseTensor>()) {
         const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
         ctx->EmplaceBackInput(tensor_in);
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
         paddle::small_vector<const phi::TensorBase*> inputs;
         auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
         for (size_t i = 0; i < tensor_array.size(); ++i) {
@@ -339,6 +368,13 @@ void BuildPhiKernelContext(
         }
         ctx->EmplaceBackInputs(std::move(inputs));
+      } else if (var->IsType<paddle::framework::FeedList>()) {
+        auto feed_list = var->Get<paddle::framework::FeedList>();
+        auto* in_tensor = &(PADDLE_GET(phi::DenseTensor, feed_list.at(0)));
+        ctx->EmplaceBackOutput(in_tensor);
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
       }
     }
   }
@@ -371,6 +407,7 @@ void BuildPhiKernelContext(
     }
   }

+  // TODO(phlrain): use var type instead of op name
   if (op->attributes().count("op_name") &&
       (op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
        "pd.fetch")) {
@@ -380,7 +417,8 @@ void BuildPhiKernelContext(
     auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
     ctx->EmplaceBackOutput(out_tensor);
   } else {
-    ir::Value out_ptr = op->result(0);
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
     auto name = name_map.at(out_ptr);
     ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
         &(scope->Var(name)->Get<phi::DenseTensor>())));
@@ -388,10 +426,16 @@ void BuildPhiKernelContext(
     if (output_map != nullptr) {
       // only deal with single input for now, [todo] need support multi input
       // like concat
+      // TODO(phlrain): OpFuncNode need input_index and output_index,
+      // construct input_index and output_here, should remove input_index and
+      // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+      // index, len("inner_var_") = 10
       size_t tmp_id = std::atol(name.substr(4, 100).c_str());
       (*output_map)["out"].push_back(tmp_id);
     }
   }
+  }
 }

 }  // namespace ir
@@ -540,6 +540,8 @@ ir::Operation* FeedOpHandler(ir::IrContext* ctx,
       GenerateOperationOutput(ctx, op_desc, output_infos);
   ir::AttributeMap attribute_map = {
       {"name", ir::StrAttribute::get(ctx, op_desc.OutputArgumentNames()[0])},
+      {"col",
+       ir::Int32Attribute::get(ctx, op_desc.GetAttrIfExists<int>("col"))},
   };
   ir::Operation* operation =
......
@@ -297,6 +297,7 @@
     out : Out

 - op : atan2
+  backward : atan2_grad
   inputs :
     {x : X1, y : X2}
   outputs :
......
@@ -1635,6 +1635,7 @@ class Executor:
         )

         self._feed_data(program, feed, feed_var_name, scope)
+
         if hasattr(program, 'lr_scheduler'):
             from paddle.optimizer.lr import LRScheduler
......
@@ -27,13 +27,15 @@ class TestNewIr(unittest.TestCase):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
-        z = x + y
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")
+                z = x + y
+                out = exe.run(main_program, {}, fetch_list=[z.name])

         gold_res = np.ones([2, 2], dtype="float32") * 2
@@ -45,15 +47,44 @@ class TestCombineOp(unittest.TestCase):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
-        z = paddle.linalg.multi_dot([x, y])
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
-
-        gold_res = np.ones([2, 2], dtype="float32") * 2
-        np.testing.assert_array_equal(out[0], gold_res)
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")
+                z = paddle.linalg.multi_dot([x, y])
+                out = exe.run(main_program, {}, fetch_list=[z.name])
+
+        gold_res = np.ones([2, 2], dtype="float32") * 2
+        np.testing.assert_array_equal(out[0], gold_res)
+
+
+class TestFeedOp(unittest.TestCase):
+    def test_with_new_ir(self):
+        place = paddle.CPUPlace()
+        exe = paddle.static.Executor(place)
+
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.static.data("x", [2, 2], dtype="float32")
+                y = paddle.static.data("y", [2, 2], dtype="float32")
+                z = x + y
+
+                np_a = np.random.rand(2, 2).astype("float32")
+                np_b = np.random.rand(2, 2).astype("float32")
+                out = exe.run(
+                    main_program,
+                    feed={"x": np_a, "y": np_b},
+                    fetch_list=[z.name],
+                )
+
+        gold_res = np_a + np_b
+        np.testing.assert_array_equal(out[0], gold_res)
......