Unverified commit 1e323137 authored by hong, committed by GitHub

Support feed op new ir (#54840)

* add fetch kernel

* support fetch var in new ir

* fix bug

* polish code

* change array equal to np.testing

* support feed in new ir

* fix bug

* try to hack combine op

* add scope guard

* revert atan2 op

* polish code
Parent 5d9af9db
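The net effect of the commit, as a minimal usage sketch. This mirrors the new TestFeedOp test added at the bottom of the diff, and assumes FLAGS_enable_new_ir_in_executor is switched on so the new-IR path is taken:

```python
import numpy as np
import paddle

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

main_program = paddle.static.Program()
with paddle.static.program_guard(main_program):
    # each data var lowers to a pd.feed op; its new "col" attribute records
    # which column of the executor's feed variable it should read
    x = paddle.static.data("x", [2, 2], dtype="float32")
    y = paddle.static.data("y", [2, 2], dtype="float32")
    z = x + y

np_a = np.random.rand(2, 2).astype("float32")
np_b = np.random.rand(2, 2).astype("float32")
out = exe.run(main_program, feed={"x": np_a, "y": np_b}, fetch_list=[z.name])
np.testing.assert_array_equal(out[0], np_a + np_b)
```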
......@@ -952,8 +952,8 @@ void BuildOpFuncList(
auto op_name = attr_map.at("op_name").dyn_cast<::ir::StrAttribute>().data();
if (op_name == "builtin.combine") {
VLOG(6) << "skip process pd.fetch op";
if (op_name == "builtin.combine" || op_name == "pd.feed") {
VLOG(6) << "skip process " << op_name;
continue;
}
......
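Why the skip: pd.feed produces its tensor at scope-build time (see the BuildScope change below), so no op-func node or kernel launch is needed for it, just as none is needed for builtin.combine. A toy Python model of the dispatch, not the real C++:

```python
# Ops handled structurally rather than by a phi kernel are skipped when
# building the op-func list; the feed output already exists in the scope.
SKIP_OPS = {"builtin.combine", "pd.feed"}

def build_op_func_list(ops):
    func_nodes = []
    for op in ops:
        if op["name"] in SKIP_OPS:
            continue  # mirrors: VLOG(6) << "skip process " << op_name
        func_nodes.append(("kernel_for", op["name"]))
    return func_nodes

assert build_op_func_list([{"name": "pd.feed"}, {"name": "pd.add"}]) == [
    ("kernel_for", "pd.add")
]
```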
......@@ -192,7 +192,7 @@ FetchList NewIRInterpreter::Run(const std::vector<std::string>& feed_names,
local_scope_,
value_2_var_name_map_,
execution_config_);
-  SetFeedVarsInplaceSkip(feed_names);
+  // SetFeedVarsInplaceSkip(feed_names);
// convert vec func_list to graph
Convert(&op_func_nodes);
UpdateSyncOpNum();
......
......@@ -69,7 +69,6 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
if (FLAGS_enable_new_ir_in_executor) {
VLOG(6) << "begin to translate" << std::endl;
auto base_program = paddle::TranslateLegacyProgramToProgram(*program);
auto kernel_program =
paddle::dialect::PdOpLowerToKernelPass(base_program.get());
interpretercores_.emplace_back(std::make_shared<InterpreterCore>(
......
......@@ -2,6 +2,7 @@
inputs: []
attrs:
- {typename: str, name: name}
+  - {typename: int, name: col}
outputs:
- {typename: Tensor, name: out, optional: false, intermediate: false}
no_need_buffer: null
......
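The new col attribute ties each pd.feed op to one slot of the executor-level feed variable. A toy model of the positional lookup (the real one is the FeedList indexing in BuildScope further down):

```python
# Hypothetical stand-in for paddle::framework::FeedList: fed tensors are
# stored positionally, and each pd.feed op reads the slot given by "col".
feed_list = ["tensor_for_x", "tensor_for_y"]

def run_feed(attrs):
    # attrs comes from the op's attribute map, e.g. {"name": "y", "col": 1}
    return feed_list[attrs["col"]]

assert run_feed({"name": "y", "col": 1}) == "tensor_for_y"
```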
......@@ -35,6 +35,9 @@ phi::KernelKey GetKernelKey(
ir::Operation* op,
const phi::Place& place,
const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair) {
if (op->name() == "pd.feed") {
return {phi::Backend::CPU, phi::DataLayout::ANY, phi::DataType::FLOAT32};
}
phi::Backend kernel_backend = phi::Backend::UNDEFINED;
phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
phi::DataType kernel_data_type = phi::DataType::UNDEFINED;
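pd.feed has no operands, so the operand-driven inference below would leave every field UNDEFINED; the early return above substitutes a fixed placeholder key instead. A toy model of that fallback (the concrete CPU/ANY/FLOAT32 choice is a stopgap, not derived from the fed data):

```python
# Simplified picture of GetKernelKey: feed gets a hard-coded key because
# there are no inputs from which to infer backend/layout/dtype.
def get_kernel_key(op_name, operand_keys):
    if op_name == "pd.feed":
        return ("CPU", "ANY", "FLOAT32")
    backend, layout, dtype = None, None, None  # UNDEFINED
    for key in operand_keys:  # normally refined from the operands
        backend, layout, dtype = key
    return (backend, layout, dtype)

assert get_kernel_key("pd.feed", []) == ("CPU", "ANY", "FLOAT32")
```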
......@@ -110,7 +113,9 @@ phi::KernelKey GetKernelKey(
continue;
}
auto input_tmp = op->operand(i).source();
+        auto new_input_tmp = map_value_pair.at(input_tmp);
-        auto input_type = input_tmp.type();
+        auto input_type = new_input_tmp.type();
dialect::AllocatedDenseTensorType type;
if (input_type.isa<dialect::AllocatedDenseTensorType>()) {
......@@ -181,32 +186,34 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
std::vector<ir::Type> op_output_types;
if ((*it)->num_results() > 0) {
-        auto result_type = (*it)->result(0).type();
-        if (result_type.isa<dialect::DenseTensorType>()) {
-          auto allocated_dense_tensor_dtype =
-              paddle::dialect::AllocatedDenseTensorType::get(
-                  ctx,
-                  phi::TransToPhiPlace(kernel_key.backend()),
-                  result_type.dyn_cast<dialect::DenseTensorType>());
-          op_output_types.push_back(allocated_dense_tensor_dtype);
-        } else if (result_type.isa<ir::VectorType>()) {
-          auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];
-          if (pos1.isa<dialect::DenseTensorType>()) {
-            auto allocated_dense_tensor_dtype =
-                paddle::dialect::AllocatedDenseTensorType::get(
-                    ctx,
-                    phi::TransToPhiPlace(kernel_key.backend()),
-                    pos1.dyn_cast<dialect::DenseTensorType>());
-            op_output_types.push_back(allocated_dense_tensor_dtype);
-          } else {
-            PADDLE_THROW(phi::errors::Unimplemented(
-                "only support dense tensor in vector type for now"));
-          }
-          ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
-          op_output_types.clear();
-          op_output_types.push_back(t1);
-        }
+        for (size_t i = 0; i < (*it)->num_results(); ++i) {
+          auto result_type = (*it)->result(i).type();
+          if (result_type.isa<dialect::DenseTensorType>()) {
+            auto allocated_dense_tensor_dtype =
+                paddle::dialect::AllocatedDenseTensorType::get(
+                    ctx,
+                    phi::TransToPhiPlace(kernel_key.backend()),
+                    result_type.dyn_cast<dialect::DenseTensorType>());
+            op_output_types.push_back(allocated_dense_tensor_dtype);
+          } else if (result_type.isa<ir::VectorType>()) {
+            auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];
+            if (pos1.isa<dialect::DenseTensorType>()) {
+              auto allocated_dense_tensor_dtype =
+                  paddle::dialect::AllocatedDenseTensorType::get(
+                      ctx,
+                      phi::TransToPhiPlace(kernel_key.backend()),
+                      pos1.dyn_cast<dialect::DenseTensorType>());
+              op_output_types.push_back(allocated_dense_tensor_dtype);
+            } else {
+              PADDLE_THROW(phi::errors::Unimplemented(
+                  "only support dense tensor in vector type for now"));
+            }
+            ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
+            op_output_types.clear();
+            op_output_types.push_back(t1);
+          }
+        }
}
......@@ -249,7 +256,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
// only deal with single output
if ((*it)->num_results() > 0) {
-        map_value_pair[(*it)->result(0)] = op1->result(0);
+        for (size_t i = 0; i < (*it)->num_results(); ++i) {
+          map_value_pair[(*it)->result(i)] = op1->result(i);
+        }
}
program->block()->push_back(op1);
......
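Both hunks above generalize the lowering pass from single-result ops to multi-result ops: output types are now lowered per result, and every result of the original op is mapped to the corresponding result of the lowered op. A toy model of the remapping:

```python
# Toy model: the value map must cover every result, otherwise later ops that
# consume result(1), result(2), ... of a lowered op cannot find their inputs.
def remap_results(old_op_results, new_op_results, value_map):
    for old, new in zip(old_op_results, new_op_results):  # was: index 0 only
        value_map[old] = new

value_map = {}
remap_results(["old.r0", "old.r1"], ["new.r0", "new.r1"], value_map)
assert value_map["old.r1"] == "new.r1"
```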
......@@ -66,6 +66,27 @@ void BuildScope(ir::Block* block,
continue;
}
if (op_name == "pd.feed") {
auto ptr = (*it)->result(0);
std::string name = "inner_var_" + std::to_string(count++);
name_map->emplace(ptr, name);
auto var = scope->Var(name);
// TODO(phlrain): need to update here, support StringTensor
auto out_tensor = var->GetMutable<phi::DenseTensor>();
name_map->emplace(ptr, name);
auto feed_var = scope->Var("feed");
int index =
(*it)->attributes().at("col").dyn_cast<ir::Int32Attribute>().data();
auto feed_list = feed_var->Get<paddle::framework::FeedList>();
auto& in_tensor = (PADDLE_GET(phi::DenseTensor, feed_list.at(index)));
out_tensor->ShareDataWith(in_tensor);
continue;
}
if (op_name == "builtin.combine") {
auto out_value = (*it)->result(0);
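How the feed value actually reaches the graph: BuildScope creates the op's output variable, reads the executor's "feed" variable, and aliases the tensor at column col into it (ShareDataWith shares storage rather than copying). A toy model using plain Python containers:

```python
# Toy model of the pd.feed handling in BuildScope. scope["feed"] stands in
# for the FeedList that the executor's feed step filled before the run.
def build_feed_var(scope, name_map, op_result, col, count):
    name = "inner_var_%d" % count
    name_map[op_result] = name
    # models out_tensor->ShareDataWith(in_tensor): an alias, not a copy
    scope[name] = scope["feed"][col]
    return count + 1

scope = {"feed": ["tensor_x", "tensor_y"]}
name_map = {}
count = build_feed_var(scope, name_map, "feed_y.out", col=1, count=0)
assert scope[name_map["feed_y.out"]] == "tensor_y"
```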
......@@ -162,12 +183,12 @@ void BuildInferMetaContext(
auto runtime_info = std::get<3>(op_yaml_info);
// int input_index = 0;
std::vector<std::string> vec_param_list = runtime_info.infer_meta_param;
+  for (size_t input_index = 0; input_index < vec_param_list.size();
+       input_index++) {
+    auto& t = vec_param_list[input_index];
if (input_index_map.count(t)) {
// get information from input
ir::Value ptr = op->operand(input_index_map[t]).source();
......@@ -197,7 +218,7 @@ void BuildInferMetaContext(
if (var->IsType<phi::DenseTensor>()) {
const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
ctx->EmplaceBackInput(const_cast<phi::TensorBase*>(tensor_in));
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>
inputs;
auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
......@@ -206,6 +227,9 @@ void BuildInferMetaContext(
}
ctx->EmplaceBackInputs(std::move(inputs));
-      }
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
+      }
}
}
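The input dispatch above is deliberately narrowed: dense tensors are emplaced directly, a TensorRefArray expands into one entry per element, and any other variable type now fails with an explicit error instead of falling through silently. Sketched as a Python dispatcher:

```python
# Toy model of the input branching in BuildInferMetaContext.
def emplace_input(ctx_inputs, var_type, value):
    if var_type == "DenseTensor":
        ctx_inputs.append(value)
    elif var_type == "TensorRefArray":
        ctx_inputs.extend(value)  # one meta tensor per array element
    else:
        raise NotImplementedError("Not support var type [%s]" % var_type)

inputs = []
emplace_input(inputs, "TensorRefArray", ["t0", "t1"])
assert inputs == ["t0", "t1"]
```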
......@@ -238,8 +262,7 @@ void BuildInferMetaContext(
}
}
-  // update here, support fetch list for now
-  // [todo update here]
+  // TODO(phlrain): use var type instead of op name
if (op->attributes().count("op_name") &&
(op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
"pd.fetch")) {
......@@ -249,9 +272,11 @@ void BuildInferMetaContext(
auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
ctx->EmplaceBackOutput(out_tensor);
} else {
-    ir::Value out_ptr = op->result(0);
-    auto name = name_map.at(out_ptr);
-    ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
+      auto name = name_map.at(out_ptr);
+      ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
+    }
}
}
......@@ -293,10 +318,14 @@ void BuildPhiKernelContext(
// get information from input
ir::Value ptr = op->operand(input_index_map[t]).source();
auto in_var_name = name_map.at(ptr);
if (input_map != nullptr) {
      // only deals with a single input for now; TODO: support multi-input
      // ops such as concat
+      // TODO(phlrain): OpFuncNode needs input_index and output_index;
+      // construct input_index and output_index here, then remove them from
+      // OpFuncNode. Each in_var_name is named "inner_var_" + index;
+      // len("inner_var_") == 10.
      size_t tmp_id = std::atol(in_var_name.substr(4, 100).c_str());
      (*input_map)[std::to_string(input_index_map.at(t))].push_back(tmp_id);
}
......@@ -331,7 +360,7 @@ void BuildPhiKernelContext(
if (var->IsType<phi::DenseTensor>()) {
const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
ctx->EmplaceBackInput(tensor_in);
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
paddle::small_vector<const phi::TensorBase*> inputs;
auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
for (size_t i = 0; i < tensor_array.size(); ++i) {
......@@ -339,6 +368,13 @@ void BuildPhiKernelContext(
}
ctx->EmplaceBackInputs(std::move(inputs));
-        }
+        } else if (var->IsType<paddle::framework::FeedList>()) {
+          auto feed_list = var->Get<paddle::framework::FeedList>();
+          auto* in_tensor = &(PADDLE_GET(phi::DenseTensor, feed_list.at(0)));
+          ctx->EmplaceBackOutput(in_tensor);
+        } else {
+          PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                  var->Type()));
+        }
}
}
......@@ -371,6 +407,7 @@ void BuildPhiKernelContext(
}
}
+  // TODO(phlrain): use var type instead of op name
if (op->attributes().count("op_name") &&
(op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
"pd.fetch")) {
......@@ -380,16 +417,23 @@ void BuildPhiKernelContext(
auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
ctx->EmplaceBackOutput(out_tensor);
} else {
-    ir::Value out_ptr = op->result(0);
-    auto name = name_map.at(out_ptr);
-    ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
-        &(scope->Var(name)->Get<phi::DenseTensor>())));
-    if (output_map != nullptr) {
-      // only deal with single input for now, [todo] need support multi input
-      // like concat
-      size_t tmp_id = std::atol(name.substr(4, 100).c_str());
-      (*output_map)["out"].push_back(tmp_id);
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
+      auto name = name_map.at(out_ptr);
+      ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
+          &(scope->Var(name)->Get<phi::DenseTensor>())));
+      if (output_map != nullptr) {
+        // only deals with a single output for now; TODO: support
+        // multi-output ops such as concat
+        // TODO(phlrain): OpFuncNode needs input_index and output_index;
+        // construct input_index and output_index here, then remove them from
+        // OpFuncNode. Each in_var_name is named "inner_var_" + index;
+        // len("inner_var_") == 10.
+        size_t tmp_id = std::atol(name.substr(4, 100).c_str());
+        (*output_map)["out"].push_back(tmp_id);
+      }
+    }
}
}
......
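The output side mirrors the inputs: pd.fetch still writes into slot 0 of the scope's FetchList, while every other op now emplaces one output tensor per result and records each result's numeric id, parsed from its "inner_var_N" name, in output_map. A toy model of the id recovery, using prefix-length slicing as the added comment describes (the helper name is hypothetical):

```python
# Hypothetical helper modeling how an integer id is recovered from an inner
# variable name of the form "inner_var_<N>".
def inner_var_id(name, prefix="inner_var_"):
    assert name.startswith(prefix)
    return int(name[len(prefix):])

assert inner_var_id("inner_var_7") == 7
```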
......@@ -540,6 +540,8 @@ ir::Operation* FeedOpHandler(ir::IrContext* ctx,
GenerateOperationOutput(ctx, op_desc, output_infos);
ir::AttributeMap attribute_map = {
{"name", ir::StrAttribute::get(ctx, op_desc.OutputArgumentNames()[0])},
{"col",
ir::Int32Attribute::get(ctx, op_desc.GetAttrIfExists<int>("col"))},
};
ir::Operation* operation =
......
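On the translation side, the legacy feed OpDesc already carries an integer col attribute; FeedOpHandler now copies it into the new-IR attribute map next to the output name, defaulting to 0 when absent (which is what GetAttrIfExists does). A toy model:

```python
# Toy model of the attribute translation in FeedOpHandler.
def translate_feed_attrs(op_desc):
    return {
        "name": op_desc["outputs"][0],
        "col": op_desc["attrs"].get("col", 0),  # GetAttrIfExists<int>("col")
    }

assert translate_feed_attrs(
    {"outputs": ["y"], "attrs": {"col": 1}}
) == {"name": "y", "col": 1}
```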
......@@ -297,6 +297,7 @@
out : Out
- op : atan2
+  backward : atan2_grad
inputs :
{x : X1, y : X2}
outputs :
......
......@@ -1635,6 +1635,7 @@ class Executor:
            )
+            self._feed_data(program, feed, feed_var_name, scope)
            if hasattr(program, 'lr_scheduler'):
                from paddle.optimizer.lr import LRScheduler
......
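The Python executor change re-enables _feed_data on this path, so fed arrays land in the scope's feed variable before the new-IR interpreter runs; without it the pd.feed ops would find an empty FeedList. A toy model of what the feed step provides:

```python
# Toy model: the feed step stores each fed array at the column its feed op
# expects, keyed through the program's feed target names.
def feed_data(scope, feed, col_of_name):
    feed_list = scope.setdefault("feed", {})
    for name, array in feed.items():
        feed_list[col_of_name[name]] = array

scope = {}
feed_data(scope, {"x": "np_a", "y": "np_b"}, {"x": 0, "y": 1})
assert scope["feed"] == {0: "np_a", 1: "np_b"}
```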
......@@ -27,13 +27,15 @@ class TestNewIr(unittest.TestCase):
        place = paddle.CPUPlace()
        exe = paddle.static.Executor(place)
-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
-        z = x + y
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")
+                z = x + y
+                out = exe.run(main_program, {}, fetch_list=[z.name])
        gold_res = np.ones([2, 2], dtype="float32") * 2
......@@ -45,18 +47,47 @@ class TestCombineOp(unittest.TestCase):
        place = paddle.CPUPlace()
        exe = paddle.static.Executor(place)
-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
-        z = paddle.linalg.multi_dot([x, y])
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")
+                z = paddle.linalg.multi_dot([x, y])
+                out = exe.run(main_program, {}, fetch_list=[z.name])
        gold_res = np.ones([2, 2], dtype="float32") * 2
        np.testing.assert_array_equal(out[0], gold_res)
+class TestFeedOp(unittest.TestCase):
+    def test_with_new_ir(self):
+        place = paddle.CPUPlace()
+        exe = paddle.static.Executor(place)
+
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.static.data("x", [2, 2], dtype="float32")
+                y = paddle.static.data("y", [2, 2], dtype="float32")
+
+                z = x + y
+
+                np_a = np.random.rand(2, 2).astype("float32")
+                np_b = np.random.rand(2, 2).astype("float32")
+                out = exe.run(
+                    main_program,
+                    feed={"x": np_a, "y": np_b},
+                    fetch_list=[z.name],
+                )
+
+                gold_res = np_a + np_b
+                np.testing.assert_array_equal(out[0], gold_res)

if __name__ == "__main__":
    unittest.main()