New ir support combine op (#54682)

* add kernel dialect * change DenseTensorTypeStorage to DenseTensorType * add test case * add first pd_op to kernel dialect * lower pd op to kernel dialect * update * update * remove useless code * add attribute print test * fix bug * update * update * update * update * polish code * fix bug * polish code and add python test * add test * fix test error * add env flag * fix bug * revert test env * change cc_test_old to cc_test * fix build_static bug * fix type test error * update cmake * disable test in windows * update * update * fix bug * split file * fix conflict * polish code and fix conflict * polish code * fix bug

New ir support combine op (#54682)
* add kernel dialect * change DenseTensorTypeStorage to DenseTensorType * add test case * add first pd_op to kernel dialect * lower pd op to kernel dialect * update * update * remove useless code * add attribute print test * fix bug * update * update * update * update * polish code * fix bug * polish code and add python test * add test * fix test error * add env flag * fix bug * revert test env * change cc_test_old to cc_test * fix build_static bug * fix type test error * update cmake * disable test in windows * update * update * fix bug * split file * fix conflict * polish code and fix conflict * polish code * fix bug
0cf841c9 · hong · GitHub · 83c78be9 · 0cf841c9 · 0cf841c9
9 changed files
--- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
@@ -948,7 +948,7 @@ void BuildOpFuncList(
    auto op_name = attr_map.at("op_name").dyn_cast<::ir::StrAttribute>().data();
-    if (op_name == "pd.fetch") {
+    if (op_name == "pd.fetch" || op_name == "builtin.combine") {
      VLOG(6) << "skip process pd.fetch op";
      continue;
    }

--- a/paddle/fluid/framework/phi_tensor_base_vector.h
+++ b/paddle/fluid/framework/phi_tensor_base_vector.h
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/extended_tensor.h"
+namespace paddle {
+namespace framework {
+template <typename T>
+struct PhiVectorType;  // Declared only; specializations supply `type_name`, read by PhiVector<T>::name().
+template <typename T>
+class PhiVector : public phi::ExtendedTensor,
+                  public phi::TypeInfoTraits<phi::TensorBase, PhiVector<T>> {
+ public:
+  PhiVector() = default;
+  explicit PhiVector(const std::vector<T>& init_data) : data_(init_data) {}  // Copies init_data into data_.
+  PhiVector(PhiVector&& other) = default;
+  PhiVector(const PhiVector& other) = default;
+  PhiVector& operator=(const PhiVector& other) = default;
+  PhiVector& operator=(const std::vector<T>& other) {  // Replaces the stored elements with a copy of `other`.
+    data_ = other;
+    return *this;
+  }
+  PhiVector& operator=(PhiVector&& other) = default;
+  /// \brief Destroy the PhiVector and release exclusive resources.
+  virtual ~PhiVector() = default;
+ public:
+  /// \brief Returns the name of the class for type traits.
+  /// \return The `type_name` member of a default-constructed PhiVectorType<T>.
+  static const char* name() { return PhiVectorType<T>().type_name; }
+  size_t size() const { return data_.size(); }  // Number of stored elements.
+  void resize(size_t size) { data_.resize(size); }
+  void clear() { data_.clear(); }
+  void emplace_back(const T& feed_data) { data_.emplace_back(feed_data); }  // Appends a copy of feed_data.
+  const T& operator[](size_t index) const { return data_[index]; }  // Forwards to std::vector::operator[] (no bounds check).
+  T& operator[](size_t index) { return data_[index]; }
+  T& at(size_t index) { return data_.at(index); }  // Forwards to std::vector::at (bounds-checked).
+  const T& at(size_t index) const { return data_.at(index); }
+  typename std::vector<T>::iterator begin() { return data_.begin(); }  // Iterators expose data_ directly.
+  typename std::vector<T>::const_iterator begin() const {
+    return data_.begin();
+  }
+  typename std::vector<T>::iterator end() { return data_.end(); }
+  typename std::vector<T>::const_iterator end() const { return data_.end(); }
+ private:
+  std::vector<T> data_;  // Underlying storage; every accessor above forwards to it.
+};
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/string_array.h
+++ b/paddle/fluid/framework/string_array.h
@@ -20,6 +20,8 @@ limitations under the License. */
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include "paddle/fluid/framework/phi_tensor_base_vector.h"
+#include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/extended_tensor.h"
 namespace paddle {
@@ -102,73 +104,11 @@ class Vocab : public phi::ExtendedTensor,
 // Kernel. It can be used when you define a non-tensor type that needs to be
 // stored in a vector as PHI kernel argument.
-template <typename T>
-struct PhiVectorType;
 template <>
 struct PhiVectorType<std::string> {
  const char* type_name = "PhiVectorString";
 };
-template <typename T>
-class PhiVector : public phi::ExtendedTensor,
-                  public phi::TypeInfoTraits<phi::TensorBase, PhiVector<T>> {
- public:
-  PhiVector() = default;
-  explicit PhiVector(const std::vector<T>& init_data) : data_(init_data) {}
-  PhiVector(PhiVector&& other) = default;
-  PhiVector(const PhiVector& other) = default;
-  PhiVector& operator=(const PhiVector& other) = default;
-  PhiVector& operator=(const std::vector<T>& other) {
-    data_ = other;
-    return *this;
-  }
-  PhiVector& operator=(PhiVector&& other) = default;
-  /// \brief Destroy the PhiVector and release exclusive resources.
-  virtual ~PhiVector() = default;
- public:
-  /// \brief Returns the name of the class for type traits.
-  /// \return The name of the class.
-  static const char* name() { return PhiVectorType<T>().type_name; }
-  size_t size() const { return data_.size(); }
-  void resize(size_t size) { data_.resize(size); }
-  void clear() { data_.clear(); }
-  void emplace_back(const T& feed_data) { data_.emplace_back(feed_data); }
-  const T& operator[](size_t index) const { return data_[index]; }
-  T& operator[](size_t index) { return data_[index]; }
-  T& at(size_t index) { return data_.at(index); }
-  const T& at(size_t index) const { return data_.at(index); }
-  typename std::vector<T>::iterator begin() { return data_.begin(); }
-  typename std::vector<T>::const_iterator begin() const {
-    return data_.begin();
-  }
-  typename std::vector<T>::iterator end() { return data_.end(); }
-  typename std::vector<T>::const_iterator end() const { return data_.end(); }
- private:
-  std::vector<T> data_;
-};
 using String = std::string;
 using Strings = PhiVector<std::string>;

--- a/paddle/fluid/framework/tensor_ref_array.h
+++ b/paddle/fluid/framework/tensor_ref_array.h
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "paddle/fluid/framework/phi_tensor_base_vector.h"
+namespace paddle {
+namespace framework {
+template <>
+struct PhiVectorType<const phi::DenseTensor*> {  // Specialization read by PhiVector<const phi::DenseTensor*>::name().
+  const char* type_name = "PhiTensorRefArray";
+};
+using TensorRefArray = PhiVector<const phi::DenseTensor*>;  // Stores raw const pointers; the container itself never deletes the pointed-to tensors.
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/type_info.cc
+++ b/paddle/fluid/framework/type_info.cc
@@ -40,5 +40,7 @@ template class TypeInfoTraits<phi::TensorBase, paddle::framework::Strings>;
 template class TypeInfoTraits<phi::TensorBase, paddle::framework::FeedList>;
 template class TypeInfoTraits<phi::TensorBase, egr::VariableCompatTensor>;
 template class TypeInfoTraits<phi::TensorBase, paddle::prim::DescTensor>;
+template class TypeInfoTraits<phi::TensorBase,
+                              paddle::framework::TensorRefArray>;
 }  // namespace phi
--- a/paddle/fluid/framework/var_type_traits.h
+++ b/paddle/fluid/framework/var_type_traits.h
@@ -25,6 +25,7 @@
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/raw_tensor.h"
 #include "paddle/fluid/framework/string_array.h"
+#include "paddle/fluid/framework/tensor_ref_array.h"
 #include "paddle/fluid/platform/place.h"
 #ifdef PADDLE_WITH_CUDA
 #include <cudnn.h>
@@ -210,7 +211,8 @@ using VarTypeRegistry = detail::VarTypeRegistryImpl<
    std::vector<int>,
    std::vector<float>,
    std::vector<std::string>,
-    RawTensor>;
+    RawTensor,
+    TensorRefArray>;
 template <typename T>
 struct VarTypeTrait {
  static_assert(VarTypeRegistry::IsRegistered<T>(), "Must be registered type");

--- a/paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc
+++ b/paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc
@@ -34,11 +34,17 @@ phi::KernelKey GetKernelKey(
    ir::Operation* op,
    const phi::Place& place,
    const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair) {
+  phi::Backend kernel_backend = phi::Backend::UNDEFINED;
+  phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
+  phi::DataType kernel_data_type = phi::DataType::UNDEFINED;
  paddle::dialect::OpYamlInfoInterface op_info_interface =
      op->dyn_cast<paddle::dialect::OpYamlInfoInterface>();
+  std::vector<paddle::dialect::OpInputInfo> input_info;
+  if (op_info_interface) {
    auto op_info_res = op_info_interface.GetOpInfo();
-  auto input_info = std::get<0>(op_info_res);
+    input_info = std::get<0>(op_info_res);
    // only support non-vector input for now
    std::map<std::string, int> input_map;
@@ -61,11 +67,6 @@ phi::KernelKey GetKernelKey(
    }
    auto runtime_info = std::get<3>(op_info_res);
-  // get dtype infomation
-  phi::Backend kernel_backend = phi::Backend::UNDEFINED;
-  phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
-  phi::DataType kernel_data_type = phi::DataType::UNDEFINED;
    auto attr_map = op->attributes();
    auto data_type_info = runtime_info.kernel_key_dtype;
    if (data_type_info.size() > 0 && data_type_info[0] != "") {
@@ -82,10 +83,10 @@ phi::KernelKey GetKernelKey(
                .dyn_cast<paddle::dialect::DenseTensorType>();
        kernel_data_type = TransToPhiDataType(type.dtype());
      } else {
-      PADDLE_ENFORCE_EQ(
+        PADDLE_ENFORCE_EQ(attr_type_map.count(slot_name),
-          attr_type_map.count(slot_name),
                          true,
-          phi::errors::PreconditionNotMet("[%s] MUST in attr map", slot_name));
+                          phi::errors::PreconditionNotMet(
+                              "[%s] MUST in attr map", slot_name));
        kernel_data_type = attr_map.at(slot_name)
                               .dyn_cast<paddle::dialect::DataTypeAttribute>()
                               .data();
@@ -93,23 +94,30 @@ phi::KernelKey GetKernelKey(
    }
    // parse all the input tensor
    if (tensor_input_number == 0 || op->name() == "pd.full_") {
      // all the information have to get from attribute and context
      kernel_backend = paddle::experimental::ParseBackend(place);
+    }
+  }
-  } else {
+  if (op->num_operands() > 0) {
    paddle::experimental::detail::KernelKeyParser kernel_key_parser;
-    for (size_t i = 0; i < input_info.size(); ++i) {
+    for (size_t i = 0; i < op->num_operands(); ++i) {
      // todo filter attribute tensor
-      if (input_info[i].is_mutable_attribute) {
+      if ((input_info.size() > i) && input_info[i].is_mutable_attribute) {
        continue;
      }
      auto input_tmp = op->operand(i).source();
      auto new_input_tmp = map_value_pair.at(input_tmp);
-      dialect::AllocatedDenseTensorType type =
+      auto input_type = new_input_tmp.type();
-          new_input_tmp.type().dyn_cast<dialect::AllocatedDenseTensorType>();
+      dialect::AllocatedDenseTensorType type;
+      if (input_type.isa<dialect::AllocatedDenseTensorType>()) {
+        type = input_type.dyn_cast<dialect::AllocatedDenseTensorType>();
+      } else if (input_type.isa<ir::VectorType>()) {
+        type = input_type.dyn_cast<ir::VectorType>()[0]
+                   .dyn_cast<dialect::AllocatedDenseTensorType>();
+      }
      // fake tensor here
      auto ptr = new phi::Allocation(nullptr, 0, type.place());
@@ -164,7 +172,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
  for (auto it = block->begin(); it != block->end(); ++it) {
    VLOG(6) << "op name " << (*it)->name();
    auto kernel_key = GetKernelKey(*it, cpu_place, map_value_pair);
+    VLOG(6) << "kernel type " << kernel_key;
    // create new Op
    // only for single output
@@ -172,14 +180,35 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
    std::vector<ir::Type> op_output_types;
    if ((*it)->num_results() > 0) {
-      // filter tensor attribute
+      auto result_type = (*it)->result(0).type();
+      if (result_type.isa<dialect::DenseTensorType>()) {
+        auto allocated_dense_tensor_dtype =
+            paddle::dialect::AllocatedDenseTensorType::get(
+                ctx,
+                phi::TransToPhiPlace(kernel_key.backend()),
+                result_type.dyn_cast<dialect::DenseTensorType>());
+        op_output_types.push_back(allocated_dense_tensor_dtype);
+      } else if (result_type.isa<ir::VectorType>()) {
+        auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];
+        if (pos1.isa<dialect::DenseTensorType>()) {
          auto allocated_dense_tensor_dtype =
              paddle::dialect::AllocatedDenseTensorType::get(
                  ctx,
                  phi::TransToPhiPlace(kernel_key.backend()),
-              (*it)->result(0).type().dyn_cast<dialect::DenseTensorType>());
+                  pos1.dyn_cast<dialect::DenseTensorType>());
          op_output_types.push_back(allocated_dense_tensor_dtype);
+        } else {
+          PADDLE_THROW(phi::errors::Unimplemented(
+              "only support dense tensor in vector type for now"));
+        }
+        ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
+        op_output_types.clear();
+        op_output_types.push_back(t1);
      }
+    }
    // construct input
    std::vector<ir::OpResult> vec_inputs;
@@ -194,13 +223,16 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
    paddle::dialect::OpYamlInfoInterface op_info_interface =
        (*it)->dyn_cast<paddle::dialect::OpYamlInfoInterface>();
+    std::string kernel_fn_str;
+    if (op_info_interface) {
      auto op_info_res = op_info_interface.GetOpInfo();
      auto runtime_info = std::get<3>(op_info_res);
+      kernel_fn_str = runtime_info.kernel_func[0];
+    }
    std::unordered_map<std::string, ir::Attribute> op1_attribute{
        {"op_name", ir::StrAttribute::get(ctx, (*it)->name())},
-        {"kernel_name",
+        {"kernel_name", ir::StrAttribute::get(ctx, kernel_fn_str)},
-         ir::StrAttribute::get(ctx, runtime_info.kernel_func[0])},
        {"kernel_key", dialect::KernelAttribute::get(ctx, kernel_key)}};
    auto op_attr_map = (*it)->attributes();

--- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
+++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
@@ -30,6 +30,8 @@
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/phi/core/kernel_context.h"
+#include "paddle/fluid/framework/string_array.h"
+#include "paddle/fluid/framework/tensor_ref_array.h"
 #include "paddle/fluid/ir/dialect/kernel_attribute.h"
 #include "paddle/fluid/ir/dialect/pd_attribute.h"
@@ -70,6 +72,36 @@ void BuildScope(ir::Block* block,
      continue;
    }
+    if (op_name == "builtin.combine") {
+      auto out_value = (*it)->result(0);
+      VLOG(5) << "process builtin combine";
+      std::string name;
+      if (name_map->find(out_value) != name_map->end()) {
+        name = name_map->at(out_value);
+      } else {
+        name = "inner_var_" + std::to_string(count++);
+        name_map->emplace(out_value, name);
+      }
+      auto var = scope->Var(name);
+      auto tensor_array = var->GetMutable<paddle::framework::TensorRefArray>();
+      for (size_t i = 0; i < input_num; ++i) {
+        auto ptr = (*it)->operand(i).source();
+        PADDLE_ENFORCE_EQ(name_map->count(ptr),
+                          true,
+                          phi::errors::PreconditionNotMet(
+                              "can not found input of combine op"));
+        tensor_array->emplace_back(
+            &(scope->Var(name_map->at(ptr))->Get<phi::DenseTensor>()));
+      }
+      continue;
+    }
    if (input_num > 0) {
      for (size_t i = 0; i < input_num; ++i) {
        auto ptr = (*it)->operand(i).source();
@@ -138,7 +170,10 @@ void BuildInferMetaContext(
  // int input_index = 0;
  std::vector<std::string> vec_param_list = runtime_info.infer_meta_param;
-  for (auto& t : vec_param_list) {
+  for (size_t input_index = 0; input_index < vec_param_list.size();
+       input_index++) {
+    auto& t = vec_param_list[input_index];
    if (input_index_map.count(t)) {
      // get information from input
      ir::Value ptr = op->operand(input_index_map[t]).source();
@@ -165,8 +200,19 @@ void BuildInferMetaContext(
      } else {
        VLOG(6) << "ctx->EmplaceBackInput: " << t << "\t" << in_var_name;
        auto var = scope->Var(in_var_name);
+        if (var->IsType<phi::DenseTensor>()) {
          const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
          ctx->EmplaceBackInput(const_cast<phi::TensorBase*>(tensor_in));
+        } else {
+          paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>
+              inputs;
+          auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
+          for (size_t i = 0; i < tensor_array.size(); ++i) {
+            inputs.emplace_back(std::move(phi::MetaTensor(*tensor_array[i])));
+          }
+          ctx->EmplaceBackInputs(std::move(inputs));
+        }
      }
    }
@@ -277,8 +323,18 @@ void BuildPhiKernelContext(
                                            in_var_name));
        auto var = scope->Var(in_var_name);
+        if (var->IsType<phi::DenseTensor>()) {
          const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
          ctx->EmplaceBackInput(tensor_in);
+        } else {
+          paddle::small_vector<const phi::TensorBase*> inputs;
+          auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
+          for (size_t i = 0; i < tensor_array.size(); ++i) {
+            inputs.emplace_back(tensor_array[i]);
+          }
+          ctx->EmplaceBackInputs(std::move(inputs));
+        }
      }
    }

--- a/test/ir/new_ir/test_standalone_new_ir.py
+++ b/test/ir/new_ir/test_standalone_new_ir.py
@@ -22,7 +22,33 @@ import paddle
 paddle.enable_static()
-class TestNewIr(unittest.TestCase):
+# class TestNewIr(unittest.TestCase):
+#     def test_with_new_ir(self):
+#         place = paddle.CPUPlace()
+#         exe = paddle.static.Executor(place)
+#         x = paddle.ones([2, 2], dtype="float32")
+#         y = paddle.ones([2, 2], dtype="float32")
+#         z = x + y
+#         out = exe.run(
+#             paddle.static.default_main_program(), {}, fetch_list=[z.name]
+#         )
+#         gold_res = np.ones([2, 2], dtype="float32") * 2
+#         self.assertEqual(
+#             np.array_equal(
+#                 np.array(
+#                     paddle.static.global_scope().find_var(z.name).get_tensor()
+#                 ),
+#                 gold_res,
+#             ),
+#             True,
+#         )
+class TestCombineOp(unittest.TestCase):
    def test_with_new_ir(self):
        place = paddle.CPUPlace()
        exe = paddle.static.Executor(place)
@@ -30,7 +56,7 @@ class TestNewIr(unittest.TestCase):
        x = paddle.ones([2, 2], dtype="float32")
        y = paddle.ones([2, 2], dtype="float32")
-        z = x + y
+        z = paddle.linalg.multi_dot([x, y])
        out = exe.run(
            paddle.static.default_main_program(), {}, fetch_list=[z.name]
        )