未验证 提交 75c29ac1 编写于 作者: H hong 提交者: GitHub

[NewIR]New ir support print op (#55648)

* new ir support print op

* fix gpu bug

* fix bug

* update

* remove layout to string

* remove useless header

* polish code

* fix bug

* polish code
上级 33e50b27
......@@ -139,7 +139,7 @@
- {typename: 'bool', name: print_tensor_shape, default_value: 'true'}
- {typename: 'bool', name: print_tensor_layout, default_value: 'true'}
- {typename: 'bool', name: print_tensor_lod, default_value: 'true'}
- {typename: 'str', name: print_phase, default_value: 'BOTH'}
- {typename: 'str', name: print_phase, default_value: '"BOTH"'}
- {typename: 'bool', name: is_forward, default_value: 'true'}
outputs:
- typename: Tensor
......@@ -147,6 +147,17 @@
optional: false
no_need_buffer: false
data_transform: {}
infer_meta:
func: UnchangedInferMeta
param: [in]
kernel:
func: [print_kernel]
param: [in, first_n, message, summarize, print_tensor_name, print_tensor_type, print_tensor_shape, print_tensor_layout, print_tensor_lod, print_phase, is_forward ]
backend: null
layout: null
data_type: null
dispatch: {print: null}
force_backend: null
no_need_buffer: null
data_transform: null
inplace: null
......
......@@ -270,10 +270,8 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
auto kernel_key =
GetKernelKey(*it, place, map_value_pair, std::move(op_info_parser));
VLOG(6) << "kernel type " << kernel_key;
// only for single output
// need to update new kernel key layout and data type
VLOG(6) << "kernel type " << kernel_key;
std::vector<ir::Type> op_output_types;
if ((*it)->num_results() > 0) {
......
......@@ -162,7 +162,6 @@ if(WITH_XPU)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} xpulib)
endif()
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} layer)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} tensor_formatter)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} op_version_registry)
......@@ -176,7 +175,6 @@ set(COMMON_OP_DEPS ${COMMON_OP_DEPS} op_version_registry)
set(OPERATOR_DEPS ${OPERATOR_DEPS} ${COMMON_OP_DEPS})
set(GLOB_OPERATOR_DEPS ${OPERATOR_DEPS} CACHE INTERNAL "Global Op dependencies")
cc_library(tensor_formatter SRCS tensor_formatter.cc DEPS ${OP_HEADER_DEPS})
if (WITH_PYTHON)
cc_library(py_func_op SRCS py_func_op.cc DEPS op_registry python pybind)
endif()
......
......@@ -14,7 +14,7 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/controlflow/while_op_helper.h"
#include "paddle/fluid/operators/tensor_formatter.h"
#include "paddle/phi/kernels/funcs/tensor_formatter.h"
namespace phi {
class DenseTensor;
......@@ -70,7 +70,7 @@ class AssertOp : public framework::OperatorBase {
return;
}
TensorFormatter formatter;
funcs::TensorFormatter formatter;
formatter.SetSummarize(Attr<int64_t>(kSummarize));
const std::vector<std::string> &x_names = Inputs(kData);
......
......@@ -14,7 +14,7 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/tensor_formatter.h"
#include "paddle/phi/kernels/funcs/tensor_formatter.h"
namespace phi {
class DenseTensor;
......@@ -87,7 +87,7 @@ class PrintOp : public framework::OperatorBase {
int first_n = Attr<int>("first_n");
if (first_n > 0 && ++times_ > first_n) return;
TensorFormatter formatter;
funcs::TensorFormatter formatter;
const std::string &name =
Attr<bool>("print_tensor_name") ? printed_var_name : "";
formatter.SetPrintTensorType(Attr<bool>("print_tensor_type"));
......
......@@ -18,6 +18,8 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/feed_with_place_impl.h"
#include "paddle/phi/kernels/funcs/tensor_formatter.h"
namespace phi {
template <typename T, typename Context>
......@@ -50,5 +52,19 @@ PD_REGISTER_KERNEL(shadow_feed,
phi::complex64,
phi::complex128) {}
// Register the CPU print kernel for all dtypes the op accepts. Note that
// TensorFormatter only formats data for bool/float/double/int/int64; the
// remaining registered dtypes fall through to its "unprintable type" branch.
PD_REGISTER_KERNEL(print_kernel,
CPU,
ALL_LAYOUT,
phi::PrintKernel,
bool,
float,
int32_t,
int64_t,
double,
phi::float16,
phi::bfloat16,
phi::complex64,
phi::complex128) {}
PD_REGISTER_KERNEL(
shadow_output, CPU, ALL_LAYOUT, phi::ShadowOutputKernel, float) {}
......@@ -35,4 +35,19 @@ void ShadowFeedKernel(const Context& ctx,
const DenseTensor& x,
DenseTensor* out);
// Identity-with-logging kernel backing the Print op: copies `x` into `out`
// and, depending on `print_phase` and `is_forward`, pretty-prints the tensor
// (name/type/shape/layout/lod and up to `summarize` data elements) with
// `message` as a prefix. The scalar/bool/string parameters mirror the
// attributes of the legacy fluid Print op.
template <typename T, typename Context>
void PrintKernel(const Context& ctx,
const DenseTensor& x,
int first_n,
const std::string& message,
int summarize,
bool print_tensor_name,
bool print_tensor_type,
bool print_tensor_shape,
bool print_tensor_layout,
bool print_tensor_lod,
const std::string& print_phase,
bool is_forward,
DenseTensor* out);
} // namespace phi
......@@ -12,14 +12,16 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/tensor_formatter.h"
#include "paddle/phi/kernels/funcs/tensor_formatter.h"
#include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/phi/backends/context_pool.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/tensor_utils.h"
namespace paddle {
namespace operators {
namespace funcs {
void TensorFormatter::SetPrintTensorType(bool print_tensor_type) {
print_tensor_type_ = print_tensor_type;
......@@ -63,7 +65,7 @@ std::string TensorFormatter::Format(const phi::DenseTensor& print_tensor,
if (print_tensor_lod_) {
log_stream << " - lod: {";
const framework::LoD& lod = print_tensor.lod();
const phi::LoD& lod = print_tensor.lod();
for (auto level : lod) {
log_stream << "{";
bool is_first = true;
......@@ -87,29 +89,26 @@ std::string TensorFormatter::Format(const phi::DenseTensor& print_tensor,
}
if (print_tensor_layout_) {
log_stream << " - layout: "
<< phi::DataLayoutToString(print_tensor.layout()) << std::endl;
log_stream << " - layout: " << print_tensor.layout() << std::endl;
}
std::type_index dtype = framework::ToTypeIndex(
framework::TransToProtoVarType(print_tensor.dtype()));
auto dtype = print_tensor.dtype();
if (print_tensor_type_) {
log_stream << " - dtype: " << platform::demangle(dtype.name())
<< std::endl;
log_stream << " - dtype: " << dtype << std::endl;
}
if (framework::IsType<const float>(dtype)) {
if (dtype == phi::DataType::FLOAT32) {
FormatData<float>(print_tensor, log_stream);
} else if (framework::IsType<const double>(dtype)) {
} else if (dtype == phi::DataType::FLOAT64) {
FormatData<double>(print_tensor, log_stream);
} else if (framework::IsType<const int>(dtype)) {
} else if (dtype == phi::DataType::INT32) {
FormatData<int>(print_tensor, log_stream);
} else if (framework::IsType<const int64_t>(dtype)) {
} else if (dtype == phi::DataType::INT64) {
FormatData<int64_t>(print_tensor, log_stream);
} else if (framework::IsType<const bool>(dtype)) {
} else if (dtype == phi::DataType::BOOL) {
FormatData<bool>(print_tensor, log_stream);
} else {
log_stream << " - data: unprintable type: " << dtype.name() << std::endl;
log_stream << " - data: unprintable type: " << dtype << std::endl;
}
return log_stream.str();
}
......@@ -122,11 +121,15 @@ void TensorFormatter::FormatData(const phi::DenseTensor& print_tensor,
: std::min(summarize_, print_tensor.numel());
const T* data = nullptr;
phi::DenseTensor cpu_tensor;
if (paddle::platform::is_cpu_place(print_tensor.place())) {
if (print_tensor.place().GetType() == phi::AllocationType::CPU) {
data = print_tensor.data<T>();
} else {
platform::CPUPlace cpu_place;
paddle::framework::TensorCopy(print_tensor, cpu_place, &cpu_tensor);
phi::CPUPlace cpu_place;
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
auto dev_ctx = pool.Get(print_tensor.place());
phi::Copy(*dev_ctx, print_tensor, cpu_place, true, &cpu_tensor);
data = cpu_tensor.data<T>();
}
......@@ -151,5 +154,5 @@ template void TensorFormatter::FormatData<int>(
template void TensorFormatter::FormatData<int64_t>(
const phi::DenseTensor& print_tensor, std::stringstream& log_stream);
} // namespace operators
} // namespace funcs
} // namespace paddle
......@@ -15,15 +15,14 @@
#pragma once
#include <string>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
class DenseTensor;
} // namespace phi
namespace paddle {
namespace operators {
namespace funcs {
class TensorFormatter {
public:
......@@ -55,5 +54,5 @@ class TensorFormatter {
bool print_tensor_layout_ = true;
};
} // namespace operators
} // namespace funcs
} // namespace paddle
......@@ -31,3 +31,17 @@ PD_REGISTER_KERNEL(shadow_feed,
phi::bfloat16,
phi::complex64,
phi::complex128) {}
// Register the GPU print kernel with the same dtype list as the CPU variant,
// so Print works regardless of where the tensor lives (the formatter copies
// non-CPU tensors to host before printing).
PD_REGISTER_KERNEL(print_kernel,
GPU,
ALL_LAYOUT,
phi::PrintKernel,
bool,
float,
int32_t,
int64_t,
double,
phi::float16,
phi::bfloat16,
phi::complex64,
phi::complex128) {}
......@@ -16,9 +16,13 @@
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/tensor_formatter.h"
namespace phi {
// Phase tags compared against the Print op's `print_phase` attribute (which
// defaults to "BOTH") to decide whether this pass should emit output.
const char kForward[] = "FORWARD";
const char kBackward[] = "BACKWARD";
template <typename T, typename Context>
void ShadowFeedKernel(const Context& ctx,
const DenseTensor& x,
......@@ -32,4 +36,40 @@ void ShadowFeedKernel(const Context& ctx,
}
}
// Copies `x` to `out` unchanged and, when the current pass matches
// `print_phase`, pretty-prints the tensor contents via TensorFormatter.
template <typename T, typename Context>
void PrintKernel(const Context& ctx,
const DenseTensor& x,
int first_n,
const std::string& message,
int summarize,
bool print_tensor_name,
bool print_tensor_type,
bool print_tensor_shape,
bool print_tensor_layout,
bool print_tensor_lod,
const std::string& print_phase,
bool is_forward,
DenseTensor* out) {
// Print is an identity op: forward `x` to `out` (the `true` flag presumably
// requests a blocking copy — TODO confirm against phi::Copy) and preserve
// its LoD so downstream ops observe the same tensor.
phi::Copy<Context>(ctx, x, ctx.GetPlace(), true, out);
out->set_lod(x.lod());
// Skip logging when the phase does not match: a forward-pass call with
// print_phase == "BACKWARD" (or vice versa) stays silent; "BOTH" always logs.
if ((is_forward && print_phase == kBackward) ||
(!is_forward && print_phase == kForward)) {
return;
}
// TODO(phlrain): support first_n using an input tensor
// if (first_n > 0 && ++times_ > first_n) return;
// TODO(phlrain): support printed_var_name
paddle::funcs::TensorFormatter formatter;
// NOTE(review): the real variable name is not plumbed through yet; "var" is
// a placeholder until printed_var_name is supported (see TODO above).
const std::string& name = print_tensor_name ? "var" : "";
formatter.SetPrintTensorType(print_tensor_type);
formatter.SetPrintTensorShape(print_tensor_shape);
formatter.SetPrintTensorLod(print_tensor_lod);
formatter.SetPrintTensorLayout(print_tensor_layout);
formatter.SetSummarize(summarize);
formatter.Print(x, name, message);
}
} // namespace phi
......@@ -243,6 +243,33 @@ class TestSplitOp(unittest.TestCase):
np.testing.assert_array_equal(out[0], np_a[0:2])
class TestNewIrPrint(unittest.TestCase):
    """Checks that paddle.static.Print runs under the new IR executor and
    passes its input through unchanged (Print is an identity op)."""

    def test_with_new_ir(self):
        paddle.enable_static()
        # Prefer the GPU when this build supports CUDA, else fall back to CPU.
        if paddle.is_compiled_with_cuda():
            device = paddle.CUDAPlace(0)
        else:
            device = paddle.CPUPlace()
        executor = paddle.static.Executor(device)
        prog = paddle.static.Program()
        scope = paddle.static.Scope()
        with paddle.static.scope_guard(scope), paddle.static.program_guard(
            prog
        ):
            lhs = paddle.ones([2, 2], dtype="float32")
            rhs = paddle.ones([2, 2], dtype="float32")
            total = lhs + rhs
            # Print both logs the tensor and forwards it, so fetching its
            # output must yield the sum itself.
            total = paddle.static.Print(total)
            fetched = executor.run(prog, {}, fetch_list=[total.name])
        expected = np.ones([2, 2], dtype="float32") * 2
        np.testing.assert_array_equal(fetched[0], expected)
class TestJitSaveOp(unittest.TestCase):
def test_with_new_ir(self):
paddle.disable_static()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册