From 51ebcf68d53b8b13a2f0bcbdab32345829602e12 Mon Sep 17 00:00:00 2001 From: hong <43953930+phlrain@users.noreply.github.com> Date: Thu, 27 Jul 2023 17:02:19 +0800 Subject: [PATCH] [NewIR]Fix new ir dygraph 2 static concat grad bug (#55634) * add kernel dialect * change DenseTensorTypeStorage to DenseTensorType * add test case * add first pd_op to kernel dialect * lower pd op to kernel dialect * update * update * remove useless code * add attribute print test * fix bug * update * update * update * update * polish code * fix bug * polish code and add python test * add test * fix test error * relax constraint when inserting get_parameter * add env flag * fix bug * dygraph2static support new ir * fix bug * revert test env * change cc_test_old to cc_test * update * fix build_static bug * update test * fix type test error * update cmake * disable test in windows * fix inference compile * fix program translator error * only run on cpu, not support gpu yet * fix conflict * polish code * fix bug * add feed with place op * update * remove useless unit test * update mkldnn * update * update * align mkldnn version * new ir support builtin slice op * fix bug * fix phi kernel adaptor bug * add enable static * add enable_static * remove useless test case * change feed list to single variable * update * add feed with place and shaddow output op * fix bug * remove useless code * support gpu * fix bug * fix bug * remove template * add more data type * fix compile bug * update * remove useless code * revert dygraph2st test * remove useless code * revert op * fix bug * remove instance norm * fix concat grad bug * revert code --------- Co-authored-by: kangguangli --- .../ir/phi_kernel_adaptor/phi_kernel_util.h | 1 - .../ir/transforms/pd_op_to_kernel_pass.cc | 27 +++++++++++++--- .../phi/kernels/cpu/feed_with_place_kernel.cc | 1 - .../paddle/jit/dy2static/partial_program.py | 31 ++++++++++--------- test/ir/new_ir/test_standalone_new_ir.py | 1 - 5 files changed, 39 insertions(+), 22
deletions(-) diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h index 08b9baceadf..f053b045038 100644 --- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h +++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h @@ -66,7 +66,6 @@ void BuildPhiContext(ir::Operation* op, local_scope != nullptr ? local_scope : scope; VLOG(6) << "BuildPhiContext in scope[" << scope << "] inner_scope[" << inner_scope << "]"; - // inputs include input and mutable attributes auto attr_map = op->attributes(); diff --git a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc index beb4635bebb..15bc6d7c3d0 100644 --- a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc +++ b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc @@ -292,16 +292,33 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog, std::vector vec_inner_types; auto base_types = result_type.dyn_cast().data(); for (size_t j = 0; j < base_types.size(); ++j) { - if (base_types[j].isa()) { + if (base_types[j]) { + if (base_types[j].isa()) { + auto allocated_dense_tensor_dtype = + paddle::dialect::AllocatedDenseTensorType::get( + ctx, + phi::TransToPhiPlace(kernel_key.backend()), + base_types[j].dyn_cast()); + vec_inner_types.push_back(allocated_dense_tensor_dtype); + } else { + PADDLE_THROW(phi::errors::Unimplemented( + "only support dense tensor in vector type for now")); + } + } else { + // NOTE(phlrain), kernel not support a nullptr in output + ir::Type fp32_dtype = ir::Float32Type::get(ctx); + phi::DDim dims = {}; + phi::DataLayout data_layout = phi::DataLayout::NCHW; + phi::LoD lod = {{}}; + size_t offset = 0; + auto dense_tensor_dtype = paddle::dialect::DenseTensorType::get( + ctx, fp32_dtype, dims, data_layout, lod, offset); auto allocated_dense_tensor_dtype = paddle::dialect::AllocatedDenseTensorType::get( ctx, phi::TransToPhiPlace(kernel_key.backend()), - base_types[j].dyn_cast()); + 
dense_tensor_dtype); vec_inner_types.push_back(allocated_dense_tensor_dtype); - } else { - PADDLE_THROW(phi::errors::Unimplemented( - "only support dense tensor in vector type for now")); } } diff --git a/paddle/phi/kernels/cpu/feed_with_place_kernel.cc b/paddle/phi/kernels/cpu/feed_with_place_kernel.cc index 5044bceda26..ce346472821 100644 --- a/paddle/phi/kernels/cpu/feed_with_place_kernel.cc +++ b/paddle/phi/kernels/cpu/feed_with_place_kernel.cc @@ -30,7 +30,6 @@ template void ShaddowOutputKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {} - } // namespace phi PD_REGISTER_KERNEL( diff --git a/python/paddle/jit/dy2static/partial_program.py b/python/paddle/jit/dy2static/partial_program.py index 9032c9c23ae..7d45d390f3e 100644 --- a/python/paddle/jit/dy2static/partial_program.py +++ b/python/paddle/jit/dy2static/partial_program.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from copy import deepcopy import numpy as np @@ -821,26 +822,28 @@ class PartialProgramLayer: "mem_opt_skip_vars": forward_mem_opt_skip_vars, "for_partial_block": True, } - _apply_pass( - forward_program, - empty_startup_program, - "buffer_shared_inplace_pass", - attrs, - attr_types, - ) + if not os.getenv("FLAGS_enable_new_ir_in_executor"): + _apply_pass( + forward_program, + empty_startup_program, + "buffer_shared_inplace_pass", + attrs, + attr_types, + ) if backward_program: attrs = { "use_cuda": use_cuda, "mem_opt_skip_vars": backward_mem_opt_skip_vars, "for_partial_block": True, } - _apply_pass( - backward_program, - empty_startup_program, - "buffer_shared_inplace_pass", - attrs, - attr_types, - ) + if not os.getenv("FLAGS_enable_new_ir_in_executor"): + _apply_pass( + backward_program, + empty_startup_program, + "buffer_shared_inplace_pass", + attrs, + attr_types, + ) @LazyInitialized def _inout_var_names(self): diff --git a/test/ir/new_ir/test_standalone_new_ir.py 
b/test/ir/new_ir/test_standalone_new_ir.py index 0b8191d8bab..1adb8788be7 100644 --- a/test/ir/new_ir/test_standalone_new_ir.py +++ b/test/ir/new_ir/test_standalone_new_ir.py @@ -162,7 +162,6 @@ class TestAddGradOp(unittest.TestCase): class TestNewIrDygraph(unittest.TestCase): def test_with_new_ir(self): paddle.disable_static() - # paddle.device.set_device("cpu") @paddle.jit.to_static def func(x, y): -- GitLab