Unverified commit 5b6d2f85, authored by HongyuJia, committed by GitHub

[CustomOP unittest] Add customOP multiple inplace unittest (#51758)

Parent 89ff0d59
@@ -576,7 +576,7 @@ class PADDLE_API OpMetaInfo {
// format: {"<input_name1>:<output_name1>",
// "<input_name2>:<output_name2>",...}
OpMetaInfo& Inplace(
OpMetaInfo& SetInplaceMap(
std::unordered_map<std::string, std::string>&& inplace_map);
// format: PD_KERNEL(...)
@@ -635,7 +635,7 @@ class PADDLE_API OpMetaInfoBuilder {
OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs);
OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs);
OpMetaInfoBuilder& Attrs(std::vector<std::string>&& attrs);
OpMetaInfoBuilder& Inplace(
OpMetaInfoBuilder& SetInplaceMap(
std::unordered_map<std::string, std::string>&& inplace_map);
OpMetaInfoBuilder& SetKernelFn(KernelFunc func);
OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc func);
......
@@ -211,7 +211,7 @@ OpMetaInfo& OpMetaInfo::Attrs(std::vector<std::string>&& attrs) {
attrs_ = std::forward<std::vector<std::string>>(attrs);
return *this;
}
OpMetaInfo& OpMetaInfo::Inplace(
OpMetaInfo& OpMetaInfo::SetInplaceMap(
std::unordered_map<std::string, std::string>&& inplace_map) {
inplace_map_ =
std::forward<std::unordered_map<std::string, std::string>>(inplace_map);
@@ -297,9 +297,9 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Attrs(std::vector<std::string>&& attrs) {
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::Inplace(
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInplaceMap(
std::unordered_map<std::string, std::string>&& inplace_map) {
info_ptr_->Inplace(
info_ptr_->SetInplaceMap(
std::forward<std::unordered_map<std::string, std::string>>(inplace_map));
return *this;
}
......
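For context, the renamed builder method keeps the map-style signature described in the header comment: each entry maps an input name to the output that reuses its storage ("<input_name>:<output_name>"). A minimal, hypothetical registration sketch using the new name (the operator, the tensor names, and the MyInplaceForward kernel below are illustrative only, not part of this change):

#include "paddle/extension.h"

// Hypothetical kernel: scales X in place; the result is exposed as "Out".
void MyInplaceForward(paddle::Tensor& x) {  // NOLINT
  PD_DISPATCH_FLOATING_TYPES(x.type(), "MyInplaceForward", ([&] {
                               data_t* x_data = x.data<data_t>();
                               for (int64_t i = 0; i < x.size(); ++i) {
                                 x_data[i] *= static_cast<data_t>(2);
                               }
                             }));
}

// The inplace map records that input "X" shares storage with output "Out".
PD_BUILD_OP(my_inplace_op)
    .Inputs({"X"})
    .Outputs({"Out"})
    .SetInplaceMap({{"X", "Out"}})
    .SetKernelFn(PD_KERNEL(MyInplaceForward));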
@@ -75,7 +75,7 @@ std::vector<paddle::Tensor> AddBackward(const paddle::Tensor& x,
const paddle::Tensor& y,
paddle::Tensor& out_grad) { // NOLINT
PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor.");
PD_CHECK(y.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor.");
PD_CHECK(y.place() == paddle::PlaceType::kCPU, "y must be a CPU Tensor.");
paddle::Tensor y_grad = paddle::empty(x.shape(), x.dtype(), x.place());
@@ -91,7 +91,7 @@ std::vector<paddle::Tensor> AddBackward(const paddle::Tensor& x,
PD_BUILD_OP(custom_add)
.Inputs({"X", "Y"})
.Outputs({"Out"})
.Inplace({{"X", "Out"}})
.SetInplaceMap({{"X", "Out"}})
.SetKernelFn(PD_KERNEL(AddForward))
.SetInferShapeFn(PD_INFER_SHAPE(AddInferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(AddInferDtype));
@@ -99,9 +99,87 @@ PD_BUILD_OP(custom_add)
PD_BUILD_GRAD_OP(custom_add)
.Inputs({"X", "Y", paddle::Grad("Out")})
.Outputs({paddle::Grad("X"), paddle::Grad("Y")})
.Inplace({{paddle::Grad("Out"), paddle::Grad("X")}})
.SetInplaceMap({{paddle::Grad("Out"), paddle::Grad("X")}})
.SetKernelFn(PD_KERNEL(AddBackward));
void MultiInplaceForward(paddle::Tensor& x, // NOLINT
const paddle::Tensor& y,
paddle::Tensor& a, // NOLINT
const paddle::Tensor& b) {
PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor.");
PD_CHECK(a.place() == paddle::PlaceType::kCPU, "a must be a CPU Tensor.");
PD_DISPATCH_FLOATING_TYPES(
x.type(), "MultiInplaceForward", ([&] {
add_forward_kernel<data_t>(
x.data<data_t>(), y.data<data_t>(), x.size());
add_forward_kernel<data_t>(
a.data<data_t>(), b.data<data_t>(), a.size());
}));
}
std::vector<paddle::DataType> MultiInplaceInferDtype(
const paddle::DataType& x_dtype,
const paddle::DataType& y_dtype,
const paddle::DataType& a_dtype,
const paddle::DataType& b_dtype) {
return {x_dtype, a_dtype};
}
std::vector<std::vector<int64_t>> MultiInplaceInferShape(
const std::vector<int64_t>& x_shape,
const std::vector<int64_t>& y_shape,
const std::vector<int64_t>& a_shape,
const std::vector<int64_t>& b_shape) {
return {x_shape, a_shape};
}
std::vector<paddle::Tensor> MultiInplaceBackward(
const paddle::Tensor& x,
const paddle::Tensor& y,
paddle::Tensor& outxy_grad, // NOLINT
const paddle::Tensor& a,
const paddle::Tensor& b,
paddle::Tensor& outab_grad) { // NOLINT
PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor.");
PD_CHECK(y.place() == paddle::PlaceType::kCPU, "y must be a CPU Tensor.");
PD_CHECK(a.place() == paddle::PlaceType::kCPU, "a must be a CPU Tensor.");
PD_CHECK(b.place() == paddle::PlaceType::kCPU, "b must be a CPU Tensor.");
paddle::Tensor y_grad = paddle::empty(x.shape(), x.dtype(), x.place());
paddle::Tensor b_grad = paddle::empty(a.shape(), a.dtype(), a.place());
PD_DISPATCH_FLOATING_TYPES(
outxy_grad.type(), "MultiInplaceBackward", ([&] {
add_backward_kernel<data_t>(y_grad.data<data_t>(),
outxy_grad.data<data_t>(),
outxy_grad.size());
add_backward_kernel<data_t>(b_grad.data<data_t>(),
outab_grad.data<data_t>(),
outab_grad.size());
}));
return {y_grad, b_grad};
}
PD_BUILD_OP(custom_multi_inplace)
.Inputs({"X", "Y", "A", "B"})
.Outputs({"OutXY", "OutAB"})
.SetInplaceMap({{"X", "OutXY"}, {"A", "OutAB"}})
.SetKernelFn(PD_KERNEL(MultiInplaceForward))
.SetInferShapeFn(PD_INFER_SHAPE(MultiInplaceInferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(MultiInplaceInferDtype));
PD_BUILD_GRAD_OP(custom_multi_inplace)
.Inputs({"X", "Y", paddle::Grad("OutXY"), "A", "B", paddle::Grad("OutAB")})
.Outputs({paddle::Grad("X"),
paddle::Grad("Y"),
paddle::Grad("A"),
paddle::Grad("B")})
.SetInplaceMap({{paddle::Grad("OutXY"), paddle::Grad("X")},
{paddle::Grad("OutAB"), paddle::Grad("A")}})
.SetKernelFn(PD_KERNEL(MultiInplaceBackward));
void ReluForwardInplace(paddle::Tensor& x) { // NOLINT
PD_CHECK(x.place() == paddle::PlaceType::kCPU, "x must be a CPU Tensor.");
@@ -126,11 +204,11 @@ void ReluBackwardInplace(const paddle::Tensor& x,
PD_BUILD_OP(custom_relu_inplace)
.Inputs({"X"})
.Outputs({"Out"})
.Inplace({{"X", "Out"}})
.SetInplaceMap({{"X", "Out"}})
.SetKernelFn(PD_KERNEL(ReluForwardInplace));
PD_BUILD_GRAD_OP(custom_relu_inplace)
.Inputs({"X", "Out", paddle::Grad("Out")})
.Outputs({paddle::Grad("X")})
.Inplace({{paddle::Grad("Out"), paddle::Grad("X")}})
.SetInplaceMap({{paddle::Grad("Out"), paddle::Grad("X")}})
.SetKernelFn(PD_KERNEL(ReluBackwardInplace));
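The MultiInplace functions above reuse add_forward_kernel and add_backward_kernel, which are presumably defined earlier in this test file and therefore do not appear in the diff. A rough sketch of what such element-wise helpers plausibly look like, assuming the forward helper accumulates y into x through the in-place buffer and the backward helper copies the upstream gradient (an assumption, not the verbatim source):

// Assumed forward helper: x[i] += y[i], writing through the in-place input.
template <typename data_t>
void add_forward_kernel(data_t* x_data, const data_t* y_data, int64_t numel) {
  for (int64_t i = 0; i < numel; ++i) {
    x_data[i] += y_data[i];
  }
}

// Assumed backward helper: the gradient of an element-wise add w.r.t. either
// operand is the upstream gradient, so it is copied into the new grad tensor.
template <typename data_t>
void add_backward_kernel(data_t* grad_data,
                         const data_t* out_grad_data,
                         int64_t numel) {
  for (int64_t i = 0; i < numel; ++i) {
    grad_data[i] = out_grad_data[i];
  }
}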
@@ -147,6 +147,105 @@ def inplace_static_relu(func, device, dtype, np_x, np_y, np_z):
return x_v, y_v, out_v, x_grad_v, y_grad_v
def dynamic_multi_inplace(phi_func, device, dtype, np_x, np_y, np_a, np_b):
paddle.set_device(device)
x = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=True)
y = paddle.to_tensor(np_y, dtype=dtype, stop_gradient=False)
a = paddle.to_tensor(np_a, dtype=dtype, stop_gradient=True)
b = paddle.to_tensor(np_b, dtype=dtype, stop_gradient=False)
if phi_func:
out_xy, out_ab = custom_inplace.custom_multi_inplace(x, y, a, b)
else:
out_xy = x.add_(y)
out_ab = a.add_(b)
out = out_xy + out_ab
out.backward()
return (
x.numpy(),
y.numpy(),
out_xy.numpy(),
x.grad.numpy(),
y.grad.numpy(),
a.numpy(),
b.numpy(),
out_ab.numpy(),
a.grad.numpy(),
b.grad.numpy(),
)
def static_multi_inplace(phi_func, device, dtype, np_x, np_y, np_a, np_b):
paddle.enable_static()
paddle.set_device(device)
with static.scope_guard(static.Scope()):
with static.program_guard(static.Program()):
x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
y = static.data(name="y", shape=[None, np_y.shape[1]], dtype=dtype)
a = static.data(name="a", shape=[None, np_x.shape[1]], dtype=dtype)
b = static.data(name="b", shape=[None, np_y.shape[1]], dtype=dtype)
x.stop_gradient = False
y.stop_gradient = False
a.stop_gradient = False
b.stop_gradient = False
if phi_func:
out_xy, out_ab = custom_inplace.custom_multi_inplace(x, y, a, b)
else:
out_xy = paddle.add(x, y)
out_ab = paddle.add(a, b)
mean_out = paddle.mean(paddle.add(out_xy, out_ab))
static.append_backward(mean_out)
exe = static.Executor()
exe.run(static.default_startup_program())
(
x_v,
out_xy_v,
x_grad_v,
y_grad_v,
out_xy_grad_v,
a_v,
out_ab_v,
a_grad_v,
b_grad_v,
out_ab_grad_v,
) = exe.run(
static.default_main_program(),
feed={
"x": np_x.astype(dtype),
"y": np_y.astype(dtype),
"a": np_a.astype(dtype),
"b": np_b.astype(dtype),
},
fetch_list=[
x.name,
out_xy.name,
x.name + "@GRAD",
y.name + "@GRAD",
out_xy.name + "@GRAD",
a.name,
out_ab.name,
a.name + "@GRAD",
b.name + "@GRAD",
out_ab.name + "@GRAD",
],
)
paddle.disable_static()
return (
x_v,
out_xy_v,
x_grad_v,
y_grad_v,
out_xy_grad_v,
a_v,
out_ab_v,
a_grad_v,
b_grad_v,
out_ab_grad_v,
)
class TestCustomInplaceJit(unittest.TestCase):
def setUp(self):
self.dtypes = ['float32', 'float64']
@@ -154,6 +253,8 @@ class TestCustomInplaceJit(unittest.TestCase):
self.np_x = np.random.random((3, 2)).astype("float32")
self.np_y = np.random.random((3, 2)).astype("float32")
self.np_z = np.random.random((3, 2)).astype("float32")
self.np_a = np.random.random((3, 2)).astype("float32")
self.np_b = np.random.random((3, 2)).astype("float32")
def check_output(self, out, pd_out, name):
np.testing.assert_array_equal(
@@ -328,6 +429,127 @@ class TestCustomInplaceJit(unittest.TestCase):
self.check_output(phi_x_grad, pd_x_grad, "x_grad")
self.check_output(phi_y_grad, pd_y_grad, "y_grad")
def test_static_multi_inplace(self):
for device in self.devices:
for dtype in self.dtypes:
(
pd_x,
pd_out_xy,
pd_x_grad,
pd_y_grad,
pd_out_xy_grad,
pd_a,
pd_out_ab,
pd_a_grad,
pd_b_grad,
pd_out_ab_grad,
) = static_multi_inplace(
False,
device,
dtype,
self.np_x,
self.np_y,
self.np_a,
self.np_b,
)
(
phi_x,
phi_out_xy,
phi_x_grad,
phi_y_grad,
phi_out_xy_grad,
phi_a,
phi_out_ab,
phi_a_grad,
phi_b_grad,
phi_out_ab_grad,
) = static_multi_inplace(
True,
device,
dtype,
self.np_x,
self.np_y,
self.np_a,
self.np_b,
)
self.check_output(phi_x, pd_out_xy, "inplace_phi_x")
self.check_output(
phi_x_grad, phi_out_xy_grad, "inplace_phi_x_grad"
)
self.check_output(phi_a, pd_out_ab, "inplace_phi_a")
self.check_output(
phi_a_grad, phi_out_ab_grad, "inplace_phi_a_grad"
)
self.check_output(phi_out_xy, pd_out_xy, "outxy")
self.check_output(phi_x_grad, pd_x_grad, "x_grad")
self.check_output(phi_y_grad, pd_y_grad, "y_grad")
self.check_output(phi_out_xy_grad, pd_out_xy_grad, "outxy_grad")
self.check_output(phi_out_ab, pd_out_ab, "outab")
self.check_output(phi_a_grad, pd_a_grad, "a_grad")
self.check_output(phi_b_grad, pd_b_grad, "b_grad")
self.check_output(phi_out_ab_grad, pd_out_ab_grad, "outab_grad")
def test_dynamic_multi_inplace(self):
for device in self.devices:
for dtype in self.dtypes:
(
pd_x,
pd_y,
pd_out_xy,
pd_x_grad,
pd_y_grad,
pd_a,
pd_b,
pd_out_ab,
pd_a_grad,
pd_b_grad,
) = dynamic_multi_inplace(
False,
device,
dtype,
self.np_x,
self.np_y,
self.np_a,
self.np_b,
)
(
phi_x,
phi_y,
phi_out_xy,
phi_x_grad,
phi_y_grad,
phi_a,
phi_b,
phi_out_ab,
phi_a_grad,
phi_b_grad,
) = dynamic_multi_inplace(
True,
device,
dtype,
self.np_x,
self.np_y,
self.np_a,
self.np_b,
)
self.check_output(phi_x, phi_out_xy, "inplace_phi_x")
self.check_output(pd_x, pd_out_xy, "inplace_pd_x")
self.check_output(phi_a, phi_out_ab, "inplace_phi_a")
self.check_output(pd_a, pd_out_ab, "inplace_pd_a")
self.check_output(phi_x, pd_x, "x")
self.check_output(phi_y, pd_y, "y")
self.check_output(phi_out_xy, pd_out_xy, "outxy")
self.check_output(phi_x_grad, pd_x_grad, "x_grad")
self.check_output(phi_y_grad, pd_y_grad, "y_grad")
self.check_output(phi_a, pd_a, "a")
self.check_output(phi_b, pd_b, "b")
self.check_output(phi_out_ab, pd_out_ab, "outab")
self.check_output(phi_a_grad, pd_a_grad, "a_grad")
self.check_output(phi_b_grad, pd_b_grad, "b_grad")
if __name__ == "__main__":
unittest.main()