From e417828436a192c07629dc1c7f65b6a454e68a45 Mon Sep 17 00:00:00 2001
From: JYChen <zoooo0820@qq.com>
Date: Fri, 21 Apr 2023 21:28:17 +0800
Subject: [PATCH] Cherry pick fix set value cpu (#53127)

* fix the set_value error in cpu

* add a unitest for set_value OP

* fix platform::is_gpu_place

* add todo note for set_value

* fix test
---
 .../phi/kernels/impl/set_value_kernel_impl.h  | 39 ++++++++++++++-----
 .../tests/unittests/test_set_value_op.py      | 39 +++++++++++++++++++
 2 files changed, 69 insertions(+), 9 deletions(-)
diff --git a/paddle/phi/kernels/impl/set_value_kernel_impl.h b/paddle/phi/kernels/impl/set_value_kernel_impl.h
index d92fcfd32a7..9956260a798 100644
--- a/paddle/phi/kernels/impl/set_value_kernel_impl.h
+++ b/paddle/phi/kernels/impl/set_value_kernel_impl.h
@@ -136,7 +136,6 @@ void SetValueImpl(const Context& dev_ctx,
       Empty<T>(dev_ctx, IntArray{slice_dims.Get(), slice_dims.size()});
   DenseTensor pad_tensor =
       Empty<T>(dev_ctx, IntArray{in_dims.Get(), in_dims.size()});
-
   auto pad_e = EigenTensor<T, RANK>::From(pad_tensor, in_dims);
   auto out_e = EigenTensor<T, RANK>::From(*out);
   auto slice_e = EigenTensor<T, RANK>::From(slice_tensor, slice_dims);
@@ -185,16 +184,38 @@ void SetValueImpl(const Context& dev_ctx,
   // is [3], which is right.
 
   slice_tensor.Resize(slice_dims_for_assign);
+
   CheckIsDimsMatch(slice_dims_for_assign, value.dims());
-  // ElementwiseComputeEx can do broadcasting
-  funcs::ElementwiseCompute<funcs::SubtractFunctor<T>, T>(
-      dev_ctx,
-      slice_tensor,
-      value,
-      -1,
-      funcs::SubtractFunctor<T>(),
-      &slice_tensor);
 
+  bool is_gpu_place = dev_ctx.GetPlace().GetType() == phi::AllocationType::GPU;
+  if (is_gpu_place || slice_tensor.dims().size() >= value.dims().size()) {
+    // [Why here we confirm running device]
+    //    ElementwiseComputeEx can do broadcasting in two cases:
+    //    1. The place is GPU.
+    //    2. The place is CPU, and the 'x' does not need broadcast.
+    //    Please see the note in
+    //    paddle/fluid/operators/elementwise/elementwise_op_function.h
+    // So, here we choose different logic depending on the device to avoid
+    // numerical problems, temporarily.
+    //
+    // TODO(zoooo0820): Reimplement logic of set_value to avoid using
+    // elementwise-sub.
+    funcs::ElementwiseCompute<funcs::SubtractFunctor<T>, T>(
+        dev_ctx,
+        slice_tensor,
+        value,
+        -1,
+        funcs::SubtractFunctor<T>(),
+        &slice_tensor);
+  } else {
+    funcs::ElementwiseCompute<funcs::InverseSubtractFunctor<T>, T>(
+        dev_ctx,
+        slice_tensor,
+        value,
+        -1,
+        funcs::InverseSubtractFunctor<T>(),
+        &slice_tensor);
+  }
   slice_tensor.Resize(slice_dims);
 
   // - Step 2.2 Pad slice tensor with 0
diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py
index ab8985ca199..9c5a71df018 100644
--- a/python/paddle/fluid/tests/unittests/test_set_value_op.py
+++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py
@@ -982,6 +982,45 @@ class TestSetValueValueShape5(TestSetValueApi):
         self.data[:, 0] = self.value
 
 
+# This is to test case which dims of indexed Tensor is
+# less than value Tensor on CPU / GPU.
+class TestSetValueValueShape6(TestSetValueApi):
+    def set_value(self):
+        self.value = np.ones((1, 4)) * 5
+
+    def set_shape(self):
+        self.shape = [4, 4]
+
+    def _call_setitem(self, x):
+        x[:, 0] = self.value  # x is Paddle.Tensor
+
+    def _get_answer(self):
+        self.data[:, 0] = self.value
+
+    def test_api(self):
+        places = ['cpu']
+        if paddle.is_compiled_with_cuda():
+            places.append('gpu')
+        for place in places:
+            paddle.set_device(place)
+
+            static_out = self._run_static()
+            dynamic_out = self._run_dynamic()
+            self._get_answer()
+
+            error_msg = (
+                "\nIn {} mode: \nExpected res = \n{}, \n\nbut received : \n{}"
+            )
+            self.assertTrue(
+                (self.data == static_out).all(),
+                msg=error_msg.format("static", self.data, static_out),
+            )
+            self.assertTrue(
+                (self.data == dynamic_out).all(),
+                msg=error_msg.format("dynamic", self.data, dynamic_out),
+            )
+
+
 # 4. Test error
 class TestError(TestSetValueBase):
     def _value_type_error(self):
-- 
GitLab