From e417828436a192c07629dc1c7f65b6a454e68a45 Mon Sep 17 00:00:00 2001 From: JYChen Date: Fri, 21 Apr 2023 21:28:17 +0800 Subject: [PATCH] Cherry pick fix set value cpu (#53127) * fix the set_value error in cpu * add a unitest for set_value OP * fix platform::is_gpu_place * add todo note for set_value * fix test --- .../phi/kernels/impl/set_value_kernel_impl.h | 39 ++++++++++++++----- .../tests/unittests/test_set_value_op.py | 39 +++++++++++++++++++ 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/paddle/phi/kernels/impl/set_value_kernel_impl.h b/paddle/phi/kernels/impl/set_value_kernel_impl.h index d92fcfd32a7..9956260a798 100644 --- a/paddle/phi/kernels/impl/set_value_kernel_impl.h +++ b/paddle/phi/kernels/impl/set_value_kernel_impl.h @@ -136,7 +136,6 @@ void SetValueImpl(const Context& dev_ctx, Empty(dev_ctx, IntArray{slice_dims.Get(), slice_dims.size()}); DenseTensor pad_tensor = Empty(dev_ctx, IntArray{in_dims.Get(), in_dims.size()}); - auto pad_e = EigenTensor::From(pad_tensor, in_dims); auto out_e = EigenTensor::From(*out); auto slice_e = EigenTensor::From(slice_tensor, slice_dims); @@ -185,16 +184,38 @@ void SetValueImpl(const Context& dev_ctx, // is [3], which is right. slice_tensor.Resize(slice_dims_for_assign); + CheckIsDimsMatch(slice_dims_for_assign, value.dims()); - // ElementwiseComputeEx can do broadcasting - funcs::ElementwiseCompute, T>( - dev_ctx, - slice_tensor, - value, - -1, - funcs::SubtractFunctor(), - &slice_tensor); + bool is_gpu_place = dev_ctx.GetPlace().GetType() == phi::AllocationType::GPU; + if (is_gpu_place || slice_tensor.dims().size() >= value.dims().size()) { + // [Why here we confirm running device] + // ElementwiseComputeEx can do broadcasting in two cases: + // 1. The place is GPU. + // 2. The place is CPU, and the 'x' does not need broadcast. + // Please see the note in + // paddle/fluid/operators/elementwise/elementwise_op_function.h + // So, here we choose different logic depending on the device to avoid + // numerical problems, temporarily. + // + // TODO(zoooo0820): Reimplement logic of set_value to avoid using + // elementwise-sub. + funcs::ElementwiseCompute, T>( + dev_ctx, + slice_tensor, + value, + -1, + funcs::SubtractFunctor(), + &slice_tensor); + } else { + funcs::ElementwiseCompute, T>( + dev_ctx, + slice_tensor, + value, + -1, + funcs::InverseSubtractFunctor(), + &slice_tensor); + } slice_tensor.Resize(slice_dims); // - Step 2.2 Pad slice tensor with 0 diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index ab8985ca199..9c5a71df018 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -982,6 +982,45 @@ class TestSetValueValueShape5(TestSetValueApi): self.data[:, 0] = self.value +# This is to test case which dims of indexed Tensor is +# less than value Tensor on CPU / GPU. +class TestSetValueValueShape6(TestSetValueApi): + def set_value(self): + self.value = np.ones((1, 4)) * 5 + + def set_shape(self): + self.shape = [4, 4] + + def _call_setitem(self, x): + x[:, 0] = self.value # x is Paddle.Tensor + + def _get_answer(self): + self.data[:, 0] = self.value + + def test_api(self): + places = ['cpu'] + if paddle.is_compiled_with_cuda(): + places.append('gpu') + for place in places: + paddle.set_device(place) + + static_out = self._run_static() + dynamic_out = self._run_dynamic() + self._get_answer() + + error_msg = ( + "\nIn {} mode: \nExpected res = \n{}, \n\nbut received : \n{}" + ) + self.assertTrue( + (self.data == static_out).all(), + msg=error_msg.format("static", self.data, static_out), + ) + self.assertTrue( + (self.data == dynamic_out).all(), + msg=error_msg.format("dynamic", self.data, dynamic_out), + ) + + # 4. Test error class TestError(TestSetValueBase): def _value_type_error(self): -- GitLab