[Bug Fix] Fix gradient accumulator (#39577)

* merge legacy to fluid * Remove legacy code * Remove legacy code * Remove DataType test * Using Tensor directly instead of using EagerTensor * support gradient_accumulation * make test_imperative_lod_tensor_to_selected_rows longer * make test_imperative_lod_tensor_to_selected_rows longer * refine code * Rename all EagerTensor to Tensor * Rename some EagerTensor to Tensor * rename EagerTensor to EagerVariable * add more test * fix different device gradient_accumulator bug * merge develop * remove useless tests

[Bug Fix] Fix gradient accumulator (#39577)
* merge legacy to fluid * Remove legacy code * Remove legacy code * Remove DataType test * Using Tensor directly instead of using EagerTensor * support gradient_accumulation * make test_imperative_lod_tensor_to_selected_rows longer * make test_imperative_lod_tensor_to_selected_rows longer * refine code * Rename all EagerTensor to Tensor * Rename some EagerTensor to Tensor * rename EagerTensor to EagerVariable * add more test * fix different device gradient_accumulator bug * merge develop * remove useless tests
a7cbd3ef · Jiabin Yang · GitHub · adf4b98f · a7cbd3ef · a7cbd3ef
2 changed files
--- a/paddle/fluid/imperative/gradient_accumulator.cc
+++ b/paddle/fluid/imperative/gradient_accumulator.cc
@@ -300,13 +300,10 @@ void TensorAdd(const VarType& src, VarType* dst) {
                        "should be equal, Otherwise, the calculation results "
                        "will be incorrect."));
-#ifdef PADDLE_WITH_XPU
  // if src and dst are in different place, copy dst to src's place
  if (dst_tensor->place() != place) {
    paddle::framework::TensorCopySync(*dst_tensor, place, dst_tensor);
  }
-#endif
 #define PADDLE_TENSOR_ADD(cpp_type)                                  \
  if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
    TensorAddFunctor<cpp_type> func(                                 \

--- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
+++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
@@ -163,10 +163,30 @@ TEST(test_add_functor, add_functor) {
  gpu_res = TensorddTest(gpu_place, gpu_place, static_cast<double>(1.0),
                         static_cast<double>(2.0));
  EXPECT_EQ(gpu_res, 0);
+  // normal
+  gpu_res = TensorddTest(gpu_place, gpu_place, static_cast<float>(1.0),
+                         static_cast<float>(2.0));
+  EXPECT_EQ(gpu_res, 0);
  gpu_res =
      TensorddTest(gpu_place, gpu_place, static_cast<platform::float16>(1.0),
                   static_cast<platform::float16>(2.0));
  EXPECT_EQ(gpu_res, 0);
+  // different places
+  gpu_res = TensorddTest(cpu_place, gpu_place, static_cast<float>(1.0),
+                         static_cast<float>(2.0));
+  EXPECT_EQ(gpu_res, 0);
+  gpu_res = TensorddTest(gpu_place, cpu_place, static_cast<float>(1.0),
+                         static_cast<float>(2.0));
+  EXPECT_EQ(gpu_res, 0);
+  gpu_res =
+      TensorddTest(cpu_place, gpu_place, static_cast<platform::float16>(1.0),
+                   static_cast<platform::float16>(2.0));
+  EXPECT_EQ(gpu_res, 0);
+  gpu_res =
+      TensorddTest(gpu_place, cpu_place, static_cast<platform::float16>(1.0),
+                   static_cast<platform::float16>(2.0));
+  EXPECT_EQ(gpu_res, 0);
 #endif
 #ifdef PADDLE_WITH_XPU