diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc
index 17ab1f1f7c53fe69e07e04df4f98baaaf10d615f..2020e2900c0160cef5be154a37c5f211fbd07738 100644
--- a/paddle/fluid/imperative/gradient_accumulator.cc
+++ b/paddle/fluid/imperative/gradient_accumulator.cc
@@ -300,13 +300,10 @@ void TensorAdd(const VarType& src, VarType* dst) {
                         "should be equal, Otherwise, the calculation results "
                         "will be incorrect."));
 
-#ifdef PADDLE_WITH_XPU
   // if src and dst are in different place, copy dst to src's place
   if (dst_tensor->place() != place) {
     paddle::framework::TensorCopySync(*dst_tensor, place, dst_tensor);
   }
-#endif
-
 #define PADDLE_TENSOR_ADD(cpp_type)                                  \
   if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
     TensorAddFunctor<cpp_type> func(                                 \
diff --git a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
index 4dfc8198064e376edf55df9b4c51031344f71485..60b238e5cfdb1f3b0e37ac0a6b3f88fd52426fa7 100644
--- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
+++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc
@@ -163,10 +163,30 @@ TEST(test_add_functor, add_functor) {
   gpu_res = TensorddTest(gpu_place, gpu_place, static_cast<double>(1.0),
                          static_cast<double>(2.0));
   EXPECT_EQ(gpu_res, 0);
+
+  // both tensors on the same place (GPU)
+  gpu_res = TensorddTest(gpu_place, gpu_place, static_cast<float>(1.0),
+                         static_cast<float>(2.0));
+  EXPECT_EQ(gpu_res, 0);
   gpu_res =
       TensorddTest(gpu_place, gpu_place, static_cast<platform::float16>(1.0),
                    static_cast<platform::float16>(2.0));
   EXPECT_EQ(gpu_res, 0);
+  // tensors on different places (CPU/GPU, in both orders)
+  gpu_res = TensorddTest(cpu_place, gpu_place, static_cast<float>(1.0),
+                         static_cast<float>(2.0));
+  EXPECT_EQ(gpu_res, 0);
+  gpu_res = TensorddTest(gpu_place, cpu_place, static_cast<float>(1.0),
+                         static_cast<float>(2.0));
+  EXPECT_EQ(gpu_res, 0);
+  gpu_res =
+      TensorddTest(cpu_place, gpu_place, static_cast<platform::float16>(1.0),
+                   static_cast<platform::float16>(2.0));
+  EXPECT_EQ(gpu_res, 0);
+  gpu_res =
+      TensorddTest(gpu_place, cpu_place, static_cast<platform::float16>(1.0),
+                   static_cast<platform::float16>(2.0));
+  EXPECT_EQ(gpu_res, 0);
 #endif
 
 #ifdef PADDLE_WITH_XPU