diff --git a/paddle/fluid/operators/math/concat_test.cc b/paddle/fluid/operators/math/concat_test.cc
index 854a8ee4425edbf3f07177589b4d512027cc3394..19d056fa54777eff2881a346da071ff95126173c 100644
--- a/paddle/fluid/operators/math/concat_test.cc
+++ b/paddle/fluid/operators/math/concat_test.cc
@@ -72,8 +72,8 @@ void testConcat() {
   }
 
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopy(input_a_cpu, Place(), *context, &input_a, true);
-    TensorCopy(input_b_cpu, Place(), *context, &input_b, true);
+    TensorCopySync(input_a_cpu, Place(), &input_a);
+    TensorCopySync(input_b_cpu, Place(), &input_b);
   }
 
   std::vector<Tensor> input;
@@ -89,7 +89,7 @@ void testConcat() {
 
   int* out_ptr;
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopy(out, CPUPlace(), *context, &out_cpu, true);
+    TensorCopySync(out, CPUPlace(), &out_cpu);
     out_ptr = out_cpu.data<int>();
   } else {
     out_ptr = out.data<int>();
@@ -144,8 +144,8 @@ void testConcat() {
   }
 
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopy(input_a_cpu, Place(), *context, &input_a, true);
-    TensorCopy(input_b_cpu, Place(), *context, &input_b, true);
+    TensorCopySync(input_a_cpu, Place(), &input_a);
+    TensorCopySync(input_b_cpu, Place(), &input_b);
   }
 
   input.clear();
@@ -159,7 +159,7 @@ void testConcat() {
   PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
 
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopy(out, CPUPlace(), *context, &out_cpu, true);
+    TensorCopySync(out, CPUPlace(), &out_cpu);
     out_ptr = out_cpu.data<int>();
   } else {
     out_ptr = out.data<int>();
@@ -216,8 +216,8 @@ void testConcat() {
   }
 
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopy(input_a_cpu, Place(), *context, &input_a, true);
-    TensorCopy(input_b_cpu, Place(), *context, &input_b, true);
+    TensorCopySync(input_a_cpu, Place(), &input_a);
+    TensorCopySync(input_b_cpu, Place(), &input_b);
   }
 
   input.clear();
@@ -231,7 +231,7 @@ void testConcat() {
   PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
 
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopy(out, CPUPlace(), *context, &out_cpu, true);
+    TensorCopySync(out, CPUPlace(), &out_cpu);
     out_ptr = out_cpu.data<int>();
   } else {
     out_ptr = out.data<int>();
@@ -290,8 +290,8 @@ void testConcat() {
   }
 
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopy(input_a_cpu, Place(), *context, &input_a, true);
-    TensorCopy(input_b_cpu, Place(), *context, &input_b, true);
+    TensorCopySync(input_a_cpu, Place(), &input_a);
+    TensorCopySync(input_b_cpu, Place(), &input_b);
   }
 
   input.clear();
@@ -305,7 +305,7 @@ void testConcat() {
   PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
 
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopy(out, CPUPlace(), *context, &out_cpu, true);
+    TensorCopySync(out, CPUPlace(), &out_cpu);
     out_ptr = out_cpu.data<int>();
   } else {
     out_ptr = out.data<int>();
diff --git a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
index afbe56234530b6c1314d8eef1eb8cf1241aeacbf..d2831d31af47f49f2f8862fa27fed560485b27d8 100644
--- a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
@@ -177,9 +177,9 @@ void DoubleBufferReader::PrefetchThreadFunc() {
     }
     if (platform::is_gpu_place(place_)) {
       auto& gpu_batch = gpu_tensor_cache_[cached_tensor_id];
-      auto* gpu_ctx = ctxs_[cached_tensor_id].get();
       gpu_batch.resize(cpu_batch.size());
       for (size_t i = 0; i < cpu_batch.size(); ++i) {
+        // TODO(fengjiayi): Use asynchronous TensorCopy instead of TensorCopySync.
         framework::TensorCopySync(cpu_batch[i], place_, &gpu_batch[i]);
         gpu_batch[i].set_lod(cpu_batch[i].lod());
       }