Use CUDAPinnedPlace in buffered_reader (#19112)

Use CUDAPinnedPlace in buffered_reader

Use CUDAPinnedPlace in buffered_reader (#19112)
Use CUDAPinnedPlace in buffered_reader
c70a97f4 · chengduo · gongweibao · ef46918a · c70a97f4 · c70a97f4
Hide whitespace changes
Inline / Side-by-side

Showing 2 changed files with 15 additions and 2 deletions

paddle/fluid/framework/tensor_util.cc paddle/fluid/framework/tensor_util.cc +4 -0

paddle/fluid/operators/reader/buffered_reader.cc paddle/fluid/operators/reader/buffered_reader.cc +11 -2

No files found.
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -99,6 +99,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
        PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place.");
      }
    }
+  } else {
+    PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place);
  }
 #endif
 }
@@ -166,6 +168,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
    memory::Copy(dst_gpu_place, dst_ptr, src_pinned_place, src_ptr, size,
                 nullptr);
+  } else {
+    PADDLE_THROW("Copy from %s to %s is not supported.", src_place, dst_place);
  }
 #endif
 }

--- a/paddle/fluid/operators/reader/buffered_reader.cc
+++ b/paddle/fluid/operators/reader/buffered_reader.cc
@@ -128,9 +128,18 @@ void BufferedReader::ReadAsync(size_t i) {
                       boost::get<platform::CUDAPlace>(cpu_place), cpu_ptr,
                       size, stream_);
        } else {
+          platform::CUDAPinnedPlace cuda_pinned_place;
+          framework::LoDTensor cuda_pinned_tensor;
+          cuda_pinned_tensor.Resize(cpu[i].dims());
+          auto cuda_pinned_ptr =
+              cuda_pinned_tensor.mutable_data(cuda_pinned_place, cpu[i].type());
+          memory::Copy(cuda_pinned_place, cuda_pinned_ptr,
+                       boost::get<platform::CPUPlace>(cpu_place), cpu_ptr,
+                       size);
          memory::Copy(boost::get<platform::CUDAPlace>(place_), gpu_ptr,
-                       boost::get<platform::CPUPlace>(cpu_place), cpu_ptr, size,
+                       cuda_pinned_place, cuda_pinned_ptr, size, stream_);
-                       stream_);
+          PADDLE_ENFORCE(cudaStreamSynchronize(stream_),
+                         "cuda stream sync error.");
        }
        gpu[i].set_lod(cpu[i].lod());
      }