Add cuda memcpy in gpu_info

b0588641 · liaogang · e53a48b4 · b0588641 · b0588641
隐藏空白更改
内联并排

Showing 2 changed files with 33 additions and 2 deletions

paddle/platform/gpu_info.cc paddle/platform/gpu_info.cc +19 -1

paddle/platform/gpu_info.h paddle/platform/gpu_info.h +14 -1

未找到文件。
--- a/paddle/platform/gpu_info.cc
+++ b/paddle/platform/gpu_info.cc
@@ -44,7 +44,7 @@ void SetDeviceId(int id) {
                 "cudaSetDevice failed in paddle::platform::SetDeviceId");
 }
-void GpuMemoryUsage(size_t& available, size_t& total) {
+// Query the current device's memory via cudaMemGetInfo: `available` receives
+// the free byte count and `total` the total byte count. A failing call is
+// reported through throw_on_error with a message naming this function.
+void GpuMemoryUsage(size_t &available, size_t &total) {
  throw_on_error(cudaMemGetInfo(&available, &total),
-                 "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage");
+                 "cudaMemGetInfo failed in paddle::platform::GpuMemoryUsage");
 }
@@ -82,5 +82,23 @@ size_t GpuMaxChunkSize() {
  return usable;
 }
+void GpuMemcpyAsync(void *dst, const void *src, size_t count,
+                    enum cudaMemcpyKind kind, cudaStream_t stream) {  // thin wrapper over cudaMemcpyAsync
+  PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream));  // arguments are forwarded unchanged; the returned status is handed to PADDLE_ENFORCE
+}
+void GpuMemcpySync(void *dst, const void *src, size_t count,
+                   enum cudaMemcpyKind kind) {  // cudaMemcpy followed by a wait on stream 0
+  PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind));
+  // Note: cudaMemcpy may return before the copy has finished from the
+  //       caller's point of view, so block on stream 0 until it completes.
+  PADDLE_ENFORCE(cudaStreamSynchronize(0));
+}
+void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device,
+                   size_t count, cudaStream_t stream) {  // despite the name, this issues the asynchronous peer copy
+  PADDLE_ENFORCE(
+      cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream));  // enqueued on `stream`; no synchronization is performed here
+}
 }  // namespace platform
 }  // namespace paddle
--- a/paddle/platform/gpu_info.h
+++ b/paddle/platform/gpu_info.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #ifndef PADDLE_ONLY_CPU
+#include <cuda_runtime.h>
 #include <stddef.h>
 namespace paddle {
@@ -31,7 +32,7 @@ int GetCurrentDeviceId();
 void SetDeviceId(int device_id);
 //! Get the memory usage of the current GPU device.
-void GpuMemoryUsage(size_t& available, size_t& total);
+void GpuMemoryUsage(size_t &available, size_t &total);
 //! Get the maximum allocation size of the current GPU device.
 size_t GpuMaxAllocSize();
@@ -42,6 +43,18 @@ size_t GpuMinChunkSize();
 //! Get the maximum chunk size for the GPU buddy allocator.
 size_t GpuMaxChunkSize();
+//! Copy count bytes from src to dst asynchronously on the given stream.
+void GpuMemcpyAsync(void *dst, const void *src, size_t count,
+                    enum cudaMemcpyKind kind, cudaStream_t stream);
+//! Copy count bytes from src to dst, then wait for stream 0 to finish.
+void GpuMemcpySync(void *dst, const void *src, size_t count,
+                   enum cudaMemcpyKind kind);
+//! Copy count bytes between devices asynchronously on the given stream.
+void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device,
+                   size_t count, cudaStream_t stream);
 }  // namespace platform
 }  // namespace paddle