From b05886414b4f7bdbc9a6228e6cb681c7f3ccb50c Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 19 Jul 2017 22:28:06 +0800 Subject: [PATCH] Add cuda memcpy in gpu_info --- paddle/platform/gpu_info.cc | 20 +++++++++++++++++++- paddle/platform/gpu_info.h | 15 ++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc index a1383d352..12dc01d1a 100644 --- a/paddle/platform/gpu_info.cc +++ b/paddle/platform/gpu_info.cc @@ -44,7 +44,7 @@ void SetDeviceId(int id) { "cudaSetDevice failed in paddle::platform::SetDeviceId"); } -void GpuMemoryUsage(size_t& available, size_t& total) { +void GpuMemoryUsage(size_t &available, size_t &total) { throw_on_error(cudaMemGetInfo(&available, &total), "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage"); } @@ -82,5 +82,23 @@ size_t GpuMaxChunkSize() { return usable; } +void GpuMemcpyAsync(void *dst, const void *src, size_t count, + enum cudaMemcpyKind kind, cudaStream_t stream) { + PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream)); +} + +void GpuMemcpySync(void *dst, const void *src, size_t count, + enum cudaMemcpyKind kind) { + PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind)); + // note: cudaMemcpy may actually be asynchronous with respect to the caller, + // block on stream 0 to make sure the copy has completed + PADDLE_ENFORCE(cudaStreamSynchronize(0)); +} + +void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device, + size_t count, cudaStream_t stream) { + PADDLE_ENFORCE( + cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream)); +} } // namespace platform } // namespace paddle diff --git a/paddle/platform/gpu_info.h b/paddle/platform/gpu_info.h index 79e71956b..d3a5f5f13 100644 --- a/paddle/platform/gpu_info.h +++ b/paddle/platform/gpu_info.h @@ -16,6 +16,7 @@ limitations under the License. */ #ifndef PADDLE_ONLY_CPU +#include #include namespace paddle { @@ -31,7 +32,7 @@ int GetCurrentDeviceId(); void SetDeviceId(int device_id); //!Get the memory usage of current GPU device. -void GpuMemoryUsage(size_t& available, size_t& total); +void GpuMemoryUsage(size_t &available, size_t &total); //! Get the maximum allocation size of current GPU device. size_t GpuMaxAllocSize(); @@ -42,6 +43,18 @@ size_t GpuMinChunkSize(); //! Get the maximum chunk size for GPU buddy allocator. size_t GpuMaxChunkSize(); +//! Copy memory from address src to dst asynchronously. +void GpuMemcpyAsync(void *dst, const void *src, size_t count, + enum cudaMemcpyKind kind, cudaStream_t stream); + +//! Copy memory from address src to dst synchronously. +void GpuMemcpySync(void *dst, const void *src, size_t count, + enum cudaMemcpyKind kind); + +//! Copy memory from one device to another device. +void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device, + size_t count, cudaStream_t stream); + } // namespace platform } // namespace paddle -- GitLab