提交 b0588641 编写于 作者: L liaogang

Add cuda memcpy in gpu_info

上级 e53a48b4
...@@ -44,7 +44,7 @@ void SetDeviceId(int id) { ...@@ -44,7 +44,7 @@ void SetDeviceId(int id) {
"cudaSetDevice failed in paddle::platform::SetDeviceId"); "cudaSetDevice failed in paddle::platform::SetDeviceId");
} }
void GpuMemoryUsage(size_t& available, size_t& total) { void GpuMemoryUsage(size_t &available, size_t &total) {
throw_on_error(cudaMemGetInfo(&available, &total), throw_on_error(cudaMemGetInfo(&available, &total),
"cudaMemGetInfo failed in paddle::platform::GetMemoryUsage"); "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage");
} }
...@@ -82,5 +82,23 @@ size_t GpuMaxChunkSize() { ...@@ -82,5 +82,23 @@ size_t GpuMaxChunkSize() {
return usable; return usable;
} }
//! Enqueue an asynchronous copy of `count` bytes from `src` to `dst` on
//! `stream`; `kind` selects the transfer direction (H2D, D2H, D2D, ...).
//! Returns immediately — the caller must synchronize `stream` before
//! reading the destination. A CUDA error is raised via PADDLE_ENFORCE.
// NOTE(review): for true overlap the host buffer should be pinned
// (cudaMallocHost) — pageable memory makes cudaMemcpyAsync effectively
// synchronous; confirm callers allocate accordingly.
void GpuMemcpyAsync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind, cudaStream_t stream) {
PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream));
}
//! Copy `count` bytes from `src` to `dst` and block until the copy has
//! completed; `kind` selects the transfer direction. Any CUDA error is
//! raised via PADDLE_ENFORCE.
void GpuMemcpySync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind) {
PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind));
// note: cudaMemcpy may actually be asynchronous with respect to the caller,
// block on stream 0 to make sure the copy has completed
// (e.g. device-to-device transfers return before completion; the legacy
// default stream synchronizes with all blocking streams, so this waits
// for the copy regardless of direction).
PADDLE_ENFORCE(cudaStreamSynchronize(0));
}
//! Copy `count` bytes from `src` on device `src_device` to `dst` on
//! device `dst_device`, enqueued on `stream`. A CUDA error is raised
//! via PADDLE_ENFORCE.
// NOTE(review): despite the name, this issues cudaMemcpyPeerAsync and
// does NOT block — callers must synchronize `stream` before using the
// destination; consider renaming to GpuMemcpyPeerAsync for clarity.
void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device,
size_t count, cudaStream_t stream) {
PADDLE_ENFORCE(
cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream));
}
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
#include <cuda_runtime.h>
#include <stddef.h> #include <stddef.h>
namespace paddle { namespace paddle {
...@@ -31,7 +32,7 @@ int GetCurrentDeviceId(); ...@@ -31,7 +32,7 @@ int GetCurrentDeviceId();
void SetDeviceId(int device_id); void SetDeviceId(int device_id);
//!Get the memory usage of current GPU device. //!Get the memory usage of current GPU device.
void GpuMemoryUsage(size_t& available, size_t& total); void GpuMemoryUsage(size_t &available, size_t &total);
//! Get the maximum allocation size of current GPU device. //! Get the maximum allocation size of current GPU device.
size_t GpuMaxAllocSize(); size_t GpuMaxAllocSize();
...@@ -42,6 +43,18 @@ size_t GpuMinChunkSize(); ...@@ -42,6 +43,18 @@ size_t GpuMinChunkSize();
//! Get the maximum chunk size for GPU buddy allocator. //! Get the maximum chunk size for GPU buddy allocator.
size_t GpuMaxChunkSize(); size_t GpuMaxChunkSize();
//! Copy memory from address src to dst asynchronously on the given stream.
//! Returns before the copy completes; synchronize the stream before
//! reading the destination.
void GpuMemcpyAsync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind, cudaStream_t stream);
//! Copy memory from address src to dst synchronously; blocks the caller
//! until the copy has completed.
void GpuMemcpySync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind);
//! Copy memory from one device to another device, enqueued on `stream`.
// NOTE(review): the implementation is asynchronous (cudaMemcpyPeerAsync);
// callers must synchronize `stream` before using the destination.
void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device,
size_t count, cudaStream_t stream);
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册