diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc
index 5be9bef3ac37f19edc92d4845fd1982ad38979d7..5c7b3bb15eae055fa2ce0e5756e50160f3dfacfe 100644
--- a/paddle/memory/memory.cc
+++ b/paddle/memory/memory.cc
@@ -15,9 +15,6 @@ limitations under the License. */
 #include "paddle/memory/memory.h"
 #include "paddle/memory/detail/buddy_allocator.h"
 #include "paddle/memory/detail/system_allocator.h"
-#include "paddle/platform/assert.h"
-
-#include <cstring>
 
 namespace paddle {
 namespace memory {
@@ -49,16 +46,9 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
 
 template <>
 void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
-                                                  platform::CPUPlace, void* src,
-                                                  size_t size) {
-  memcpy(dst, src, size);
-}
-
-template <>
-void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
-                                                  platform::CPUPlace, void* src,
-                                                  size_t size) {
-  memcpy(dst, src, size);
+                                                  platform::CPUPlace,
+                                                  const void* src, size_t num) {
+  memcpy(dst, src, num);
 }
 
 #ifndef PADDLE_ONLY_CPU
@@ -93,6 +83,36 @@ size_t Used<platform::GPUPlace>(platform::GPUPlace place) {
   return GetGPUBuddyAllocator(place.device)->Used();
 }
 
+template <>
+void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace, void* dst,
+                                                  platform::GPUPlace,
+                                                  const void* src, size_t num,
+                                                  cudaStream_t stream) {
+  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
+}
+
+template <>
+void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace, void* dst,
+                                                  platform::CPUPlace,
+                                                  const void* src, size_t num,
+                                                  cudaStream_t stream) {
+  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
+}
+
+template <>
+void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
+                                                  void* dst,
+                                                  platform::GPUPlace src_place,
+                                                  const void* src, size_t num,
+                                                  cudaStream_t stream) {
+  if (dst_place == src_place) {
+    platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream);
+  } else {
+    platform::GpuMemcpyPeer(dst, dst_place.device, src, src_place.device, num,
+                            stream);
+  }
+}
+
 #endif  // PADDLE_ONLY_CPU
 
 }  // namespace memory
diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h
index 96c00cb106b70ce425b349e5baca23a4ca746f37..3ac359e1746df2bcd6d285793b0d4a677f282cb7 100644
--- a/paddle/memory/memory.h
+++ b/paddle/memory/memory.h
@@ -14,22 +14,29 @@ limitations under the License. */
 
 #pragma once
 
+#include "paddle/platform/gpu_info.h"
 #include "paddle/platform/place.h"
 
 namespace paddle {
 namespace memory {
 
-template <class Place>
+template <typename Place>
 void* Alloc(Place, size_t);
 
-template <class Place>
+template <typename Place>
 void Free(Place, void*);
 
-template <class Place>
+template <typename Place>
 size_t Used(Place);
 
-template <class Place1, class Place2>
-void Copy(Place1, void* dst, Place2, void* src, size_t size);
+template <typename DstPlace, typename SrcPlace>
+void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
+
+#ifndef PADDLE_ONLY_CPU
+template <typename DstPlace, typename SrcPlace>
+void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
+          cudaStream_t stream);
+#endif  // PADDLE_ONLY_CPU
 
 }  // namespace memory
 }  // namespace paddle
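
For context, a minimal usage sketch of the stream-aware `memory::Copy` overload this diff introduces. It is not part of the change itself; the `GPUPlace(0)` constructor and the raw CUDA stream management are assumptions about the surrounding platform code, not something this patch adds.

```cpp
// Sketch only: exercises the new asynchronous Copy overload declared in
// paddle/memory/memory.h. GPUPlace(0) and the CUDA stream calls are assumed
// to be available from paddle/platform and the CUDA runtime.
#include <cuda_runtime.h>

#include "paddle/memory/memory.h"
#include "paddle/platform/place.h"

void CopyHostToDeviceExample() {
  using namespace paddle;

  platform::CPUPlace cpu;
  platform::GPUPlace gpu(0);  // assumed constructor taking a device id

  const size_t num = 1024 * sizeof(float);
  void* src = memory::Alloc(cpu, num);  // host buffer
  void* dst = memory::Alloc(gpu, num);  // device buffer

  cudaStream_t stream;
  cudaStreamCreate(&stream);

  // New overload: asynchronous host -> device copy issued on the given stream.
  memory::Copy(gpu, dst, cpu, src, num, stream);

  cudaStreamSynchronize(stream);  // wait for the copy before reusing buffers
  cudaStreamDestroy(stream);

  memory::Free(gpu, dst);
  memory::Free(cpu, src);
}
```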