Commit e53a48b4 authored by: liaogang

Add memcpy

Parent commit: 028f3dc4
...@@ -15,9 +15,6 @@ limitations under the License. */ ...@@ -15,9 +15,6 @@ limitations under the License. */
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include "paddle/memory/detail/buddy_allocator.h" #include "paddle/memory/detail/buddy_allocator.h"
#include "paddle/memory/detail/system_allocator.h" #include "paddle/memory/detail/system_allocator.h"
#include "paddle/platform/assert.h"
#include <boost/variant.hpp>
namespace paddle { namespace paddle {
namespace memory { namespace memory {
...@@ -49,16 +46,9 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) { ...@@ -49,16 +46,9 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
template <> template <>
void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst, void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
platform::CPUPlace, void* src, platform::CPUPlace,
size_t size) { const void* src, size_t num) {
memcpy(dst, src, size); memcpy(dst, src, num);
}
template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace, void* dst,
platform::CPUPlace, void* src,
size_t size) {
memcpy(dst, src, size);
} }
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
...@@ -93,6 +83,36 @@ size_t Used<platform::GPUPlace>(platform::GPUPlace place) { ...@@ -93,6 +83,36 @@ size_t Used<platform::GPUPlace>(platform::GPUPlace place) {
return GetGPUBuddyAllocator(place.device)->Used(); return GetGPUBuddyAllocator(place.device)->Used();
} }
// Host-to-device copy specialization: enqueues an asynchronous copy of `num`
// bytes from host memory `src` into GPU memory `dst` on `stream`.
// NOTE(review): async w.r.t. the host — the caller presumably must keep both
// buffers alive and synchronize `stream` before reusing them; confirm that
// GpuMemcpyAsync does not synchronize internally.
template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace, void* dst,
                                                  platform::GPUPlace,
                                                  const void* src, size_t num,
                                                  cudaStream_t stream) {
  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
}
// Device-to-host copy specialization: enqueues an asynchronous copy of `num`
// bytes from GPU memory `src` into host memory `dst` on `stream`.
// NOTE(review): for the copy to be truly asynchronous, `dst` should be pinned
// host memory (cudaMallocHost); with pageable memory the runtime may fall
// back to a synchronous path — confirm callers' allocation strategy.
template <>
void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace, void* dst,
                                                  platform::CPUPlace,
                                                  const void* src, size_t num,
                                                  cudaStream_t stream) {
  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
}
// Device-to-device copy specialization: copies `num` bytes between GPU
// buffers on `stream`.
// - Same device (dst_place == src_place): plain async device-to-device copy.
// - Different devices: delegates to GpuMemcpyPeer for a cross-device copy.
// NOTE(review): the peer path presumably requires peer access to be enabled
// (or falls back to staging through the host) inside GpuMemcpyPeer — confirm
// in platform/gpu_info.
template <>
void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
                                                  void* dst,
                                                  platform::GPUPlace src_place,
                                                  const void* src, size_t num,
                                                  cudaStream_t stream) {
  if (dst_place == src_place) {
    platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream);
  } else {
    platform::GpuMemcpyPeer(dst, dst_place.device, src, src_place.device, num,
                            stream);
  }
}
#endif // PADDLE_ONLY_CPU #endif // PADDLE_ONLY_CPU
} // namespace memory } // namespace memory
......
...@@ -14,22 +14,29 @@ limitations under the License. */ ...@@ -14,22 +14,29 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/platform/gpu_info.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
namespace paddle { namespace paddle {
namespace memory { namespace memory {
template <class Place> template <typename Place>
void* Alloc(Place, size_t); void* Alloc(Place, size_t);
template <class Place> template <typename Place>
void Free(Place, void*); void Free(Place, void*);
template <class Place> template <typename Place>
size_t Used(Place); size_t Used(Place);
template <class Place1, class Place2> template <typename DstPlace, typename SrcPlace>
void Copy(Place1, void* dst, Place2, void* src, size_t size); void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
#ifndef PADDLE_ONLY_CPU
// Overload of Copy taking a CUDA stream, available only in GPU builds.
// NOTE(review): the definitions elsewhere in this commit enqueue the copy
// asynchronously on `stream`; callers must synchronize before reusing buffers.
template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
          cudaStream_t stream);
#endif  // PADDLE_ONLY_CPU
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register