Commit e53a48b4 authored by liaogang

Add memcpy

Parent 028f3dc4
@@ -15,9 +15,6 @@ limitations under the License. */
#include "paddle/memory/memory.h"
#include "paddle/memory/detail/buddy_allocator.h"
#include "paddle/memory/detail/system_allocator.h"
#include "paddle/platform/assert.h"
#include <boost/variant.hpp>
namespace paddle {
namespace memory {
@@ -49,16 +46,9 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
template <>
void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
platform::CPUPlace, void* src,
size_t size) {
memcpy(dst, src, size);
}
template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace, void* dst,
platform::CPUPlace, void* src,
size_t size) {
memcpy(dst, src, size);
platform::CPUPlace,
const void* src, size_t num) {
memcpy(dst, src, num);
}
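
For reference, a minimal usage sketch of the host-to-host specialization above; the helper name CopyOnHost and the 1024-byte size are illustrative, not part of this commit:

  #include <cstddef>

  #include "paddle/memory/memory.h"

  void CopyOnHost() {
    paddle::platform::CPUPlace cpu;
    const size_t num = 1024;  // byte count, illustrative
    void* src = paddle::memory::Alloc(cpu, num);
    void* dst = paddle::memory::Alloc(cpu, num);
    // The CPUPlace/CPUPlace specialization dispatches to plain memcpy.
    paddle::memory::Copy(cpu, dst, cpu, src, num);
    paddle::memory::Free(cpu, dst);
    paddle::memory::Free(cpu, src);
  }

Note that num is a byte count, so callers copying typed arrays multiply by sizeof(T).
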
#ifndef PADDLE_ONLY_CPU
@@ -93,6 +83,36 @@ size_t Used<platform::GPUPlace>(platform::GPUPlace place) {
return GetGPUBuddyAllocator(place.device)->Used();
}
template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace, void* dst,
platform::GPUPlace,
const void* src, size_t num,
cudaStream_t stream) {
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
}
template <>
void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace, void* dst,
platform::CPUPlace,
const void* src, size_t num,
cudaStream_t stream) {
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
}
template <>
void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
void* dst,
platform::GPUPlace src_place,
const void* src, size_t num,
cudaStream_t stream) {
if (dst_place == src_place) {
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream);
} else {
platform::GpuMemcpyPeer(dst, dst_place.device, src, src_place.device, num,
stream);
}
}
#endif // PADDLE_ONLY_CPU
} // namespace memory
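
A hedged usage sketch of the GPUPlace-to-GPUPlace specialization above, exercising both branches (same-device async copy vs. cross-device peer copy); the function name, device ids, stream handling, and the assumption that GPUPlace is constructible from a device id are illustrative:

  #include <cuda_runtime.h>

  #include "paddle/memory/memory.h"

  void DeviceToDeviceCopy(size_t num) {
    // Assumes GPUPlace can be constructed from a device id, as suggested by
    // the place.device field used above.
    paddle::platform::GPUPlace gpu0(0);
    paddle::platform::GPUPlace gpu1(1);
    void* src = paddle::memory::Alloc(gpu0, num);
    void* dst_same = paddle::memory::Alloc(gpu0, num);
    void* dst_peer = paddle::memory::Alloc(gpu1, num);
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    // Same device: takes the GpuMemcpyAsync(cudaMemcpyDeviceToDevice) branch.
    paddle::memory::Copy(gpu0, dst_same, gpu0, src, num, stream);
    // Different devices: falls through to GpuMemcpyPeer.
    paddle::memory::Copy(gpu1, dst_peer, gpu0, src, num, stream);
    cudaStreamSynchronize(stream);
    cudaStreamDestroy(stream);
    paddle::memory::Free(gpu1, dst_peer);
    paddle::memory::Free(gpu0, dst_same);
    paddle::memory::Free(gpu0, src);
  }
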
@@ -14,22 +14,29 @@ limitations under the License. */
#pragma once
#include "paddle/platform/gpu_info.h"
#include "paddle/platform/place.h"
namespace paddle {
namespace memory {
template <class Place>
template <typename Place>
void* Alloc(Place, size_t);
template <class Place>
template <typename Place>
void Free(Place, void*);
template <class Place>
template <typename Place>
size_t Used(Place);
template <class Place1, class Place2>
void Copy(Place1, void* dst, Place2, void* src, size_t size);
template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
#ifndef PADDLE_ONLY_CPU
template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
cudaStream_t stream);
#endif // PADDLE_ONLY_CPU
} // namespace memory
} // namespace paddle
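
Because every entry point is templated on the place type, call sites can stay place-agnostic. A small RAII wrapper is one way to use that; the Buffer name and design below are an illustrative sketch, not part of this header:

  #include <cstddef>

  #include "paddle/memory/memory.h"

  // Owns a raw allocation in any Place supported by the Alloc/Free templates above.
  template <typename Place>
  class Buffer {
   public:
    Buffer(Place place, size_t num)
        : place_(place), ptr_(paddle::memory::Alloc(place, num)) {}
    ~Buffer() { paddle::memory::Free(place_, ptr_); }
    void* get() const { return ptr_; }

   private:
    Place place_;
    void* ptr_;
  };

  // Example: Buffer<paddle::platform::CPUPlace> buf(paddle::platform::CPUPlace(), 256);
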