BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)

Commit e1140f2b
Authored on Jul 21, 2017 by Yi Wang; committed via GitHub on Jul 21, 2017.
Merge pull request #2970 from gangliao/memcpy
Add CPU/GPU Memcpy in memory folder
Parents: 6129ab42, 6cae35b5
Showing 5 changed files with 121 additions and 25 deletions:

- paddle/memory/memory.cc (+44, -1)
- paddle/memory/memory.h (+13, -3)
- paddle/platform/enforce.h (+26, -19)
- paddle/platform/gpu_info.cc (+24, -1)
- paddle/platform/gpu_info.h (+14, -1)
paddle/memory/memory.cc

```diff
@@ -15,7 +15,8 @@ limitations under the License. */
 #include "paddle/memory/memory.h"
 #include "paddle/memory/detail/buddy_allocator.h"
 #include "paddle/memory/detail/system_allocator.h"
-#include "paddle/platform/assert.h"
+
+#include <cstring>  // for memcpy
 
 namespace paddle {
 namespace memory {
@@ -45,6 +46,13 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
   return GetCPUBuddyAllocator()->Used();
 }
 
+template <>
+void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
+                                                  platform::CPUPlace,
+                                                  const void* src, size_t num) {
+  std::memcpy(dst, src, num);
+}
+
 #ifndef PADDLE_ONLY_CPU
 
 detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
@@ -77,6 +85,41 @@ size_t Used<platform::GPUPlace>(platform::GPUPlace place) {
   return GetGPUBuddyAllocator(place.device)->Used();
 }
 
+template <>
+void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
+                                                  void* dst,
+                                                  platform::GPUPlace src_place,
+                                                  const void* src, size_t num,
+                                                  cudaStream_t stream) {
+  platform::SetDeviceId(src_place.device);
+  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
+}
+
+template <>
+void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace dst_place,
+                                                  void* dst,
+                                                  platform::CPUPlace src_place,
+                                                  const void* src, size_t num,
+                                                  cudaStream_t stream) {
+  platform::SetDeviceId(dst_place.device);
+  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
+}
+
+template <>
+void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
+                                                  void* dst,
+                                                  platform::GPUPlace src_place,
+                                                  const void* src, size_t num,
+                                                  cudaStream_t stream) {
+  if (dst_place == src_place) {
+    platform::SetDeviceId(src_place.device);
+    platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream);
+  } else {
+    platform::GpuMemcpyPeer(dst, dst_place.device, src, src_place.device, num,
+                            stream);
+  }
+}
+
 #endif  // PADDLE_ONLY_CPU
 
 }  // namespace memory
```
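For orientation, here is a minimal sketch of how the new `Copy` specializations are driven from caller code. It assumes a CUDA build (`PADDLE_ONLY_CPU` undefined) and that `GPUPlace` is constructible from a device ordinal, as in paddle/platform/place.h; the function name, buffer size, and stream handling are illustrative, not part of this commit.

```cpp
#include <cuda_runtime.h>

#include "paddle/memory/memory.h"
#include "paddle/platform/place.h"

// Hypothetical driver: copy a host buffer to device 0. The six-argument
// Copy overload deduces <GPUPlace, CPUPlace>, which sets the device and
// issues cudaMemcpyHostToDevice through GpuMemcpyAsync on `stream`.
void CopyHostToDevice(cudaStream_t stream) {
  paddle::platform::CPUPlace cpu;
  paddle::platform::GPUPlace gpu(0);

  const size_t num = 1024;
  void* src = paddle::memory::Alloc(cpu, num);
  void* dst = paddle::memory::Alloc(gpu, num);

  paddle::memory::Copy(gpu, dst, cpu, src, num, stream);

  // The copy is asynchronous; wait for it before releasing the buffers.
  cudaStreamSynchronize(stream);
  paddle::memory::Free(gpu, dst);
  paddle::memory::Free(cpu, src);
}
```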
paddle/memory/memory.h

```diff
@@ -14,20 +14,30 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/platform/gpu_info.h"
 #include "paddle/platform/place.h"
 
 namespace paddle {
 namespace memory {
 
-template <class Place>
+template <typename Place>
 void* Alloc(Place, size_t);
 
-template <class Place>
+template <typename Place>
 void Free(Place, void*);
 
-template <class Place>
+template <typename Place>
 size_t Used(Place);
 
+template <typename DstPlace, typename SrcPlace>
+void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
+
+#ifndef PADDLE_ONLY_CPU
+template <typename DstPlace, typename SrcPlace>
+void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
+          cudaStream_t stream);
+#endif  // PADDLE_ONLY_CPU
+
 template <typename T, /* must be POD types */
           typename Place /* platform::GPUPlace or platform::CPUPlace */,
           typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
```
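The trailing context above restricts a template to POD element types via the default-template-argument form of `std::enable_if`. Here is a standalone sketch of the same idiom; the names are illustrative, not Paddle APIs.

```cpp
#include <type_traits>

// The defaulted pointer parameter is well-formed only when the condition
// holds, so non-POD instantiations drop out of overload resolution (SFINAE).
template <typename T,
          typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
const char* Describe(const T&) {
  return "POD type";
}

struct Virtual {
  virtual ~Virtual() {}  // a virtual destructor makes the type non-POD
};

int main() {
  Describe(42);           // fine: int is POD
  // Describe(Virtual{}); // would not compile: overload removed by SFINAE
  return 0;
}
```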
paddle/platform/enforce.h

```diff
@@ -43,10 +43,26 @@ namespace platform {
 // For more details, please check https://stackoverflow.com/a/43870188/724872.
 #define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
 
+template <typename T>
+inline void throw_on_error(T e) {
+  throw_on_error(e, "");
+}
+
+template <typename... Args>
+inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
+    int stat, const Args&... args) {
+  if (UNLIKELY(!(stat))) {
+    throw std::runtime_error(
+        string::Sprintf(args...) +
+        string::Sprintf(" at [%s:%s];", __FILE__, __LINE__));
+  }
+}
+
 #ifndef PADDLE_ONLY_CPU
 
 template <typename... Args>
-inline void throw_on_error(cudaError_t e, const Args&... args) {
+inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
+    cudaError_t e, const Args&... args) {
   if (UNLIKELY(e)) {
     // clang-format off
     throw thrust::system_error(
@@ -58,7 +74,8 @@ inline void throw_on_error(cudaError_t e, const Args&... args) {
 }
 
 template <typename... Args>
-inline void throw_on_error(curandStatus_t stat, const Args&... args) {
+inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
+    curandStatus_t stat, const Args&... args) {
   if (stat != CURAND_STATUS_SUCCESS) {
     // clang-format off
     throw thrust::system_error(
@@ -70,7 +87,8 @@ inline void throw_on_error(curandStatus_t stat, const Args&... args) {
 }
 
 template <typename... Args>
-inline void throw_on_error(cudnnStatus_t stat, const Args&... args) {
+inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
+    cudnnStatus_t stat, const Args&... args) {
   if (stat == CUDNN_STATUS_SUCCESS) {
     return;
   } else {
@@ -84,7 +102,8 @@ inline void throw_on_error(cudnnStatus_t stat, const Args&... args) {
 }
 
 template <typename... Args>
-inline void throw_on_error(cublasStatus_t stat, const Args&... args) {
+inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
+    cublasStatus_t stat, const Args&... args) {
   std::string err;
   if (stat == CUBLAS_STATUS_SUCCESS) {
     return;
@@ -113,15 +132,6 @@ inline void throw_on_error(cublasStatus_t stat, const Args&... args) {
 #endif  // PADDLE_ONLY_CPU
 
-template <typename... Args>
-inline void throw_on_error(int stat, const Args&... args) {
-  if (UNLIKELY(!(stat))) {
-    throw std::runtime_error(
-        string::Sprintf(args...) +
-        string::Sprintf(" at [%s:%s];", __FILE__, __LINE__));
-  }
-}
-
 #define PADDLE_THROW(...)     \
   do {                        \
     throw std::runtime_error( \
@@ -129,12 +139,9 @@ inline void throw_on_error(int stat, const Args&... args) {
         string::Sprintf(" at [%s:%s];", __FILE__, __LINE__)); \
   } while (0)
 
-/**
- * @brief Enforce a condition, otherwise throw an EnforceNotMet
- */
-#define PADDLE_ENFORCE(condition, ...)                          \
-  do {                                                          \
-    ::paddle::platform::throw_on_error(condition, __VA_ARGS__); \
+#define PADDLE_ENFORCE(...)                          \
+  do {                                               \
+    ::paddle::platform::throw_on_error(__VA_ARGS__); \
   } while (0)
 
 }  // namespace platform
```
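With this change, `PADDLE_ENFORCE` forwards all of its arguments straight to `throw_on_error`, so the message becomes optional: a call with only a status value resolves to the new `throw_on_error(T e)` overload, which supplies an empty message, while the SFINAE-constrained overloads handle calls that do pass message arguments. A hedged usage sketch follows; the function name, condition, and message text are illustrative.

```cpp
#include <cuda_runtime.h>

#include "paddle/platform/enforce.h"

// Illustrative only; device_count is a hypothetical argument.
void EnforceExamples(int device_count) {
  // Message-less form, newly possible: PADDLE_ENFORCE(e) resolves to
  // throw_on_error(T e), which forwards to throw_on_error(e, "").
  PADDLE_ENFORCE(cudaSetDevice(0));

  // Formatted form, as before: the trailing arguments are rendered by
  // string::Sprintf, and the failure site [file:line] is appended.
  PADDLE_ENFORCE(device_count > 0,
                 "Expected at least one GPU, found %d", device_count);
}
```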
paddle/platform/gpu_info.cc

```diff
@@ -44,7 +44,7 @@ void SetDeviceId(int id) {
                  "cudaSetDevice failed in paddle::platform::SetDeviceId");
 }
 
-void GpuMemoryUsage(size_t& available, size_t& total) {
+void GpuMemoryUsage(size_t& available, size_t& total) {
   PADDLE_ENFORCE(cudaMemGetInfo(&available, &total),
                  "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage");
 }
@@ -82,5 +82,28 @@ size_t GpuMaxChunkSize() {
   return usable;
 }
 
+void GpuMemcpyAsync(void* dst, const void* src, size_t count,
+                    enum cudaMemcpyKind kind, cudaStream_t stream) {
+  PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream),
+                 "cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync");
+}
+
+void GpuMemcpySync(void* dst, const void* src, size_t count,
+                   enum cudaMemcpyKind kind) {
+  PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind),
+                 "cudaMemcpy failed in paddle::platform::GpuMemcpySync");
+  // note: cudaMemcpy may actually be asynchronous with respect to the caller,
+  //       block on stream 0 to make sure the copy has completed
+  PADDLE_ENFORCE(
+      cudaStreamSynchronize(0),
+      "cudaStreamSynchronize failed in paddle::platform::GpuMemcpySync");
+}
+
+void GpuMemcpyPeer(void* dst, int dst_device, const void* src, int src_device,
+                   size_t count, cudaStream_t stream) {
+  PADDLE_ENFORCE(
+      cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream),
+      "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeer");
+}
+
 }  // namespace platform
 }  // namespace paddle
```
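These wrappers reduce each raw CUDA call plus error check to one line. A small usage sketch follows (buffers allocated by hand rather than through paddle::memory; sizes and names illustrative). Note the comment in `GpuMemcpySync` above: `cudaMemcpy` can return before some transfers finish, hence the extra `cudaStreamSynchronize(0)` on the legacy default stream.

```cpp
#include <cuda_runtime.h>

#include <vector>

#include "paddle/platform/gpu_info.h"

// Illustrative only: buffers are allocated by hand, not via paddle::memory.
void MemcpyWrapperExample() {
  std::vector<float> host(256, 1.0f);
  float* device = nullptr;
  cudaMalloc(&device, host.size() * sizeof(float));

  // Blocking copy: GpuMemcpySync returns only after the transfer completes,
  // because it follows cudaMemcpy with cudaStreamSynchronize(0).
  paddle::platform::GpuMemcpySync(device, host.data(),
                                  host.size() * sizeof(float),
                                  cudaMemcpyHostToDevice);

  // Stream-ordered copy back: the caller owns synchronization. (True
  // overlap would additionally require a pinned host buffer.)
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  paddle::platform::GpuMemcpyAsync(host.data(), device,
                                   host.size() * sizeof(float),
                                   cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);

  cudaStreamDestroy(stream);
  cudaFree(device);
}
```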
paddle/platform/gpu_info.h

```diff
@@ -16,6 +16,7 @@ limitations under the License. */
 #ifndef PADDLE_ONLY_CPU
 
 #include <cuda_runtime.h>
+#include <stddef.h>
 
 namespace paddle {
@@ -31,7 +32,7 @@ int GetCurrentDeviceId();
 void SetDeviceId(int device_id);
 
 //!Get the memory usage of current GPU device.
-void GpuMemoryUsage(size_t& available, size_t& total);
+void GpuMemoryUsage(size_t& available, size_t& total);
 
 //! Get the maximum allocation size of current GPU device.
 size_t GpuMaxAllocSize();
@@ -42,6 +43,18 @@ size_t GpuMinChunkSize();
 //! Get the maximum chunk size for GPU buddy allocator.
 size_t GpuMaxChunkSize();
 
+//! Copy memory from address src to dst asynchronously.
+void GpuMemcpyAsync(void* dst, const void* src, size_t count,
+                    enum cudaMemcpyKind kind, cudaStream_t stream);
+
+//! Copy memory from address src to dst synchronously.
+void GpuMemcpySync(void* dst, const void* src, size_t count,
+                   enum cudaMemcpyKind kind);
+
+//! Copy memory from one device to another device.
+void GpuMemcpyPeer(void* dst, int dst_device, const void* src, int src_device,
+                   size_t count, cudaStream_t stream);
+
 }  // namespace platform
 }  // namespace paddle
```
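Finally, a sketch of the cross-device path (assumes at least two GPUs; all names illustrative). `cudaMemcpyPeerAsync` takes both device ordinals explicitly, which is why the `Copy<GPUPlace, GPUPlace>` specialization in memory.cc needs no `SetDeviceId` call on the peer branch.

```cpp
#include <cuda_runtime.h>

#include "paddle/platform/gpu_info.h"

// Illustrative only: copy `count` bytes from GPU 0 to GPU 1.
void PeerCopyExample(cudaStream_t stream, size_t count) {
  void* on_gpu0 = nullptr;
  void* on_gpu1 = nullptr;
  cudaSetDevice(0);
  cudaMalloc(&on_gpu0, count);
  cudaSetDevice(1);
  cudaMalloc(&on_gpu1, count);

  // Without peer access enabled, the runtime stages the transfer through
  // host memory; with cudaDeviceEnablePeerAccess it can go direct.
  paddle::platform::GpuMemcpyPeer(on_gpu1, /*dst_device=*/1,
                                  on_gpu0, /*src_device=*/0, count, stream);
  cudaStreamSynchronize(stream);

  cudaFree(on_gpu1);
  cudaSetDevice(0);
  cudaFree(on_gpu0);
}
```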