Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
766c7405
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
766c7405
编写于
4月 03, 2018
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
follow comments
上级
2514d70e
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
20 addition
and
12 deletion
+20
-12
paddle/fluid/memory/CMakeLists.txt
paddle/fluid/memory/CMakeLists.txt
+3
-3
paddle/fluid/memory/detail/system_allocator.h
paddle/fluid/memory/detail/system_allocator.h
+6
-2
paddle/fluid/memory/pinned_memory_test.cu
paddle/fluid/memory/pinned_memory_test.cu
+11
-7
未找到文件。
paddle/fluid/memory/CMakeLists.txt
浏览文件 @
766c7405
...
@@ -15,6 +15,6 @@ cc_library(paddle_memory
...
@@ -15,6 +15,6 @@ cc_library(paddle_memory
cc_test
(
memory_test SRCS memory_test.cc DEPS place paddle_memory
)
cc_test
(
memory_test SRCS memory_test.cc DEPS place paddle_memory
)
if
(
WITH_GPU
)
#
if (WITH_GPU)
nv_test
(
pinned_memory_test SRCS pinned_memory_test.cu DEPS place paddle_memory
)
#
nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place paddle_memory)
endif
()
#
endif()
paddle/fluid/memory/detail/system_allocator.h
浏览文件 @
766c7405
...
@@ -21,8 +21,9 @@ namespace memory {
...
@@ -21,8 +21,9 @@ namespace memory {
namespace
detail
{
namespace
detail
{
/**
/**
* \brief SystemAllocator is the parent class of CPUAllocator and GPUAllocator.
* \brief SystemAllocator is the parent class of CPUAllocator,
* A BuddyAllocator object uses a SystemAllocator* pointing to the
* CUDAPinnedAllocator and GPUAllocator. A BuddyAllocator
* object uses a SystemAllocator* pointing to the
* underlying system allocator.
* underlying system allocator.
*/
*/
class
SystemAllocator
{
class
SystemAllocator
{
...
@@ -43,6 +44,8 @@ class CPUAllocator : public SystemAllocator {
...
@@ -43,6 +44,8 @@ class CPUAllocator : public SystemAllocator {
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
class
GPUAllocator
:
public
SystemAllocator
{
class
GPUAllocator
:
public
SystemAllocator
{
public:
public:
explicit
GPUAllocator
(
int
gpu_id
)
:
gpu_id_
(
gpu_id
)
{}
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
virtual
void
Free
(
void
*
p
,
size_t
size
,
size_t
index
);
virtual
void
Free
(
void
*
p
,
size_t
size
,
size_t
index
);
virtual
bool
UseGpu
()
const
;
virtual
bool
UseGpu
()
const
;
...
@@ -50,6 +53,7 @@ class GPUAllocator : public SystemAllocator {
...
@@ -50,6 +53,7 @@ class GPUAllocator : public SystemAllocator {
private:
private:
size_t
gpu_alloc_size_
=
0
;
size_t
gpu_alloc_size_
=
0
;
size_t
fallback_alloc_size_
=
0
;
size_t
fallback_alloc_size_
=
0
;
int
gpu_id_
;
};
};
class
CUDAPinnedAllocator
:
public
SystemAllocator
{
class
CUDAPinnedAllocator
:
public
SystemAllocator
{
...
...
paddle/fluid/memory/pinned_memory_test.cu
浏览文件 @
766c7405
...
@@ -24,6 +24,8 @@ limitations under the License. */
...
@@ -24,6 +24,8 @@ limitations under the License. */
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <unordered_map>
#include <unordered_map>
// This unit test is an example comparing the performance between using pinned
// memory and not. In general, using pinned memory will be faster.
template
<
typename
T
>
template
<
typename
T
>
__global__
void
Kernel
(
T
*
output
,
int
dim
)
{
__global__
void
Kernel
(
T
*
output
,
int
dim
)
{
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
tid
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
@@ -33,7 +35,7 @@ __global__ void Kernel(T* output, int dim) {
...
@@ -33,7 +35,7 @@ __global__ void Kernel(T* output, int dim) {
}
}
template
<
typename
Place
>
template
<
typename
Place
>
void
test_pinned_memory
()
{
float
test_pinned_memory
()
{
Place
cpu_place
;
Place
cpu_place
;
paddle
::
platform
::
CUDAPlace
cuda_place
;
paddle
::
platform
::
CUDAPlace
cuda_place
;
...
@@ -133,12 +135,14 @@ void test_pinned_memory() {
...
@@ -133,12 +135,14 @@ void test_pinned_memory() {
paddle
::
memory
::
Free
(
cpu_place
,
output_pinned_mem
[
j
]);
paddle
::
memory
::
Free
(
cpu_place
,
output_pinned_mem
[
j
]);
paddle
::
memory
::
Free
(
cuda_place
,
gpu_mem
[
j
]);
paddle
::
memory
::
Free
(
cuda_place
,
gpu_mem
[
j
]);
}
}
return
elapsedTime
/
30
;
}
}
TEST
(
CPUANDCUDAPinned
,
CPUAllocator
)
{
TEST
(
CPUANDCUDAPinned
,
CPUAllocatorAndCUDAPinnedAllocator
)
{
test_pinned_memory
<
paddle
::
platform
::
CPUPlace
>
();
// Generally speaking, operation on pinned_memory is faster than that on
}
// unpinned-memory, but if this unit test fails frequently, please close this
// test for the time being.
TEST
(
CPUANDCUDAPinned
,
CUDAPinnedAllocator
)
{
float
time1
=
test_pinned_memory
<
paddle
::
platform
::
CPUPlace
>
();
test_pinned_memory
<
paddle
::
platform
::
CUDAPinnedPlace
>
();
float
time2
=
test_pinned_memory
<
paddle
::
platform
::
CUDAPinnedPlace
>
();
EXPECT_GT
(
time1
,
time2
)
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录