Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
158d6c4d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
158d6c4d
编写于
3月 26, 2018
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add unit test
上级
18eb7730
变更
7
隐藏空白更改
内联
并排
Showing 7 changed files with 124 additions and 40 deletions
+124
-40
paddle/fluid/framework/tensor.h
paddle/fluid/framework/tensor.h
+0
-3
paddle/fluid/memory/detail/system_allocator.cc
paddle/fluid/memory/detail/system_allocator.cc
+5
-2
paddle/fluid/memory/memcpy.cc
paddle/fluid/memory/memcpy.cc
+39
-0
paddle/fluid/memory/memory.cc
paddle/fluid/memory/memory.cc
+20
-29
paddle/fluid/memory/memory.h
paddle/fluid/memory/memory.h
+4
-6
paddle/fluid/memory/memory_test.cc
paddle/fluid/memory/memory_test.cc
+55
-0
paddle/fluid/platform/place.cc
paddle/fluid/platform/place.cc
+1
-0
未找到文件。
paddle/fluid/framework/tensor.h
浏览文件 @
158d6c4d
...
...
@@ -92,9 +92,6 @@ class Tensor {
/*! Return the numel of the memory block. */
inline
int64_t
numel
()
const
;
/*! Return the numel of the memory block. */
inline
bool
isPinned
()
const
;
/*! Resize the dimensions of the memory block. */
inline
Tensor
&
Resize
(
const
DDim
&
dims
);
...
...
paddle/fluid/memory/detail/system_allocator.cc
浏览文件 @
158d6c4d
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
...
...
@@ -127,10 +128,12 @@ void* CUDAPinnedAllocator::Alloc(size_t& index, size_t size) {
// NOTE: here, we use CpuMaxAllocSize()/2 as the maximum memory size
// of host pinned allocation. Allocating too much would reduce
// the amount of memory available to the underlying system for paging.
size_t
usable
=
CpuMaxAllocSize
()
/
2
-
cuda_pinnd_alloc_size_
;
size_t
usable
=
paddle
::
platform
::
CpuMaxAllocSize
()
/
2
-
cuda_pinnd_alloc_size_
;
if
(
size
>
usable
)
return
nullptr
;
void
*
p
;
// PINNED memory is visible to all CUDA contexts.
cudaError_t
result
=
cudaMallocHost
(
&
p
,
size
);
...
...
@@ -161,7 +164,7 @@ void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) {
}
}
// Single definition of UseGpu() for the pinned allocator.
//
// The listing carried two definitions of this member (one returning true,
// one returning false), which cannot both exist in one translation unit.
// The `false` form is kept: this allocator obtains its memory with
// cudaMallocHost, i.e. host pinned memory rather than device memory.
// NOTE(review): UseGpu() presumably tells the buddy allocator where its
// chunks live -- confirm against the SystemAllocator interface.
bool CUDAPinnedAllocator::UseGpu() const { return false; }
#endif
...
...
paddle/fluid/memory/memcpy.cc
浏览文件 @
158d6c4d
...
...
@@ -56,6 +56,45 @@ void Copy<platform::CUDAPlace, platform::CUDAPlace>(
}
}
// Copy `num` bytes from a CUDA pinned buffer (`src`) into a CPU buffer
// (`dst`). Both place arguments only select this specialization; the body
// does not read them. The transfer is a plain std::memcpy.
template <>
void Copy<platform::CPUPlace, platform::CUDAPinnedPlace>(
    platform::CPUPlace dst_place, void* dst,
    platform::CUDAPinnedPlace src_place, const void* src, size_t num) {
  std::memcpy(dst, src, num);
}
// Copy `num` bytes from a CPU buffer (`src`) into a CUDA pinned buffer
// (`dst`). Both place arguments only select this specialization; the body
// does not read them. The transfer is a plain std::memcpy.
template <>
void Copy<platform::CUDAPinnedPlace, platform::CPUPlace>(
    platform::CUDAPinnedPlace dst_place, void* dst,
    platform::CPUPlace src_place, const void* src, size_t num) {
  std::memcpy(dst, src, num);
}
// Copy `num` bytes between two CUDA pinned buffers. Both place arguments
// only select this specialization; the body does not read them. The
// transfer is a plain std::memcpy.
template <>
void Copy<platform::CUDAPinnedPlace, platform::CUDAPinnedPlace>(
    platform::CUDAPinnedPlace dst_place, void* dst,
    platform::CUDAPinnedPlace src_place, const void* src, size_t num) {
  std::memcpy(dst, src, num);
}
// Copy `num` bytes from GPU memory (`src`, on src_place.device) into a CUDA
// pinned buffer (`dst`).
//
// The source device is made current with platform::SetDeviceId before the
// transfer is handed to platform::GpuMemcpyAsync with
// cudaMemcpyDeviceToHost on `stream`. `dst_place` is not read.
// NOTE(review): the helper's name suggests the call returns before the copy
// completes -- confirm whether callers must synchronize `stream`.
template <>
void Copy<platform::CUDAPinnedPlace, platform::CUDAPlace>(
    platform::CUDAPinnedPlace dst_place, void* dst,
    platform::CUDAPlace src_place, const void* src, size_t num,
    cudaStream_t stream) {
  const auto source_device = src_place.device;
  platform::SetDeviceId(source_device);
  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
}
// Copy `num` bytes from a CUDA pinned buffer (`src`) into GPU memory
// (`dst`, on dst_place.device).
//
// The destination device is made current with platform::SetDeviceId before
// the transfer is handed to platform::GpuMemcpyAsync with
// cudaMemcpyHostToDevice on `stream`. `src_place` is not read.
// NOTE(review): the helper's name suggests the call returns before the copy
// completes -- confirm whether callers must synchronize `stream`.
template <>
void Copy<platform::CUDAPlace, platform::CUDAPinnedPlace>(
    platform::CUDAPlace dst_place, void* dst,
    platform::CUDAPinnedPlace src_place, const void* src, size_t num,
    cudaStream_t stream) {
  const auto target_device = dst_place.device;
  platform::SetDeviceId(target_device);
  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
}
#endif
}
// namespace memory
...
...
paddle/fluid/memory/memory.cc
浏览文件 @
158d6c4d
...
...
@@ -82,16 +82,6 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
return
as
[
gpu_id
];
}
// Return the single buddy allocator that serves CUDA pinned memory.
//
// `gpu_id` is accepted but never read: one allocator is shared regardless of
// the device passed in.
//
// The allocator is created on the first call from a
// detail::CUDAPinnedAllocator and the CPU min/max chunk sizes, and is never
// deleted by this function. Construction happens in the initializer of a
// function-local static, which C++11 guarantees runs exactly once even when
// several threads make the first call concurrently; the previous
// `if (as == NULL)` check gave no such guarantee.
BuddyAllocator* GetCUDAPinnedBuddyAllocator(int gpu_id) {
  static BuddyAllocator* as = new BuddyAllocator(
      new detail::CUDAPinnedAllocator, platform::CpuMinChunkSize(),
      platform::CpuMaxChunkSize());
  return as;
}
template
<
>
size_t
Used
<
platform
::
CUDAPlace
>
(
platform
::
CUDAPlace
place
)
{
return
GetGPUBuddyAllocator
(
place
.
device
)
->
Used
();
...
...
@@ -100,8 +90,7 @@ size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
template
<
>
void
*
Alloc
<
platform
::
CUDAPlace
>
(
platform
::
CUDAPlace
place
,
size_t
size
)
{
auto
*
buddy_allocator
=
GetGPUBuddyAllocator
(
place
.
device
);
void
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
auto
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
)
{
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
platform
::
SetDeviceId
(
place
.
device
);
...
...
@@ -123,37 +112,39 @@ void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p) {
GetGPUBuddyAllocator
(
place
.
device
)
->
Free
(
p
);
}
// Return the single buddy allocator that serves CUDA pinned memory.
//
// The allocator is created on the first call from a
// detail::CUDAPinnedAllocator and the CPU min/max chunk sizes, and is never
// deleted by this function. Construction happens in the initializer of a
// function-local static, which C++11 guarantees runs exactly once even when
// several threads make the first call concurrently; the previous
// `if (ba == NULL)` check gave no such guarantee.
BuddyAllocator* GetCUDAPinnedBuddyAllocator() {
  static BuddyAllocator* ba = new BuddyAllocator(
      new detail::CUDAPinnedAllocator, platform::CpuMinChunkSize(),
      platform::CpuMaxChunkSize());
  return ba;
}
// Report the value of Used() on the CUDA pinned buddy allocator.
//
// `place` only selects this specialization; it is not read. An earlier
// `return GetGPUBuddyAllocator(place.device)->Used();` preceded the
// statement below, so the pinned allocator was never consulted and the
// result described a GPU allocator instead.
template <>
size_t Used<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place) {
  return GetCUDAPinnedBuddyAllocator()->Used();
}
// Allocate `size` bytes of CUDA pinned memory from the pinned buddy
// allocator.
//
// `place` only selects this specialization; it is not read.
// Returns whatever BuddyAllocator::Alloc returns. A nullptr result is logged
// as a warning and then passed through to the caller unchanged.
//
// The failure path deliberately reports nothing about GPU devices: the
// previous diagnostics switched to `place.device` and printed GPU memory
// usage and GPU chunk sizes, none of which describe a pinned allocation.
template <>
void* Alloc<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place,
                                       size_t size) {
  auto* buddy_allocator = GetCUDAPinnedBuddyAllocator();
  void* ptr = buddy_allocator->Alloc(size);

  if (ptr == nullptr) {
    LOG(WARNING) << "Cannot allocate " << size << " bytes in CUDAPinnedPlace";
  }
  return ptr;
}
// Return a block obtained from Alloc<platform::CUDAPinnedPlace> to the
// pinned buddy allocator.
//
// `place` only selects this specialization; it is not read. The pointer is
// released exactly once: the listing also carried a second Free(p) through
// GetCUDAPinnedBuddyAllocator(place.device), which would have freed the same
// block twice.
template <>
void Free<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place,
                                     void* p) {
  GetCUDAPinnedBuddyAllocator()->Free(p);
}
#endif
size_t
Usage
::
operator
()(
const
platform
::
CPUPlace
&
cpu
)
const
{
...
...
paddle/fluid/memory/memory.h
浏览文件 @
158d6c4d
...
...
@@ -33,7 +33,7 @@ namespace memory {
* address is valid or not.
*/
// Primary template. Declared with exactly (place, size) so that it matches
// the explicit specializations and the call sites, none of which pass a
// third `is_pinned` argument.
template <typename Place>
void* Alloc(Place place, size_t size);
/**
* \brief Free memory block in one place.
...
...
@@ -43,7 +43,7 @@ void* Alloc(Place place, size_t size, bool is_pinned = false);
*
*/
// Primary template. Declared with exactly (place, ptr) so that it matches
// the explicit specializations and the call sites, none of which pass a
// third `is_pinned` argument.
template <typename Place>
void Free(Place place, void* ptr);
/**
* \brief Total size of used memory in one place.
...
...
@@ -75,13 +75,11 @@ class PODDeleter {
static_assert
(
std
::
is_pod
<
T
>::
value
,
"T must be POD"
);
public:
// Remember the place whose Free() must be used for pointers handed to this
// deleter.
explicit PODDeleter(Place place) : place_(place) {}

// Release `ptr` through Free(place_, ptr). The two-argument form is the only
// one that matches the Free specializations; the `is_pinned` flag that used
// to be stored and forwarded here had no matching overload.
void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); }

private:
// Place passed at construction; forwarded unchanged on every call.
Place place_;
};
/**
...
...
paddle/fluid/memory/memory_test.cc
浏览文件 @
158d6c4d
...
...
@@ -141,4 +141,59 @@ TEST(BuddyAllocator, GPUMultAlloc) {
}
}
// Test helper: the number of bytes the test expects one pinned allocation of
// `size` bytes to consume.
//
// Adds sizeof(paddle::memory::detail::Metadata) to `size`, then rounds the
// sum up to the next multiple of paddle::platform::CpuMinChunkSize().
// `place` only distinguishes this overload; it is not read.
size_t align(size_t size, paddle::platform::CUDAPinnedPlace place) {
  const size_t chunk = paddle::platform::CpuMinChunkSize();
  const size_t padded = size + sizeof(paddle::memory::detail::Metadata);
  const size_t overshoot = padded % chunk;
  if (overshoot == 0) {
    return padded;
  }
  return padded + (chunk - overshoot);
}
// One pinned allocation: Alloc must return a non-null pointer, and Used()
// for the pinned place must equal memory_usage() for the same place wrapped
// in a generic Place. The block is freed at the end.
TEST(BuddyAllocator, CUDAPinnedAllocator) {
  void* block = nullptr;
  EXPECT_EQ(block, nullptr);

  paddle::platform::CUDAPinnedPlace pinned;
  block = paddle::memory::Alloc(pinned, 4096);
  EXPECT_NE(block, nullptr);

  paddle::platform::Place generic_place = pinned;
  EXPECT_EQ(paddle::memory::Used(pinned),
            paddle::memory::memory_usage(generic_place));

  paddle::memory::Free(pinned, block);
}
// Several pinned allocations of growing size, then their release.
//
// After every Alloc/Free the test compares Used() with a running total that
// is adjusted by align(size, place). Steps that leave Used() unchanged are
// not counted (see the in-loop comments).
TEST(BuddyAllocator, CUDAPinnedMultAllocator) {
  paddle::platform::CUDAPinnedPlace pinned;
  std::unordered_map<void*, size_t> blocks;

  size_t expected_used = paddle::memory::Used(pinned);
  EXPECT_EQ(expected_used, 0UL);

  for (auto request : {0, 128, 256, 1024, 4096, 16384, 65536, 262144, 1048576,
                       4194304}) {
    void* block = paddle::memory::Alloc(pinned, request);
    blocks[block] = request;

    // Buddy Allocator doesn't manage too large memory chunk
    if (paddle::memory::Used(pinned) != expected_used) {
      expected_used += align(request, pinned);
      EXPECT_EQ(expected_used, paddle::memory::Used(pinned));
    }
  }

  for (const auto& entry : blocks) {
    EXPECT_EQ(is_aligned(entry.first), true);
    paddle::memory::Free(pinned, entry.first);

    // Buddy Allocator doesn't manage too large memory chunk
    if (paddle::memory::Used(pinned) != expected_used) {
      expected_used -= align(entry.second, pinned);
      EXPECT_EQ(expected_used, paddle::memory::Used(pinned));
    }
  }
}
#endif
paddle/fluid/platform/place.cc
浏览文件 @
158d6c4d
...
...
@@ -26,6 +26,7 @@ class PlacePrinter : public boost::static_visitor<> {
// Write a CUDAPlace to os_ as "CUDAPlace(<device>)".
void operator()(const CUDAPlace& p) {
  os_ << "CUDAPlace(";
  os_ << p.device;
  os_ << ")";
}
// Write a CUDAPinnedPlace to os_ as the fixed text "CUDAPinnedPlace".
// The argument only selects this overload; it is not read.
void operator()(const CUDAPinnedPlace& p) {
  os_ << "CUDAPinnedPlace";
}
private:
std
::
ostream
&
os_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录