Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
2e4a3986
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2e4a3986
编写于
3月 26, 2018
作者:
C
chengduo
提交者:
GitHub
3月 26, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #9216 from chengduoZH/feature/add_pinned_memory
Add pinned memory
上级
c858f489
9e99446e
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
144 addition
and
30 deletion
+144
-30
paddle/fluid/framework/tensor.h
paddle/fluid/framework/tensor.h
+22
-10
paddle/fluid/framework/tensor_impl.h
paddle/fluid/framework/tensor_impl.h
+14
-9
paddle/fluid/memory/detail/system_allocator.cc
paddle/fluid/memory/detail/system_allocator.cc
+44
-0
paddle/fluid/memory/detail/system_allocator.h
paddle/fluid/memory/detail/system_allocator.h
+12
-0
paddle/fluid/memory/memory.cc
paddle/fluid/memory/memory.cc
+46
-7
paddle/fluid/memory/memory.h
paddle/fluid/memory/memory.h
+6
-4
未找到文件。
paddle/fluid/framework/tensor.h
浏览文件 @
2e4a3986
...
...
@@ -45,10 +45,11 @@ class Tensor {
friend
struct
EigenVector
;
public:
Tensor
()
:
offset_
(
0
)
{}
Tensor
()
:
offset_
(
0
)
,
is_pinned_
(
false
)
{}
/*! Constructor with place should only be used in pybind. */
explicit
Tensor
(
const
platform
::
Place
&
place
)
:
offset_
(
0
)
{
explicit
Tensor
(
const
platform
::
Place
&
place
)
:
offset_
(
0
),
is_pinned_
(
false
)
{
holder_
->
set_place
(
place
);
}
...
...
@@ -69,11 +70,12 @@ class Tensor {
* @note If not exist, then allocation.
*/
template
<
typename
T
>
inline
T
*
mutable_data
(
platform
::
Place
place
);
inline
T
*
mutable_data
(
platform
::
Place
place
,
bool
is_pinned
=
false
);
inline
void
*
mutable_data
(
platform
::
Place
place
,
std
::
type_index
type
);
inline
void
*
mutable_data
(
platform
::
Place
place
,
std
::
type_index
type
,
bool
is_pinned
=
false
);
inline
void
*
mutable_data
(
platform
::
Place
place
);
inline
void
*
mutable_data
(
platform
::
Place
place
,
bool
is_pinned
=
false
);
/**
* @brief Return a pointer to mutable memory block.
...
...
@@ -84,7 +86,8 @@ class Tensor {
* @note If not exist, then allocation.
*/
template
<
typename
T
>
inline
T
*
mutable_data
(
DDim
dims
,
platform
::
Place
place
);
inline
T
*
mutable_data
(
DDim
dims
,
platform
::
Place
place
,
bool
is_pinned
=
false
);
/*! Return the dimensions of the memory block. */
inline
const
DDim
&
dims
()
const
;
...
...
@@ -92,6 +95,9 @@ class Tensor {
/*! Return the numel of the memory block. */
inline
int64_t
numel
()
const
;
/*! Return the numel of the memory block. */
inline
bool
isPinned
()
const
;
/*! Resize the dimensions of the memory block. */
inline
Tensor
&
Resize
(
const
DDim
&
dims
);
...
...
@@ -146,12 +152,14 @@ class Tensor {
template
<
typename
Place
>
struct
PlaceholderImpl
:
public
Placeholder
{
PlaceholderImpl
(
Place
place
,
size_t
size
,
std
::
type_index
type
)
:
ptr_
(
static_cast
<
uint8_t
*>
(
memory
::
Alloc
(
place
,
size
)),
memory
::
PODDeleter
<
uint8_t
,
Place
>
(
place
)),
PlaceholderImpl
(
Place
place
,
size_t
size
,
std
::
type_index
type
,
bool
is_pinned
=
false
)
:
ptr_
(
static_cast
<
uint8_t
*>
(
memory
::
Alloc
(
place
,
size
,
is_pinned
)),
memory
::
PODDeleter
<
uint8_t
,
Place
>
(
place
,
is_pinned
)),
place_
(
place
),
size_
(
size
),
type_
(
type
)
{
type_
(
type
),
is_pinned_
(
is_pinned
)
{
PADDLE_ENFORCE_NOT_NULL
(
ptr_
,
"Insufficient %s memory to allocation."
,
(
is_cpu_place
(
place_
)
?
"CPU"
:
"GPU"
));
}
...
...
@@ -174,6 +182,9 @@ class Tensor {
/* the current type of memory */
std
::
type_index
type_
;
/*! use pinned memory or not. */
bool
is_pinned_
;
};
/*! holds the memory block if allocated. */
...
...
@@ -208,6 +219,7 @@ class Tensor {
* PlaceHolder::ptr_ and where the tensor data really begins.
*/
size_t
offset_
;
bool
is_pinned_
;
};
inline
void
Tensor
::
switch_place
(
platform
::
Place
new_place
)
{
...
...
paddle/fluid/framework/tensor_impl.h
浏览文件 @
2e4a3986
...
...
@@ -101,19 +101,21 @@ inline T* Tensor::data() {
}
template
<
typename
T
>
inline
T
*
Tensor
::
mutable_data
(
DDim
dims
,
platform
::
Place
place
)
{
inline
T
*
Tensor
::
mutable_data
(
DDim
dims
,
platform
::
Place
place
,
bool
is_pinned
)
{
static_assert
(
std
::
is_pod
<
T
>::
value
,
"T must be POD"
);
Resize
(
dims
);
return
mutable_data
<
T
>
(
place
);
return
mutable_data
<
T
>
(
place
,
is_pinned
);
}
template
<
typename
T
>
inline
T
*
Tensor
::
mutable_data
(
platform
::
Place
place
)
{
inline
T
*
Tensor
::
mutable_data
(
platform
::
Place
place
,
bool
is_pinned
)
{
static_assert
(
std
::
is_pod
<
T
>::
value
,
"T must be POD"
);
return
reinterpret_cast
<
T
*>
(
mutable_data
(
place
,
typeid
(
T
)));
return
reinterpret_cast
<
T
*>
(
mutable_data
(
place
,
typeid
(
T
)
,
is_pinned
));
}
inline
void
*
Tensor
::
mutable_data
(
platform
::
Place
place
,
std
::
type_index
type
)
{
inline
void
*
Tensor
::
mutable_data
(
platform
::
Place
place
,
std
::
type_index
type
,
bool
is_pinned
)
{
if
(
holder_
!=
nullptr
)
{
holder_
->
set_type
(
type
);
}
...
...
@@ -127,26 +129,27 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
holder_
->
size
()
<
size
+
offset_
)
{
if
(
platform
::
is_cpu_place
(
place
))
{
holder_
.
reset
(
new
PlaceholderImpl
<
platform
::
CPUPlace
>
(
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
,
type
));
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
,
type
,
is_pinned
));
}
else
if
(
platform
::
is_gpu_place
(
place
))
{
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW
(
"'CUDAPlace' is not supported in CPU only device."
);
}
#else
holder_
.
reset
(
new
PlaceholderImpl
<
platform
::
CUDAPlace
>
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
size
,
type
));
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
size
,
type
,
is_pinned
));
}
#endif
offset_
=
0
;
is_pinned_
=
is_pinned
;
}
return
reinterpret_cast
<
void
*>
(
reinterpret_cast
<
uintptr_t
>
(
holder_
->
ptr
())
+
offset_
);
}
inline
void
*
Tensor
::
mutable_data
(
platform
::
Place
place
)
{
inline
void
*
Tensor
::
mutable_data
(
platform
::
Place
place
,
bool
is_pinned
)
{
PADDLE_ENFORCE
(
this
->
holder_
!=
nullptr
,
"Cannot invoke mutable data if current hold nothing"
);
return
mutable_data
(
place
,
holder_
->
type
());
return
mutable_data
(
place
,
holder_
->
type
()
,
is_pinned
);
}
inline
Tensor
&
Tensor
::
ShareDataWith
(
const
Tensor
&
src
)
{
...
...
@@ -188,6 +191,8 @@ inline const DDim& Tensor::dims() const { return dims_; }
inline
int64_t
Tensor
::
numel
()
const
{
return
product
(
dims_
);
}
inline
bool
Tensor
::
isPinned
()
const
{
return
is_pinned_
;
}
inline
Tensor
ReshapeToMatrix
(
const
Tensor
&
src
,
int
num_col_dims
)
{
Tensor
res
;
res
.
ShareDataWith
(
src
);
...
...
paddle/fluid/memory/detail/system_allocator.cc
浏览文件 @
2e4a3986
...
...
@@ -119,6 +119,50 @@ void GPUAllocator::Free(void* p, size_t size, size_t index) {
bool
GPUAllocator
::
UseGpu
()
const
{
return
true
;
}
// PINNED memory allows direct DMA transfers by the GPU to and from system
// memory. It’s locked to a physical address.
void
*
CUDAPinnedAllocator
::
Alloc
(
size_t
&
index
,
size_t
size
)
{
if
(
size
<=
0
)
return
nullptr
;
void
*
p
;
// NOTE: here, we use GpuMaxAllocSize() as the maximum memory size
// of host pinned allocation. Allocates too much would reduce
// the amount of memory available to the underlying system for paging.
size_t
usable
=
paddle
::
platform
::
GpuMaxAllocSize
()
-
fallback_alloc_size_
;
if
(
size
>
usable
)
return
nullptr
;
// PINNED memory is visible to all CUDA contexts.
cudaError_t
result
=
cudaMallocHost
(
&
p
,
size
);
if
(
result
==
cudaSuccess
)
{
index
=
1
;
fallback_alloc_size_
+=
size
;
return
p
;
}
return
nullptr
;
}
void
CUDAPinnedAllocator
::
Free
(
void
*
p
,
size_t
size
,
size_t
index
)
{
cudaError_t
err
;
PADDLE_ASSERT
(
index
==
1
);
PADDLE_ASSERT
(
fallback_alloc_size_
>=
size
);
fallback_alloc_size_
-=
size
;
err
=
cudaFreeHost
(
p
);
// Purposefully allow cudaErrorCudartUnloading, because
// that is returned if you ever call cudaFreeHost after the
// driver has already shutdown. This happens only if the
// process is terminating, in which case we don't care if
// cudaFreeHost succeeds.
if
(
err
!=
cudaErrorCudartUnloading
)
{
PADDLE_ENFORCE
(
err
,
"cudaFreeHost failed in GPUPinnedAllocator::Free."
);
}
}
bool
CUDAPinnedAllocator
::
UseGpu
()
const
{
return
true
;
}
#endif
}
// namespace detail
...
...
paddle/fluid/memory/detail/system_allocator.h
浏览文件 @
2e4a3986
...
...
@@ -51,6 +51,18 @@ class GPUAllocator : public SystemAllocator {
size_t
gpu_alloc_size_
=
0
;
size_t
fallback_alloc_size_
=
0
;
};
class
CUDAPinnedAllocator
:
public
SystemAllocator
{
public:
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
virtual
void
Free
(
void
*
p
,
size_t
size
,
size_t
index
);
virtual
bool
UseGpu
()
const
;
private:
size_t
gpu_alloc_size_
=
0
;
// TODO(zcd): how to define the upper limit of CUDAPinnedMemory?
size_t
fallback_alloc_size_
=
0
;
};
#endif
}
// namespace detail
...
...
paddle/fluid/memory/memory.cc
浏览文件 @
2e4a3986
...
...
@@ -38,7 +38,8 @@ BuddyAllocator* GetCPUBuddyAllocator() {
}
template
<
>
void
*
Alloc
<
platform
::
CPUPlace
>
(
platform
::
CPUPlace
place
,
size_t
size
)
{
void
*
Alloc
<
platform
::
CPUPlace
>
(
platform
::
CPUPlace
place
,
size_t
size
,
bool
is_pinned
)
{
VLOG
(
10
)
<<
"Allocate "
<<
size
<<
" bytes on "
<<
platform
::
Place
(
place
);
void
*
p
=
GetCPUBuddyAllocator
()
->
Alloc
(
size
);
VLOG
(
10
)
<<
" pointer="
<<
p
;
...
...
@@ -46,7 +47,8 @@ void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
}
template
<
>
void
Free
<
platform
::
CPUPlace
>
(
platform
::
CPUPlace
place
,
void
*
p
)
{
void
Free
<
platform
::
CPUPlace
>
(
platform
::
CPUPlace
place
,
void
*
p
,
bool
is_pinned
)
{
VLOG
(
10
)
<<
"Free pointer="
<<
p
<<
" on "
<<
platform
::
Place
(
place
);
GetCPUBuddyAllocator
()
->
Free
(
p
);
}
...
...
@@ -82,15 +84,47 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
return
as
[
gpu_id
];
}
BuddyAllocator
*
GetCUDAPinnedBuddyAllocator
(
int
gpu_id
)
{
static
BuddyAllocator
**
as
=
NULL
;
if
(
as
==
NULL
)
{
int
gpu_num
=
platform
::
GetCUDADeviceCount
();
as
=
new
BuddyAllocator
*
[
gpu_num
];
for
(
int
gpu
=
0
;
gpu
<
gpu_num
;
gpu
++
)
{
as
[
gpu
]
=
nullptr
;
}
}
platform
::
SetDeviceId
(
gpu_id
);
if
(
!
as
[
gpu_id
])
{
as
[
gpu_id
]
=
new
BuddyAllocator
(
new
detail
::
CUDAPinnedAllocator
,
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE: each GPU device use "
<<
FLAGS_fraction_of_gpu_memory_to_use
*
100
<<
"% of GPU memory.
\n
"
<<
"You can set GFlags environment variable '"
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"' to change the fraction of GPU usage.
\n\n
"
;
}
return
as
[
gpu_id
];
}
template
<
>
size_t
Used
<
platform
::
CUDAPlace
>
(
platform
::
CUDAPlace
place
)
{
return
GetGPUBuddyAllocator
(
place
.
device
)
->
Used
();
}
template
<
>
void
*
Alloc
<
platform
::
CUDAPlace
>
(
platform
::
CUDAPlace
place
,
size_t
size
)
{
auto
*
buddy_allocator
=
GetGPUBuddyAllocator
(
place
.
device
);
auto
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
void
*
Alloc
<
platform
::
CUDAPlace
>
(
platform
::
CUDAPlace
place
,
size_t
size
,
bool
is_pinned
)
{
void
*
ptr
;
if
(
is_pinned
)
{
auto
*
buddy_allocator
=
GetCUDAPinnedBuddyAllocator
(
place
.
device
);
ptr
=
buddy_allocator
->
Alloc
(
size
);
}
else
{
auto
*
buddy_allocator
=
GetGPUBuddyAllocator
(
place
.
device
);
ptr
=
buddy_allocator
->
Alloc
(
size
);
}
if
(
ptr
==
nullptr
)
{
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
platform
::
SetDeviceId
(
place
.
device
);
...
...
@@ -108,8 +142,13 @@ void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
}
template
<
>
void
Free
<
platform
::
CUDAPlace
>
(
platform
::
CUDAPlace
place
,
void
*
p
)
{
GetGPUBuddyAllocator
(
place
.
device
)
->
Free
(
p
);
void
Free
<
platform
::
CUDAPlace
>
(
platform
::
CUDAPlace
place
,
void
*
p
,
bool
is_pinned
)
{
if
(
is_pinned
)
{
GetCUDAPinnedBuddyAllocator
(
place
.
device
)
->
Free
(
p
);
}
else
{
GetGPUBuddyAllocator
(
place
.
device
)
->
Free
(
p
);
}
}
#endif
...
...
paddle/fluid/memory/memory.h
浏览文件 @
2e4a3986
...
...
@@ -33,7 +33,7 @@ namespace memory {
* address is valid or not.
*/
template
<
typename
Place
>
void
*
Alloc
(
Place
place
,
size_t
size
);
void
*
Alloc
(
Place
place
,
size_t
size
,
bool
is_pinned
=
false
);
/**
* \brief Free memory block in one place.
...
...
@@ -43,7 +43,7 @@ void* Alloc(Place place, size_t size);
*
*/
template
<
typename
Place
>
void
Free
(
Place
place
,
void
*
ptr
);
void
Free
(
Place
place
,
void
*
ptr
,
bool
is_pinned
=
false
);
/**
* \brief Total size of used memory in one place.
...
...
@@ -74,11 +74,13 @@ class PODDeleter {
static_assert
(
std
::
is_pod
<
T
>::
value
,
"T must be POD"
);
public:
explicit
PODDeleter
(
Place
place
)
:
place_
(
place
)
{}
void
operator
()(
T
*
ptr
)
{
Free
(
place_
,
static_cast
<
void
*>
(
ptr
));
}
explicit
PODDeleter
(
Place
place
,
bool
is_pinned
=
false
)
:
place_
(
place
),
is_pinned_
(
is_pinned
)
{}
void
operator
()(
T
*
ptr
)
{
Free
(
place_
,
static_cast
<
void
*>
(
ptr
),
is_pinned_
);
}
private:
Place
place_
;
bool
is_pinned_
;
};
/**
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录