Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
e2780623
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e2780623
编写于
10月 10, 2018
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add support to old allocator
上级
a5cf565c
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
274 addition
and
4 deletion
+274
-4
paddle/fluid/memory/CMakeLists.txt
paddle/fluid/memory/CMakeLists.txt
+1
-1
paddle/fluid/memory/malloc.cc
paddle/fluid/memory/malloc.cc
+251
-2
paddle/fluid/memory/malloc.h
paddle/fluid/memory/malloc.h
+21
-0
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-1
未找到文件。
paddle/fluid/memory/CMakeLists.txt
浏览文件 @
e2780623
add_subdirectory
(
detail
)
add_subdirectory
(
detail
)
add_subdirectory
(
allocation
)
add_subdirectory
(
allocation
)
cc_library
(
malloc SRCS malloc.cc DEPS allocator_facade
)
cc_library
(
malloc SRCS malloc.cc DEPS
buddy_allocator place enforce
allocator_facade
)
cc_library
(
memcpy SRCS memcpy.cc DEPS place
)
cc_library
(
memcpy SRCS memcpy.cc DEPS place
)
cc_library
(
memory
cc_library
(
memory
...
...
paddle/fluid/memory/malloc.cc
浏览文件 @
e2780623
...
@@ -18,6 +18,10 @@ limitations under the License. */
...
@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
DEFINE_bool
(
init_allocated_mem
,
false
,
DEFINE_bool
(
init_allocated_mem
,
false
,
"It is a mistake that the values of the memory allocated by "
"It is a mistake that the values of the memory allocated by "
"BuddyAllocator are always zeroed in some op's implementation. "
"BuddyAllocator are always zeroed in some op's implementation. "
...
@@ -26,17 +30,262 @@ DEFINE_bool(init_allocated_mem, false,
...
@@ -26,17 +30,262 @@ DEFINE_bool(init_allocated_mem, false,
"during unit testing."
);
"during unit testing."
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DEFINE_bool
(
use_legacy_allocator
,
true
,
"Whether to use the legacy allocator. If the new allocators have"
"been well tested, we should remove these flag."
);
namespace
paddle
{
namespace
paddle
{
namespace
memory
{
namespace
memory
{
namespace
legacy
{
using
BuddyAllocator
=
detail
::
BuddyAllocator
;
BuddyAllocator
*
GetCPUBuddyAllocator
()
{
// We tried thread_local for inference::RNN1 model, but that not works much
// for multi-thread test.
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
*
a
=
nullptr
;
std
::
call_once
(
init_flag
,
[]()
{
a
=
new
detail
::
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
CPUAllocator
),
platform
::
CpuMinChunkSize
(),
platform
::
CpuMaxChunkSize
());
});
return
a
;
}
// We compared the NaiveAllocator with BuddyAllocator in CPU memory allocation,
// seems they are almost the same overhead.
struct
NaiveAllocator
{
void
*
Alloc
(
size_t
size
)
{
return
malloc
(
size
);
}
void
Free
(
void
*
p
)
{
PADDLE_ENFORCE
(
p
);
free
(
p
);
}
static
NaiveAllocator
*
Instance
()
{
static
NaiveAllocator
x
;
return
&
x
;
}
private:
std
::
mutex
lock_
;
};
template
<
>
void
*
Alloc
<
platform
::
CPUPlace
>
(
const
platform
::
CPUPlace
&
place
,
size_t
size
)
{
VLOG
(
10
)
<<
"Allocate "
<<
size
<<
" bytes on "
<<
platform
::
Place
(
place
);
void
*
p
=
GetCPUBuddyAllocator
()
->
Alloc
(
size
);
if
(
FLAGS_init_allocated_mem
)
{
memset
(
p
,
0xEF
,
size
);
}
VLOG
(
10
)
<<
" pointer="
<<
p
;
return
p
;
}
template
<
>
void
Free
<
platform
::
CPUPlace
>
(
const
platform
::
CPUPlace
&
place
,
void
*
p
)
{
VLOG
(
10
)
<<
"Free pointer="
<<
p
<<
" on "
<<
platform
::
Place
(
place
);
GetCPUBuddyAllocator
()
->
Free
(
p
);
}
template
<
>
size_t
Used
<
platform
::
CPUPlace
>
(
const
platform
::
CPUPlace
&
place
)
{
return
GetCPUBuddyAllocator
()
->
Used
();
}
#ifdef PADDLE_WITH_CUDA
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
**
a_arr
=
nullptr
;
std
::
call_once
(
init_flag
,
[
gpu_id
]()
{
int
gpu_num
=
platform
::
GetCUDADeviceCount
();
PADDLE_ENFORCE
(
gpu_id
<
gpu_num
,
"gpu_id:%d should < gpu_num:%d"
,
gpu_id
,
gpu_num
);
a_arr
=
new
BuddyAllocator
*
[
gpu_num
];
for
(
int
i
=
0
;
i
<
gpu_num
;
i
++
)
{
a_arr
[
i
]
=
nullptr
;
platform
::
SetDeviceId
(
i
);
a_arr
[
i
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
i
)),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE: each GPU device use "
<<
FLAGS_fraction_of_gpu_memory_to_use
*
100
<<
"% of GPU memory.
\n
"
<<
"You can set GFlags environment variable '"
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"' to change the fraction of GPU usage.
\n\n
"
;
}
});
platform
::
SetDeviceId
(
gpu_id
);
return
a_arr
[
gpu_id
];
}
template
<
>
size_t
Used
<
platform
::
CUDAPlace
>
(
const
platform
::
CUDAPlace
&
place
)
{
return
GetGPUBuddyAllocator
(
place
.
device
)
->
Used
();
}
template
<
>
void
*
Alloc
<
platform
::
CUDAPlace
>
(
const
platform
::
CUDAPlace
&
place
,
size_t
size
)
{
auto
*
buddy_allocator
=
GetGPUBuddyAllocator
(
place
.
device
);
auto
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
)
{
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
platform
::
SetDeviceId
(
place
.
device
);
size_t
avail
,
total
;
platform
::
GpuMemoryUsage
(
&
avail
,
&
total
);
LOG
(
WARNING
)
<<
"Cannot allocate "
<<
size
<<
" bytes in GPU "
<<
place
.
device
<<
", available "
<<
avail
<<
" bytes"
;
LOG
(
WARNING
)
<<
"total "
<<
total
;
LOG
(
WARNING
)
<<
"GpuMinChunkSize "
<<
buddy_allocator
->
GetMinChunkSize
();
LOG
(
WARNING
)
<<
"GpuMaxChunkSize "
<<
buddy_allocator
->
GetMaxChunkSize
();
LOG
(
WARNING
)
<<
"GPU memory used: "
<<
Used
<
platform
::
CUDAPlace
>
(
place
);
platform
::
SetDeviceId
(
cur_dev
);
}
if
(
FLAGS_init_allocated_mem
)
{
cudaMemset
(
ptr
,
0xEF
,
size
);
}
return
ptr
;
}
template
<
>
void
Free
<
platform
::
CUDAPlace
>
(
const
platform
::
CUDAPlace
&
place
,
void
*
p
)
{
GetGPUBuddyAllocator
(
place
.
device
)
->
Free
(
p
);
}
BuddyAllocator
*
GetCUDAPinnedBuddyAllocator
()
{
static
std
::
once_flag
init_flag
;
static
BuddyAllocator
*
ba
=
nullptr
;
std
::
call_once
(
init_flag
,
[]()
{
ba
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
CUDAPinnedAllocator
),
platform
::
CUDAPinnedMinChunkSize
(),
platform
::
CUDAPinnedMaxChunkSize
());
});
return
ba
;
}
template
<
>
size_t
Used
<
platform
::
CUDAPinnedPlace
>
(
const
platform
::
CUDAPinnedPlace
&
place
)
{
return
GetCUDAPinnedBuddyAllocator
()
->
Used
();
}
template
<
>
void
*
Alloc
<
platform
::
CUDAPinnedPlace
>
(
const
platform
::
CUDAPinnedPlace
&
place
,
size_t
size
)
{
auto
*
buddy_allocator
=
GetCUDAPinnedBuddyAllocator
();
void
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
)
{
LOG
(
WARNING
)
<<
"cudaMallocHost Cannot allocate "
<<
size
<<
" bytes in CUDAPinnedPlace"
;
}
if
(
FLAGS_init_allocated_mem
)
{
memset
(
ptr
,
0xEF
,
size
);
}
return
ptr
;
}
template
<
>
void
Free
<
platform
::
CUDAPinnedPlace
>
(
const
platform
::
CUDAPinnedPlace
&
place
,
void
*
p
)
{
GetCUDAPinnedBuddyAllocator
()
->
Free
(
p
);
}
#endif
struct
AllocVisitor
:
public
boost
::
static_visitor
<
void
*>
{
inline
explicit
AllocVisitor
(
size_t
size
)
:
size_
(
size
)
{}
template
<
typename
Place
>
inline
void
*
operator
()(
const
Place
&
place
)
const
{
return
Alloc
<
Place
>
(
place
,
size_
);
}
private:
size_t
size_
;
};
struct
FreeVisitor
:
public
boost
::
static_visitor
<
void
>
{
inline
explicit
FreeVisitor
(
void
*
ptr
)
:
ptr_
(
ptr
)
{}
template
<
typename
Place
>
inline
void
operator
()(
const
Place
&
place
)
const
{
Free
<
Place
>
(
place
,
ptr_
);
}
private:
void
*
ptr_
;
};
size_t
Usage
::
operator
()(
const
platform
::
CPUPlace
&
cpu
)
const
{
return
Used
(
cpu
);
}
size_t
Usage
::
operator
()(
const
platform
::
CUDAPlace
&
gpu
)
const
{
#ifdef PADDLE_WITH_CUDA
return
Used
(
gpu
);
#else
PADDLE_THROW
(
"'CUDAPlace' is not supported in CPU only device."
);
#endif
}
size_t
Usage
::
operator
()(
const
platform
::
CUDAPinnedPlace
&
cuda_pinned
)
const
{
#ifdef PADDLE_WITH_CUDA
return
Used
(
cuda_pinned
);
#else
PADDLE_THROW
(
"'CUDAPinnedPlace' is not supported in CPU only device."
);
#endif
}
size_t
memory_usage
(
const
platform
::
Place
&
p
)
{
return
boost
::
apply_visitor
(
Usage
(),
p
);
}
class
LegacyAllocation
:
public
Allocation
{
public:
using
Allocation
::
Allocation
;
~
LegacyAllocation
()
{
boost
::
apply_visitor
(
FreeVisitor
(
this
->
ptr
()),
this
->
place
());
}
};
}
// namespace legacy
std
::
shared_ptr
<
Allocation
>
AllocShared
(
const
platform
::
Place
&
place
,
std
::
shared_ptr
<
Allocation
>
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
size_t
size
,
Allocator
::
Attr
attr
)
{
return
allocation
::
AllocatorFacade
::
Instance
().
AllocShared
(
place
,
size
,
attr
);
if
(
FLAGS_use_legacy_allocator
)
{
void
*
p
=
boost
::
apply_visitor
(
legacy
::
AllocVisitor
(
size
),
place
);
return
std
::
shared_ptr
<
Allocation
>
(
new
legacy
::
LegacyAllocation
(
p
,
size
,
place
));
}
else
{
return
allocation
::
AllocatorFacade
::
Instance
().
AllocShared
(
place
,
size
,
attr
);
}
}
}
std
::
unique_ptr
<
Allocation
>
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
std
::
unique_ptr
<
Allocation
>
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
Allocator
::
Attr
attr
)
{
return
allocation
::
AllocatorFacade
::
Instance
().
Alloc
(
place
,
size
,
attr
);
if
(
FLAGS_use_legacy_allocator
)
{
void
*
p
=
boost
::
apply_visitor
(
legacy
::
AllocVisitor
(
size
),
place
);
return
std
::
unique_ptr
<
Allocation
>
(
new
legacy
::
LegacyAllocation
(
p
,
size
,
place
));
}
else
{
return
allocation
::
AllocatorFacade
::
Instance
().
Alloc
(
place
,
size
,
attr
);
}
}
}
}
// namespace memory
}
// namespace memory
}
// namespace paddle
}
// namespace paddle
paddle/fluid/memory/malloc.h
浏览文件 @
e2780623
...
@@ -30,5 +30,26 @@ extern std::unique_ptr<Allocation> Alloc(
...
@@ -30,5 +30,26 @@ extern std::unique_ptr<Allocation> Alloc(
const
platform
::
Place
&
place
,
size_t
size
,
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
namespace
legacy
{
template
<
typename
Place
>
void
*
Alloc
(
const
Place
&
place
,
size_t
size
);
template
<
typename
Place
>
void
Free
(
const
Place
&
place
,
void
*
p
);
template
<
typename
Place
>
size_t
Used
(
const
Place
&
place
);
struct
Usage
:
public
boost
::
static_visitor
<
size_t
>
{
size_t
operator
()(
const
platform
::
CPUPlace
&
cpu
)
const
;
size_t
operator
()(
const
platform
::
CUDAPlace
&
gpu
)
const
;
size_t
operator
()(
const
platform
::
CUDAPinnedPlace
&
cuda_pinned
)
const
;
};
size_t
memory_usage
(
const
platform
::
Place
&
p
);
}
// namespace legacy
}
// namespace memory
}
// namespace memory
}
// namespace paddle
}
// namespace paddle
python/paddle/fluid/__init__.py
浏览文件 @
e2780623
...
@@ -113,7 +113,7 @@ def __bootstrap__():
...
@@ -113,7 +113,7 @@ def __bootstrap__():
'check_nan_inf'
,
'benchmark'
,
'warpctc_dir'
,
'eager_delete_scope'
,
'check_nan_inf'
,
'benchmark'
,
'warpctc_dir'
,
'eager_delete_scope'
,
'use_mkldnn'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'use_mkldnn'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'cpu_deterministic'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'cpu_deterministic'
,
'eager_delete_tensor_gb'
'eager_delete_tensor_gb'
,
'use_legacy_allocator'
]
]
if
core
.
is_compiled_with_dist
():
if
core
.
is_compiled_with_dist
():
read_env_flags
.
append
(
'rpc_deadline'
)
read_env_flags
.
append
(
'rpc_deadline'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录