Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
e2780623
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e2780623
编写于
10月 10, 2018
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add support to old allocator
上级
a5cf565c
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
274 addition
and
4 deletion
+274
-4
paddle/fluid/memory/CMakeLists.txt
paddle/fluid/memory/CMakeLists.txt
+1
-1
paddle/fluid/memory/malloc.cc
paddle/fluid/memory/malloc.cc
+251
-2
paddle/fluid/memory/malloc.h
paddle/fluid/memory/malloc.h
+21
-0
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-1
未找到文件。
paddle/fluid/memory/CMakeLists.txt
浏览文件 @
e2780623
add_subdirectory
(
detail
)
add_subdirectory
(
allocation
)
cc_library
(
malloc SRCS malloc.cc DEPS allocator_facade
)
cc_library
(
malloc SRCS malloc.cc DEPS
buddy_allocator place enforce
allocator_facade
)
cc_library
(
memcpy SRCS memcpy.cc DEPS place
)
cc_library
(
memory
...
...
paddle/fluid/memory/malloc.cc
浏览文件 @
e2780623
...
...
@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
DEFINE_bool
(
init_allocated_mem
,
false
,
"It is a mistake that the values of the memory allocated by "
"BuddyAllocator are always zeroed in some op's implementation. "
...
...
@@ -26,17 +30,262 @@ DEFINE_bool(init_allocated_mem, false,
"during unit testing."
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DEFINE_bool
(
use_legacy_allocator
,
true
,
"Whether to use the legacy allocator. If the new allocators have"
"been well tested, we should remove these flag."
);
namespace
paddle
{
namespace
memory
{
namespace
legacy
{
using
BuddyAllocator
=
detail
::
BuddyAllocator
;
BuddyAllocator
*
GetCPUBuddyAllocator
()
{
// We tried thread_local for inference::RNN1 model, but that not works much
// for multi-thread test.
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
*
a
=
nullptr
;
std
::
call_once
(
init_flag
,
[]()
{
a
=
new
detail
::
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
CPUAllocator
),
platform
::
CpuMinChunkSize
(),
platform
::
CpuMaxChunkSize
());
});
return
a
;
}
// We compared the NaiveAllocator with BuddyAllocator in CPU memory allocation,
// seems they are almost the same overhead.
struct
NaiveAllocator
{
void
*
Alloc
(
size_t
size
)
{
return
malloc
(
size
);
}
void
Free
(
void
*
p
)
{
PADDLE_ENFORCE
(
p
);
free
(
p
);
}
static
NaiveAllocator
*
Instance
()
{
static
NaiveAllocator
x
;
return
&
x
;
}
private:
std
::
mutex
lock_
;
};
template
<
>
void
*
Alloc
<
platform
::
CPUPlace
>
(
const
platform
::
CPUPlace
&
place
,
size_t
size
)
{
VLOG
(
10
)
<<
"Allocate "
<<
size
<<
" bytes on "
<<
platform
::
Place
(
place
);
void
*
p
=
GetCPUBuddyAllocator
()
->
Alloc
(
size
);
if
(
FLAGS_init_allocated_mem
)
{
memset
(
p
,
0xEF
,
size
);
}
VLOG
(
10
)
<<
" pointer="
<<
p
;
return
p
;
}
template
<
>
void
Free
<
platform
::
CPUPlace
>
(
const
platform
::
CPUPlace
&
place
,
void
*
p
)
{
VLOG
(
10
)
<<
"Free pointer="
<<
p
<<
" on "
<<
platform
::
Place
(
place
);
GetCPUBuddyAllocator
()
->
Free
(
p
);
}
template
<
>
size_t
Used
<
platform
::
CPUPlace
>
(
const
platform
::
CPUPlace
&
place
)
{
return
GetCPUBuddyAllocator
()
->
Used
();
}
#ifdef PADDLE_WITH_CUDA
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
**
a_arr
=
nullptr
;
std
::
call_once
(
init_flag
,
[
gpu_id
]()
{
int
gpu_num
=
platform
::
GetCUDADeviceCount
();
PADDLE_ENFORCE
(
gpu_id
<
gpu_num
,
"gpu_id:%d should < gpu_num:%d"
,
gpu_id
,
gpu_num
);
a_arr
=
new
BuddyAllocator
*
[
gpu_num
];
for
(
int
i
=
0
;
i
<
gpu_num
;
i
++
)
{
a_arr
[
i
]
=
nullptr
;
platform
::
SetDeviceId
(
i
);
a_arr
[
i
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
i
)),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE: each GPU device use "
<<
FLAGS_fraction_of_gpu_memory_to_use
*
100
<<
"% of GPU memory.
\n
"
<<
"You can set GFlags environment variable '"
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"' to change the fraction of GPU usage.
\n\n
"
;
}
});
platform
::
SetDeviceId
(
gpu_id
);
return
a_arr
[
gpu_id
];
}
template
<
>
size_t
Used
<
platform
::
CUDAPlace
>
(
const
platform
::
CUDAPlace
&
place
)
{
return
GetGPUBuddyAllocator
(
place
.
device
)
->
Used
();
}
template
<
>
void
*
Alloc
<
platform
::
CUDAPlace
>
(
const
platform
::
CUDAPlace
&
place
,
size_t
size
)
{
auto
*
buddy_allocator
=
GetGPUBuddyAllocator
(
place
.
device
);
auto
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
)
{
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
platform
::
SetDeviceId
(
place
.
device
);
size_t
avail
,
total
;
platform
::
GpuMemoryUsage
(
&
avail
,
&
total
);
LOG
(
WARNING
)
<<
"Cannot allocate "
<<
size
<<
" bytes in GPU "
<<
place
.
device
<<
", available "
<<
avail
<<
" bytes"
;
LOG
(
WARNING
)
<<
"total "
<<
total
;
LOG
(
WARNING
)
<<
"GpuMinChunkSize "
<<
buddy_allocator
->
GetMinChunkSize
();
LOG
(
WARNING
)
<<
"GpuMaxChunkSize "
<<
buddy_allocator
->
GetMaxChunkSize
();
LOG
(
WARNING
)
<<
"GPU memory used: "
<<
Used
<
platform
::
CUDAPlace
>
(
place
);
platform
::
SetDeviceId
(
cur_dev
);
}
if
(
FLAGS_init_allocated_mem
)
{
cudaMemset
(
ptr
,
0xEF
,
size
);
}
return
ptr
;
}
template
<
>
void
Free
<
platform
::
CUDAPlace
>
(
const
platform
::
CUDAPlace
&
place
,
void
*
p
)
{
GetGPUBuddyAllocator
(
place
.
device
)
->
Free
(
p
);
}
BuddyAllocator
*
GetCUDAPinnedBuddyAllocator
()
{
static
std
::
once_flag
init_flag
;
static
BuddyAllocator
*
ba
=
nullptr
;
std
::
call_once
(
init_flag
,
[]()
{
ba
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
CUDAPinnedAllocator
),
platform
::
CUDAPinnedMinChunkSize
(),
platform
::
CUDAPinnedMaxChunkSize
());
});
return
ba
;
}
template
<
>
size_t
Used
<
platform
::
CUDAPinnedPlace
>
(
const
platform
::
CUDAPinnedPlace
&
place
)
{
return
GetCUDAPinnedBuddyAllocator
()
->
Used
();
}
template
<
>
void
*
Alloc
<
platform
::
CUDAPinnedPlace
>
(
const
platform
::
CUDAPinnedPlace
&
place
,
size_t
size
)
{
auto
*
buddy_allocator
=
GetCUDAPinnedBuddyAllocator
();
void
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
)
{
LOG
(
WARNING
)
<<
"cudaMallocHost Cannot allocate "
<<
size
<<
" bytes in CUDAPinnedPlace"
;
}
if
(
FLAGS_init_allocated_mem
)
{
memset
(
ptr
,
0xEF
,
size
);
}
return
ptr
;
}
template
<
>
void
Free
<
platform
::
CUDAPinnedPlace
>
(
const
platform
::
CUDAPinnedPlace
&
place
,
void
*
p
)
{
GetCUDAPinnedBuddyAllocator
()
->
Free
(
p
);
}
#endif
struct
AllocVisitor
:
public
boost
::
static_visitor
<
void
*>
{
inline
explicit
AllocVisitor
(
size_t
size
)
:
size_
(
size
)
{}
template
<
typename
Place
>
inline
void
*
operator
()(
const
Place
&
place
)
const
{
return
Alloc
<
Place
>
(
place
,
size_
);
}
private:
size_t
size_
;
};
struct
FreeVisitor
:
public
boost
::
static_visitor
<
void
>
{
inline
explicit
FreeVisitor
(
void
*
ptr
)
:
ptr_
(
ptr
)
{}
template
<
typename
Place
>
inline
void
operator
()(
const
Place
&
place
)
const
{
Free
<
Place
>
(
place
,
ptr_
);
}
private:
void
*
ptr_
;
};
size_t
Usage
::
operator
()(
const
platform
::
CPUPlace
&
cpu
)
const
{
return
Used
(
cpu
);
}
size_t
Usage
::
operator
()(
const
platform
::
CUDAPlace
&
gpu
)
const
{
#ifdef PADDLE_WITH_CUDA
return
Used
(
gpu
);
#else
PADDLE_THROW
(
"'CUDAPlace' is not supported in CPU only device."
);
#endif
}
size_t
Usage
::
operator
()(
const
platform
::
CUDAPinnedPlace
&
cuda_pinned
)
const
{
#ifdef PADDLE_WITH_CUDA
return
Used
(
cuda_pinned
);
#else
PADDLE_THROW
(
"'CUDAPinnedPlace' is not supported in CPU only device."
);
#endif
}
size_t
memory_usage
(
const
platform
::
Place
&
p
)
{
return
boost
::
apply_visitor
(
Usage
(),
p
);
}
class
LegacyAllocation
:
public
Allocation
{
public:
using
Allocation
::
Allocation
;
~
LegacyAllocation
()
{
boost
::
apply_visitor
(
FreeVisitor
(
this
->
ptr
()),
this
->
place
());
}
};
}
// namespace legacy
std
::
shared_ptr
<
Allocation
>
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
return
allocation
::
AllocatorFacade
::
Instance
().
AllocShared
(
place
,
size
,
attr
);
if
(
FLAGS_use_legacy_allocator
)
{
void
*
p
=
boost
::
apply_visitor
(
legacy
::
AllocVisitor
(
size
),
place
);
return
std
::
shared_ptr
<
Allocation
>
(
new
legacy
::
LegacyAllocation
(
p
,
size
,
place
));
}
else
{
return
allocation
::
AllocatorFacade
::
Instance
().
AllocShared
(
place
,
size
,
attr
);
}
}
std
::
unique_ptr
<
Allocation
>
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
return
allocation
::
AllocatorFacade
::
Instance
().
Alloc
(
place
,
size
,
attr
);
if
(
FLAGS_use_legacy_allocator
)
{
void
*
p
=
boost
::
apply_visitor
(
legacy
::
AllocVisitor
(
size
),
place
);
return
std
::
unique_ptr
<
Allocation
>
(
new
legacy
::
LegacyAllocation
(
p
,
size
,
place
));
}
else
{
return
allocation
::
AllocatorFacade
::
Instance
().
Alloc
(
place
,
size
,
attr
);
}
}
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/malloc.h
浏览文件 @
e2780623
...
...
@@ -30,5 +30,26 @@ extern std::unique_ptr<Allocation> Alloc(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
namespace
legacy
{
template
<
typename
Place
>
void
*
Alloc
(
const
Place
&
place
,
size_t
size
);
template
<
typename
Place
>
void
Free
(
const
Place
&
place
,
void
*
p
);
template
<
typename
Place
>
size_t
Used
(
const
Place
&
place
);
struct
Usage
:
public
boost
::
static_visitor
<
size_t
>
{
size_t
operator
()(
const
platform
::
CPUPlace
&
cpu
)
const
;
size_t
operator
()(
const
platform
::
CUDAPlace
&
gpu
)
const
;
size_t
operator
()(
const
platform
::
CUDAPinnedPlace
&
cuda_pinned
)
const
;
};
size_t
memory_usage
(
const
platform
::
Place
&
p
);
}
// namespace legacy
}
// namespace memory
}
// namespace paddle
python/paddle/fluid/__init__.py
浏览文件 @
e2780623
...
...
@@ -113,7 +113,7 @@ def __bootstrap__():
'check_nan_inf'
,
'benchmark'
,
'warpctc_dir'
,
'eager_delete_scope'
,
'use_mkldnn'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'cpu_deterministic'
,
'eager_delete_tensor_gb'
'eager_delete_tensor_gb'
,
'use_legacy_allocator'
]
if
core
.
is_compiled_with_dist
():
read_env_flags
.
append
(
'rpc_deadline'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录