Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
19e669a9
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
19e669a9
编写于
11月 16, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add legacy_allocator
test=develop
上级
1cb7e7dd
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
374 addition
and
324 deletion
+374
-324
paddle/fluid/memory/CMakeLists.txt
paddle/fluid/memory/CMakeLists.txt
+1
-1
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+2
-0
paddle/fluid/memory/allocation/allocator.cc
paddle/fluid/memory/allocation/allocator.cc
+1
-5
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+22
-4
paddle/fluid/memory/allocation/buffered_allocator.h
paddle/fluid/memory/allocation/buffered_allocator.h
+0
-6
paddle/fluid/memory/allocation/legacy_allocator.cc
paddle/fluid/memory/allocation/legacy_allocator.cc
+307
-0
paddle/fluid/memory/allocation/legacy_allocator.h
paddle/fluid/memory/allocation/legacy_allocator.h
+37
-0
paddle/fluid/memory/malloc.cc
paddle/fluid/memory/malloc.cc
+4
-287
paddle/fluid/memory/malloc.h
paddle/fluid/memory/malloc.h
+0
-21
未找到文件。
paddle/fluid/memory/CMakeLists.txt
浏览文件 @
19e669a9
add_subdirectory
(
detail
)
add_subdirectory
(
detail
)
add_subdirectory
(
allocation
)
add_subdirectory
(
allocation
)
cc_library
(
malloc SRCS malloc.cc DEPS
buddy_allocator
place enforce allocator_facade
)
cc_library
(
malloc SRCS malloc.cc DEPS place enforce allocator_facade
)
cc_library
(
memcpy SRCS memcpy.cc DEPS place
)
cc_library
(
memcpy SRCS memcpy.cc DEPS place
)
cc_library
(
memory
cc_library
(
memory
...
...
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
19e669a9
...
@@ -3,6 +3,7 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
...
@@ -3,6 +3,7 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library
(
best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator
)
cc_library
(
best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator
)
cc_library
(
locked_allocator SRCS locked_allocator.cc DEPS allocator
)
cc_library
(
locked_allocator SRCS locked_allocator.cc DEPS allocator
)
cc_library
(
buffered_allocator SRCS buffered_allocator.cc DEPS allocator
)
cc_library
(
buffered_allocator SRCS buffered_allocator.cc DEPS allocator
)
cc_library
(
legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
...
@@ -53,6 +54,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
...
@@ -53,6 +54,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
retry_allocator
retry_allocator
buffered_allocator
buffered_allocator
allocator_strategy
allocator_strategy
legacy_allocator
)
)
nv_test
(
allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade
)
nv_test
(
allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade
)
...
...
paddle/fluid/memory/allocation/allocator.cc
浏览文件 @
19e669a9
...
@@ -37,11 +37,7 @@ const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
...
@@ -37,11 +37,7 @@ const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
void
AllocationDeleter
::
operator
()(
Allocation
*
allocation
)
const
{
void
AllocationDeleter
::
operator
()(
Allocation
*
allocation
)
const
{
auto
*
allocator
=
allocation
->
allocator
();
auto
*
allocator
=
allocation
->
allocator
();
if
(
allocator
)
{
allocator
->
Free
(
allocation
);
allocator
->
Free
(
allocation
);
}
else
{
delete
allocation
;
// Compatible for legacy allocation.
}
}
}
}
// namespace allocation
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
19e669a9
...
@@ -19,10 +19,12 @@
...
@@ -19,10 +19,12 @@
#include <vector>
#include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
...
@@ -190,13 +192,29 @@ class AllocatorFacadePrivate {
...
@@ -190,13 +192,29 @@ class AllocatorFacadePrivate {
~
AllocatorFacadePrivate
()
=
default
;
~
AllocatorFacadePrivate
()
=
default
;
AllocatorFacadePrivate
()
{
AllocatorFacadePrivate
()
{
InitCPUAllocator
();
if
(
GetAllocatorStrategy
()
==
AllocatorStrategy
::
kLegacy
)
{
InitCUDAAllocator
();
InitLegacyAllocator
();
InitCUDAPinnedAllocator
();
}
else
{
WrapZeroSizeAllocator
();
InitCPUAllocator
();
InitCUDAAllocator
();
InitCUDAPinnedAllocator
();
WrapZeroSizeAllocator
();
}
}
}
private:
private:
// Registers one LegacyAllocator per place served by the legacy strategy:
// the CPU and, in CUDA builds, every visible GPU plus CUDA pinned host memory.
void InitLegacyAllocator() {
  std::vector<platform::Place> places{platform::CPUPlace()};
#ifdef PADDLE_WITH_CUDA
  // Query the device count once rather than on every loop iteration.
  const int device_count = platform::GetCUDADeviceCount();
  for (int dev_id = 0; dev_id < device_count; ++dev_id) {
    places.emplace_back(platform::CUDAPlace(dev_id));
  }
  // The legacy backend implements Alloc/Free/Used for CUDAPinnedPlace and the
  // non-legacy branch installs a pinned allocator, so register one here too;
  // otherwise allocators_ has no entry for pinned memory in legacy mode.
  places.emplace_back(platform::CUDAPinnedPlace());
#endif
  for (const auto& p : places) {
    allocators_[p] = std::make_shared<LegacyAllocator>(p);
  }
}
void
InitCPUAllocator
()
{
void
InitCPUAllocator
()
{
allocators_
[
platform
::
CPUPlace
()]
=
std
::
make_shared
<
CPUManagedAllocator
>
();
allocators_
[
platform
::
CPUPlace
()]
=
std
::
make_shared
<
CPUManagedAllocator
>
();
}
}
...
...
paddle/fluid/memory/allocation/buffered_allocator.h
浏览文件 @
19e669a9
...
@@ -35,12 +35,6 @@ class BufferedAllocator : public Allocator {
...
@@ -35,12 +35,6 @@ class BufferedAllocator : public Allocator {
~
BufferedAllocator
();
~
BufferedAllocator
();
// std::unique_ptr<Allocation> Allocate(
// size_t size, Allocator::Attr attr = Allocator::Attr::kDefault)
// override;
//
// void FreeUniquePtr(std::unique_ptr<Allocation> allocation) override;
bool
IsAllocThreadSafe
()
const
override
;
bool
IsAllocThreadSafe
()
const
override
;
// only used in unittest
// only used in unittest
...
...
paddle/fluid/memory/allocation/legacy_allocator.cc
0 → 100644
浏览文件 @
19e669a9
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include <string>
#include "glog/logging.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/string/printf.h"
// Debug flag (off by default). When enabled, the Alloc specializations in this
// file fill newly allocated memory with the byte 0xEF.
DEFINE_bool(init_allocated_mem, false,
            "It is a mistake that the values of the memory allocated by "
            "BuddyAllocator are always zeroed in some op's implementation. "
            "To find this error in time, we use init_allocated_mem to indicate "
            "that initializing the allocated memory with a small value "
            "during unit testing.");

// Defined in another translation unit; only read here for a log message.
DECLARE_double(fraction_of_gpu_memory_to_use);
namespace
paddle
{
namespace
memory
{
namespace
legacy
{
// Primary templates for the per-place legacy primitives. They are only
// declared; this file provides explicit specializations for CPUPlace,
// CUDAPlace and CUDAPinnedPlace.

// Allocates `size` bytes on `place`.
template <class Place>
void* Alloc(const Place& place, size_t size);

// Releases `p`, which was obtained from Alloc on the same `place`.
template <class Place>
void Free(const Place& place, void* p);

// Reports the allocator's Used() figure for `place`.
template <class Place>
size_t Used(const Place& place);
// boost::static_visitor that yields a size_t for each concrete place type.
// The three call operators are defined further down in this file and forward
// to the matching Used<> specialization.
struct Usage : boost::static_visitor<size_t> {
  size_t operator()(const platform::CPUPlace& cpu) const;

  size_t operator()(const platform::CUDAPlace& gpu) const;

  size_t operator()(const platform::CUDAPinnedPlace& cuda_pinned) const;
};
// Declaration only; no definition is visible in this file.
// NOTE(review): confirm a definition still exists somewhere after this move.
size_t memory_usage(const platform::Place& p);

// Short local name for the buddy allocator from the detail namespace.
using BuddyAllocator = detail::BuddyAllocator;
// Returns the single BuddyAllocator used for host memory. The instance is
// created on the first call (guarded by std::call_once) and never deleted.
BuddyAllocator* GetCPUBuddyAllocator() {
  // A thread_local allocator was tried for the inference::RNN1 model, but it
  // did not help much in the multi-thread test, so one instance is shared.
  static std::once_flag once;
  static detail::BuddyAllocator* cpu_allocator = nullptr;

  std::call_once(once, [] {
    cpu_allocator = new detail::BuddyAllocator(
        std::unique_ptr<detail::SystemAllocator>(new detail::CPUAllocator),
        platform::CpuMinChunkSize(), platform::CpuMaxChunkSize());
  });

  return cpu_allocator;
}
// We compared NaiveAllocator with BuddyAllocator for CPU memory allocation;
// their overhead appears to be almost the same.
// Thin wrapper over malloc/free with a function-local singleton accessor.
// Nothing else in this file references it.
struct NaiveAllocator {
  // Forwards to malloc; the result is returned unchecked.
  void* Alloc(size_t size) {
    void* block = malloc(size);
    return block;
  }

  // Enforces that `p` is non-null, then hands it to free.
  void Free(void* p) {
    PADDLE_ENFORCE(p);
    free(p);
  }

  // Returns the address of a function-local static instance.
  static NaiveAllocator* Instance() {
    static NaiveAllocator singleton;
    return &singleton;
  }

 private:
  // Not used by any member function in this struct.
  std::mutex lock_;
};
// Allocates `size` bytes of host memory from the CPU buddy allocator.
//
// Returns whatever the buddy allocator returns; the CUDA specialization in
// this file treats a null result as "allocation failed", so the same is
// assumed here. When FLAGS_init_allocated_mem is set, a successful allocation
// is filled with the byte 0xEF.
template <>
void* Alloc<platform::CPUPlace>(const platform::CPUPlace& place, size_t size) {
  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
  void* p = GetCPUBuddyAllocator()->Alloc(size);
  // Skip the fill on failure: memset on a null pointer is undefined behavior.
  if (FLAGS_init_allocated_mem && p != nullptr) {
    memset(p, 0xEF, size);
  }
  VLOG(100) << " pointer=" << p;
  return p;
}
// Hands `p` back to the CPU buddy allocator, logging the pointer and place
// at verbosity level 10 first.
template <>
void Free<platform::CPUPlace>(const platform::CPUPlace& place, void* p) {
  VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
  BuddyAllocator* cpu_allocator = GetCPUBuddyAllocator();
  cpu_allocator->Free(p);
}
// Returns the CPU buddy allocator's Used() value; `place` is not inspected.
template <>
size_t Used<platform::CPUPlace>(const platform::CPUPlace& place) {
  BuddyAllocator* cpu_allocator = GetCPUBuddyAllocator();
  return cpu_allocator->Used();
}
#ifdef PADDLE_WITH_CUDA
// Returns the BuddyAllocator for GPU `gpu_id`.
//
// On the first call, one allocator is created for every device reported by
// platform::GetCUDADeviceCount(); they are never deleted. Every call validates
// `gpu_id` against that count (previously only the first call was checked, so
// a later out-of-range id indexed past the end of the array).
//
// Side effect: the current CUDA device is set to `gpu_id` before returning.
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
  static std::once_flag init_flag;
  static detail::BuddyAllocator** a_arr = nullptr;
  // Cached so the range check below is cheap on every call.
  static const int gpu_num = platform::GetCUDADeviceCount();

  PADDLE_ENFORCE(gpu_id >= 0 && gpu_id < gpu_num,
                 "gpu_id:%d should be in range [0, gpu_num:%d)", gpu_id,
                 gpu_num);

  std::call_once(init_flag, []() {
    a_arr = new BuddyAllocator*[gpu_num];
    for (int i = 0; i < gpu_num; i++) {
      a_arr[i] = nullptr;
      // Device i is made current before its allocator is constructed.
      platform::SetDeviceId(i);
      a_arr[i] = new BuddyAllocator(
          std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
          platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());

      VLOG(100) << "\n\nNOTE: each GPU device use "
                << FLAGS_fraction_of_gpu_memory_to_use * 100
                << "% of GPU memory.\n"
                << "You can set GFlags environment variable '"
                << "FLAGS_fraction_of_gpu_memory_to_use"
                << "' to change the fraction of GPU usage.\n\n";
    }
  });

  platform::SetDeviceId(gpu_id);
  return a_arr[gpu_id];
}
#endif
// Returns Used() of the buddy allocator for device `place.device`.
// In builds without PADDLE_WITH_CUDA this always throws.
template <>
size_t Used<platform::CUDAPlace>(const platform::CUDAPlace& place) {
#ifdef PADDLE_WITH_CUDA
  BuddyAllocator* gpu_allocator = GetGPUBuddyAllocator(place.device);
  return gpu_allocator->Used();
#else
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#endif
}
// Allocates `size` bytes on GPU `place.device` from that device's buddy
// allocator.
//
// If the allocator returns null, a series of warnings describing the request,
// the device's memory state and the allocator's chunk sizes is logged, and
// null is returned to the caller. When FLAGS_init_allocated_mem is set, a
// successful allocation is filled with the byte 0xEF.
//
// In builds without PADDLE_WITH_CUDA this always throws.
template <>
void* Alloc<platform::CUDAPlace>(const platform::CUDAPlace& place,
                                 size_t size) {
#ifdef PADDLE_WITH_CUDA
  auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
  auto* ptr = buddy_allocator->Alloc(size);
  if (ptr == nullptr) {
    // Query memory figures on the target device, then restore whichever
    // device was current when the diagnostics started.
    int cur_dev = platform::GetCurrentDeviceId();
    platform::SetDeviceId(place.device);
    size_t avail = 0;
    size_t total = 0;
    platform::GpuMemoryUsage(&avail, &total);
    LOG(WARNING) << "Cannot allocate " << string::HumanReadableSize(size)
                 << " in GPU " << place.device << ", available "
                 << string::HumanReadableSize(avail);
    LOG(WARNING) << "total " << total;
    LOG(WARNING) << "GpuMinChunkSize "
                 << string::HumanReadableSize(
                        buddy_allocator->GetMinChunkSize());
    LOG(WARNING) << "GpuMaxChunkSize "
                 << string::HumanReadableSize(
                        buddy_allocator->GetMaxChunkSize());
    LOG(WARNING) << "GPU memory used: "
                 << string::HumanReadableSize(Used<platform::CUDAPlace>(place));
    platform::SetDeviceId(cur_dev);
  }
  // Only fill memory that was actually obtained; a failed allocation must not
  // be passed to cudaMemset.
  if (FLAGS_init_allocated_mem && ptr != nullptr) {
    cudaMemset(ptr, 0xEF, size);
  }
  return ptr;
#else
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#endif
}
// Returns `p` to the buddy allocator of device `place.device`.
// In builds without PADDLE_WITH_CUDA this always throws.
template <>
void Free<platform::CUDAPlace>(const platform::CUDAPlace& place, void* p) {
#ifdef PADDLE_WITH_CUDA
  BuddyAllocator* gpu_allocator = GetGPUBuddyAllocator(place.device);
  gpu_allocator->Free(p);
#else
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#endif
}
#ifdef PADDLE_WITH_CUDA
// Returns the single BuddyAllocator backed by detail::CUDAPinnedAllocator.
// It is built on the first call (guarded by std::call_once) and never deleted.
BuddyAllocator* GetCUDAPinnedBuddyAllocator() {
  static std::once_flag once;
  static BuddyAllocator* pinned_allocator = nullptr;

  std::call_once(once, [] {
    pinned_allocator = new BuddyAllocator(
        std::unique_ptr<detail::SystemAllocator>(
            new detail::CUDAPinnedAllocator),
        platform::CUDAPinnedMinChunkSize(),
        platform::CUDAPinnedMaxChunkSize());
  });

  return pinned_allocator;
}
#endif
// Returns Used() of the pinned-memory buddy allocator; `place` is not
// inspected. In builds without PADDLE_WITH_CUDA this always throws.
template <>
size_t Used<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace& place) {
#ifdef PADDLE_WITH_CUDA
  BuddyAllocator* pinned_allocator = GetCUDAPinnedBuddyAllocator();
  return pinned_allocator->Used();
#else
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#endif
}
// Allocates `size` bytes from the pinned-memory buddy allocator.
//
// A null result from the allocator is logged as a warning and returned to the
// caller. When FLAGS_init_allocated_mem is set, a successful allocation is
// filled with the byte 0xEF.
//
// In builds without PADDLE_WITH_CUDA this always throws.
template <>
void* Alloc<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace& place,
                                       size_t size) {
#ifdef PADDLE_WITH_CUDA
  auto* buddy_allocator = GetCUDAPinnedBuddyAllocator();
  void* ptr = buddy_allocator->Alloc(size);

  if (ptr == nullptr) {
    LOG(WARNING) << "cudaMallocHost Cannot allocate " << size
                 << " bytes in CUDAPinnedPlace";
  }
  // Never fill a failed allocation: memset on a null pointer is undefined
  // behavior and would crash right after the warning above.
  if (FLAGS_init_allocated_mem && ptr != nullptr) {
    memset(ptr, 0xEF, size);
  }
  return ptr;
#else
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#endif
}
// Returns `p` to the pinned-memory buddy allocator; `place` is not inspected.
// In builds without PADDLE_WITH_CUDA this always throws.
template <>
void Free<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace& place,
                                     void* p) {
#ifdef PADDLE_WITH_CUDA
  BuddyAllocator* pinned_allocator = GetCUDAPinnedBuddyAllocator();
  pinned_allocator->Free(p);
#else
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#endif
}
// boost::static_visitor that carries a byte count and, when applied to a
// place, calls the Alloc<Place> specialization for that place's concrete type.
struct AllocVisitor : boost::static_visitor<void*> {
  explicit AllocVisitor(size_t size) : size_(size) {}

  // Dispatch target: forwards the stored size to Alloc<Place>.
  template <class Place>
  void* operator()(const Place& place) const {
    return Alloc<Place>(place, size_);
  }

 private:
  size_t size_;  // number of bytes to request
};
// boost::static_visitor that carries a raw pointer and, when applied to a
// place, calls the Free<Place> specialization for that place's concrete type.
struct FreeVisitor : boost::static_visitor<void> {
  explicit FreeVisitor(void* ptr) : ptr_(ptr) {}

  // Dispatch target: forwards the stored pointer to Free<Place>.
  template <class Place>
  void operator()(const Place& place) const {
    Free<Place>(place, ptr_);
  }

 private:
  void* ptr_;  // pointer to release
};
// CPU case: forwards to Used<platform::CPUPlace>.
size_t Usage::operator()(const platform::CPUPlace& cpu) const {
  return Used<platform::CPUPlace>(cpu);
}
// GPU case: forwards to Used<platform::CUDAPlace>.
// In builds without PADDLE_WITH_CUDA this always throws.
size_t Usage::operator()(const platform::CUDAPlace& gpu) const {
#ifdef PADDLE_WITH_CUDA
  return Used<platform::CUDAPlace>(gpu);
#else
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#endif
}
// Pinned-memory case: forwards to Used<platform::CUDAPinnedPlace>.
// In builds without PADDLE_WITH_CUDA this always throws.
size_t Usage::operator()(const platform::CUDAPinnedPlace& cuda_pinned) const {
#ifdef PADDLE_WITH_CUDA
  return Used<platform::CUDAPinnedPlace>(cuda_pinned);
#else
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#endif
}
}
// namespace legacy
namespace
allocation
{
// Obtains `size` bytes for this allocator's place via legacy::AllocVisitor and
// wraps the raw pointer in a heap-allocated Allocation. `attr` is ignored.
// The pointer is wrapped as-is, without a null check.
Allocation* LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
  const legacy::AllocVisitor allocate(size);
  void* raw = boost::apply_visitor(allocate, place_);
  return new Allocation(raw, size, place_);
}
// Releases the memory held by `allocation` through legacy::FreeVisitor, using
// the place recorded in the allocation itself, then deletes the Allocation
// object.
void LegacyAllocator::Free(Allocation* allocation) {
  const legacy::FreeVisitor release(allocation->ptr());
  boost::apply_visitor(release, allocation->place());
  delete allocation;
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/legacy_allocator.h
0 → 100644
浏览文件 @
19e669a9
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Forward declaration; not referenced by anything visible in this header.
class LegacyAllocatorPrivate;

// Allocator bound to a single platform::Place. The implementation file routes
// allocation and release through the legacy per-place Alloc/Free functions.
class LegacyAllocator : public Allocator {
 public:
  // Binds the allocator to place `p`.
  explicit LegacyAllocator(const platform::Place& p) : place_(p) {}

 protected:
  // Allocator hook: produces an Allocation of `size` bytes on place_.
  Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;

  // Allocator hook: releases `allocation` and the memory it refers to.
  void Free(Allocation* allocation) override;

 private:
  platform::Place place_;  // place every allocation is made on
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/malloc.cc
浏览文件 @
19e669a9
...
@@ -12,305 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,305 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/memory/malloc.h"
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/string/printf.h"
DEFINE_bool
(
init_allocated_mem
,
false
,
"It is a mistake that the values of the memory allocated by "
"BuddyAllocator are always zeroed in some op's implementation. "
"To find this error in time, we use init_allocated_mem to indicate "
"that initializing the allocated memory with a small value "
"during unit testing."
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
namespace
paddle
{
namespace
paddle
{
namespace
memory
{
namespace
memory
{
namespace
legacy
{
using
BuddyAllocator
=
detail
::
BuddyAllocator
;
BuddyAllocator
*
GetCPUBuddyAllocator
()
{
// We tried thread_local for inference::RNN1 model, but that not works much
// for multi-thread test.
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
*
a
=
nullptr
;
std
::
call_once
(
init_flag
,
[]()
{
a
=
new
detail
::
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
CPUAllocator
),
platform
::
CpuMinChunkSize
(),
platform
::
CpuMaxChunkSize
());
});
return
a
;
}
// We compared the NaiveAllocator with BuddyAllocator in CPU memory allocation,
// seems they are almost the same overhead.
struct
NaiveAllocator
{
void
*
Alloc
(
size_t
size
)
{
return
malloc
(
size
);
}
void
Free
(
void
*
p
)
{
PADDLE_ENFORCE
(
p
);
free
(
p
);
}
static
NaiveAllocator
*
Instance
()
{
static
NaiveAllocator
x
;
return
&
x
;
}
private:
std
::
mutex
lock_
;
};
template
<
>
void
*
Alloc
<
platform
::
CPUPlace
>
(
const
platform
::
CPUPlace
&
place
,
size_t
size
)
{
VLOG
(
10
)
<<
"Allocate "
<<
size
<<
" bytes on "
<<
platform
::
Place
(
place
);
void
*
p
=
GetCPUBuddyAllocator
()
->
Alloc
(
size
);
if
(
FLAGS_init_allocated_mem
)
{
memset
(
p
,
0xEF
,
size
);
}
VLOG
(
100
)
<<
" pointer="
<<
p
;
return
p
;
}
template
<
>
void
Free
<
platform
::
CPUPlace
>
(
const
platform
::
CPUPlace
&
place
,
void
*
p
)
{
VLOG
(
10
)
<<
"Free pointer="
<<
p
<<
" on "
<<
platform
::
Place
(
place
);
GetCPUBuddyAllocator
()
->
Free
(
p
);
}
template
<
>
size_t
Used
<
platform
::
CPUPlace
>
(
const
platform
::
CPUPlace
&
place
)
{
return
GetCPUBuddyAllocator
()
->
Used
();
}
#ifdef PADDLE_WITH_CUDA
BuddyAllocator
*
GetGPUBuddyAllocator
(
int
gpu_id
)
{
static
std
::
once_flag
init_flag
;
static
detail
::
BuddyAllocator
**
a_arr
=
nullptr
;
std
::
call_once
(
init_flag
,
[
gpu_id
]()
{
int
gpu_num
=
platform
::
GetCUDADeviceCount
();
PADDLE_ENFORCE
(
gpu_id
<
gpu_num
,
"gpu_id:%d should < gpu_num:%d"
,
gpu_id
,
gpu_num
);
a_arr
=
new
BuddyAllocator
*
[
gpu_num
];
for
(
int
i
=
0
;
i
<
gpu_num
;
i
++
)
{
a_arr
[
i
]
=
nullptr
;
platform
::
SetDeviceId
(
i
);
a_arr
[
i
]
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
i
)),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
100
)
<<
"
\n\n
NOTE: each GPU device use "
<<
FLAGS_fraction_of_gpu_memory_to_use
*
100
<<
"% of GPU memory.
\n
"
<<
"You can set GFlags environment variable '"
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"' to change the fraction of GPU usage.
\n\n
"
;
}
});
platform
::
SetDeviceId
(
gpu_id
);
return
a_arr
[
gpu_id
];
}
#endif
template
<
>
size_t
Used
<
platform
::
CUDAPlace
>
(
const
platform
::
CUDAPlace
&
place
)
{
#ifdef PADDLE_WITH_CUDA
return
GetGPUBuddyAllocator
(
place
.
device
)
->
Used
();
#else
PADDLE_THROW
(
"'CUDAPlace' is not supported in CPU only device."
);
#endif
}
template
<
>
void
*
Alloc
<
platform
::
CUDAPlace
>
(
const
platform
::
CUDAPlace
&
place
,
size_t
size
)
{
#ifdef PADDLE_WITH_CUDA
auto
*
buddy_allocator
=
GetGPUBuddyAllocator
(
place
.
device
);
auto
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
)
{
int
cur_dev
=
platform
::
GetCurrentDeviceId
();
platform
::
SetDeviceId
(
place
.
device
);
size_t
avail
,
total
;
platform
::
GpuMemoryUsage
(
&
avail
,
&
total
);
LOG
(
WARNING
)
<<
"Cannot allocate "
<<
string
::
HumanReadableSize
(
size
)
<<
" in GPU "
<<
place
.
device
<<
", available "
<<
string
::
HumanReadableSize
(
avail
);
LOG
(
WARNING
)
<<
"total "
<<
total
;
LOG
(
WARNING
)
<<
"GpuMinChunkSize "
<<
string
::
HumanReadableSize
(
buddy_allocator
->
GetMinChunkSize
());
LOG
(
WARNING
)
<<
"GpuMaxChunkSize "
<<
string
::
HumanReadableSize
(
buddy_allocator
->
GetMaxChunkSize
());
LOG
(
WARNING
)
<<
"GPU memory used: "
<<
string
::
HumanReadableSize
(
Used
<
platform
::
CUDAPlace
>
(
place
));
platform
::
SetDeviceId
(
cur_dev
);
}
if
(
FLAGS_init_allocated_mem
)
{
cudaMemset
(
ptr
,
0xEF
,
size
);
}
return
ptr
;
#else
PADDLE_THROW
(
"'CUDAPlace' is not supported in CPU only device."
);
#endif
}
template
<
>
void
Free
<
platform
::
CUDAPlace
>
(
const
platform
::
CUDAPlace
&
place
,
void
*
p
)
{
#ifdef PADDLE_WITH_CUDA
GetGPUBuddyAllocator
(
place
.
device
)
->
Free
(
p
);
#else
PADDLE_THROW
(
"'CUDAPlace' is not supported in CPU only device."
);
#endif
}
#ifdef PADDLE_WITH_CUDA
BuddyAllocator
*
GetCUDAPinnedBuddyAllocator
()
{
static
std
::
once_flag
init_flag
;
static
BuddyAllocator
*
ba
=
nullptr
;
std
::
call_once
(
init_flag
,
[]()
{
ba
=
new
BuddyAllocator
(
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
CUDAPinnedAllocator
),
platform
::
CUDAPinnedMinChunkSize
(),
platform
::
CUDAPinnedMaxChunkSize
());
});
return
ba
;
}
#endif
template
<
>
size_t
Used
<
platform
::
CUDAPinnedPlace
>
(
const
platform
::
CUDAPinnedPlace
&
place
)
{
#ifdef PADDLE_WITH_CUDA
return
GetCUDAPinnedBuddyAllocator
()
->
Used
();
#else
PADDLE_THROW
(
"'CUDAPinnedPlace' is not supported in CPU only device."
);
#endif
}
template
<
>
void
*
Alloc
<
platform
::
CUDAPinnedPlace
>
(
const
platform
::
CUDAPinnedPlace
&
place
,
size_t
size
)
{
#ifdef PADDLE_WITH_CUDA
auto
*
buddy_allocator
=
GetCUDAPinnedBuddyAllocator
();
void
*
ptr
=
buddy_allocator
->
Alloc
(
size
);
if
(
ptr
==
nullptr
)
{
LOG
(
WARNING
)
<<
"cudaMallocHost Cannot allocate "
<<
size
<<
" bytes in CUDAPinnedPlace"
;
}
if
(
FLAGS_init_allocated_mem
)
{
memset
(
ptr
,
0xEF
,
size
);
}
return
ptr
;
#else
PADDLE_THROW
(
"'CUDAPinnedPlace' is not supported in CPU only device."
);
#endif
}
template
<
>
void
Free
<
platform
::
CUDAPinnedPlace
>
(
const
platform
::
CUDAPinnedPlace
&
place
,
void
*
p
)
{
#ifdef PADDLE_WITH_CUDA
GetCUDAPinnedBuddyAllocator
()
->
Free
(
p
);
#else
PADDLE_THROW
(
"'CUDAPinnedPlace' is not supported in CPU only device."
);
#endif
}
struct
AllocVisitor
:
public
boost
::
static_visitor
<
void
*>
{
inline
explicit
AllocVisitor
(
size_t
size
)
:
size_
(
size
)
{}
template
<
typename
Place
>
inline
void
*
operator
()(
const
Place
&
place
)
const
{
return
Alloc
<
Place
>
(
place
,
size_
);
}
private:
size_t
size_
;
};
struct
FreeVisitor
:
public
boost
::
static_visitor
<
void
>
{
inline
explicit
FreeVisitor
(
void
*
ptr
)
:
ptr_
(
ptr
)
{}
template
<
typename
Place
>
inline
void
operator
()(
const
Place
&
place
)
const
{
Free
<
Place
>
(
place
,
ptr_
);
}
private:
void
*
ptr_
;
};
size_t
Usage
::
operator
()(
const
platform
::
CPUPlace
&
cpu
)
const
{
return
Used
(
cpu
);
}
size_t
Usage
::
operator
()(
const
platform
::
CUDAPlace
&
gpu
)
const
{
#ifdef PADDLE_WITH_CUDA
return
Used
(
gpu
);
#else
PADDLE_THROW
(
"'CUDAPlace' is not supported in CPU only device."
);
#endif
}
size_t
Usage
::
operator
()(
const
platform
::
CUDAPinnedPlace
&
cuda_pinned
)
const
{
#ifdef PADDLE_WITH_CUDA
return
Used
(
cuda_pinned
);
#else
PADDLE_THROW
(
"'CUDAPinnedPlace' is not supported in CPU only device."
);
#endif
}
class
LegacyAllocation
:
public
Allocation
{
public:
using
Allocation
::
Allocation
;
~
LegacyAllocation
()
final
{
boost
::
apply_visitor
(
FreeVisitor
(
this
->
ptr
()),
this
->
place
());
}
};
}
// namespace legacy
std
::
shared_ptr
<
Allocation
>
AllocShared
(
const
platform
::
Place
&
place
,
std
::
shared_ptr
<
Allocation
>
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
size_t
size
,
Allocator
::
Attr
attr
)
{
if
(
allocation
::
GetAllocatorStrategy
()
==
return
allocation
::
AllocatorFacade
::
Instance
().
AllocShared
(
place
,
size
,
attr
);
allocation
::
AllocatorStrategy
::
kLegacy
)
{
void
*
p
=
boost
::
apply_visitor
(
legacy
::
AllocVisitor
(
size
),
place
);
return
std
::
shared_ptr
<
Allocation
>
(
new
legacy
::
LegacyAllocation
(
p
,
size
,
place
));
}
else
{
return
allocation
::
AllocatorFacade
::
Instance
().
AllocShared
(
place
,
size
,
attr
);
}
}
}
AllocationPtr
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
AllocationPtr
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
)
{
Allocator
::
Attr
attr
)
{
if
(
allocation
::
GetAllocatorStrategy
()
==
return
allocation
::
AllocatorFacade
::
Instance
().
Alloc
(
place
,
size
,
attr
);
allocation
::
AllocatorStrategy
::
kLegacy
)
{
void
*
p
=
boost
::
apply_visitor
(
legacy
::
AllocVisitor
(
size
),
place
);
return
AllocationPtr
(
new
legacy
::
LegacyAllocation
(
p
,
size
,
place
));
}
else
{
return
allocation
::
AllocatorFacade
::
Instance
().
Alloc
(
place
,
size
,
attr
);
}
}
}
}
// namespace memory
}
// namespace memory
...
...
paddle/fluid/memory/malloc.h
浏览文件 @
19e669a9
...
@@ -30,26 +30,5 @@ extern std::shared_ptr<Allocation> AllocShared(
...
@@ -30,26 +30,5 @@ extern std::shared_ptr<Allocation> AllocShared(
extern
AllocationPtr
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
extern
AllocationPtr
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
namespace
legacy
{
template
<
typename
Place
>
void
*
Alloc
(
const
Place
&
place
,
size_t
size
);
template
<
typename
Place
>
void
Free
(
const
Place
&
place
,
void
*
p
);
template
<
typename
Place
>
size_t
Used
(
const
Place
&
place
);
struct
Usage
:
public
boost
::
static_visitor
<
size_t
>
{
size_t
operator
()(
const
platform
::
CPUPlace
&
cpu
)
const
;
size_t
operator
()(
const
platform
::
CUDAPlace
&
gpu
)
const
;
size_t
operator
()(
const
platform
::
CUDAPinnedPlace
&
cuda_pinned
)
const
;
};
size_t
memory_usage
(
const
platform
::
Place
&
p
);
}
// namespace legacy
}
// namespace memory
}
// namespace memory
}
// namespace paddle
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录