Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c20db635
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c20db635
编写于
3月 25, 2019
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
split PR
test=develop
上级
c75a8803
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
31 addition
and
1205 deletion
+31
-1205
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+0
-2
paddle/fluid/framework/inlined_vector.h
paddle/fluid/framework/inlined_vector.h
+0
-82
paddle/fluid/framework/inlined_vector_test.cc
paddle/fluid/framework/inlined_vector_test.cc
+0
-53
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+4
-13
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+6
-5
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+3
-52
paddle/fluid/memory/allocation/allocator_strategy.cc
paddle/fluid/memory/allocation/allocator_strategy.cc
+1
-4
paddle/fluid/memory/allocation/allocator_strategy.h
paddle/fluid/memory/allocation/allocator_strategy.h
+1
-1
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
...fluid/memory/allocation/auto_growth_best_fit_allocator.cc
+0
-134
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
.../fluid/memory/allocation/auto_growth_best_fit_allocator.h
+0
-87
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
.../allocation/auto_growth_best_fit_allocator_facade_test.cc
+0
-96
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
.../memory/allocation/auto_growth_best_fit_allocator_test.cc
+0
-70
paddle/fluid/memory/allocation/cpu_allocator.cc
paddle/fluid/memory/allocation/cpu_allocator.cc
+2
-2
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+1
-1
paddle/fluid/memory/allocation/legacy_allocator.cc
paddle/fluid/memory/allocation/legacy_allocator.cc
+12
-6
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
...e/fluid/memory/allocation/multi_bin_buffered_allocator.cc
+0
-300
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
...le/fluid/memory/allocation/multi_bin_buffered_allocator.h
+0
-62
paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
...id/memory/allocation/multi_bin_buffered_allocator_test.cc
+0
-170
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
...memory/allocation/naive_best_fit_allocator_facade_test.cc
+0
-3
paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc
...cation/test_multi_bin_buffered_allocator_division_plan.cc
+0
-56
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+0
-3
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-3
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
c20db635
...
...
@@ -202,8 +202,6 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
cc_test
(
tuple_test SRCS tuple_test.cc
)
cc_test
(
inlined_vector_test SRCS inlined_vector_test.cc
)
if
(
NOT WIN32
)
cc_test
(
rw_lock_test SRCS rw_lock_test.cc
)
endif
(
NOT WIN32
)
...
...
paddle/fluid/framework/inlined_vector.h
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
// A small vector-like container. The first N elements are stored in an
// array that is a direct member of the object; any element beyond the
// N-th is stored in a std::vector member instead.
template <typename T, size_t N>
class InlinedVector {
  static_assert(N > 0, "N must be larger than 0");

 public:
  // Appends a copy of `item` at the end of the container.
  inline void push_back(const T& item) {
    if (size_ >= N) {
      // The inline array is full: spill into the overflow vector.
      tail_.emplace_back(item);
    } else {
      head_[size_] = item;
    }
    ++size_;
  }

  // Removes the last element. Enforces that the container is not empty.
  inline void pop_back() {
    PADDLE_ENFORCE(!empty(), "Try to pop back element from empty vector.");
    const bool last_lives_in_tail = size_ > N;
    if (last_lives_in_tail) {
      tail_.pop_back();
    }
    // Elements of the inline array are not reset; only the count shrinks.
    --size_;
  }

  // Returns the last element (read-only). Enforces non-emptiness.
  inline const T& back() const {
    PADDLE_ENFORCE(!empty(), "Try to get back element of empty vector.");
    if (size_ > N) {
      return tail_.back();
    }
    return head_[size_ - 1];
  }

  // Returns the last element (mutable). Enforces non-emptiness.
  inline T& back() {
    PADDLE_ENFORCE(!empty(), "Try to get back element of empty vector.");
    if (size_ > N) {
      return tail_.back();
    }
    return head_[size_ - 1];
  }

  // True when no element is stored.
  inline bool empty() const { return size_ == 0; }

  // Number of stored elements (inline part plus overflow part).
  inline size_t size() const { return size_; }

  // This API can only be used in unittest
  // Indexed access; `i` is not checked against size().
  T& operator[](size_t i) {
    if (i >= N) {
      return tail_[i - N];
    }
    return head_[i];
  }

  // Read-only indexed access; `i` is not checked against size().
  const T& operator[](size_t i) const {
    if (i >= N) {
      return tail_[i - N];
    }
    return head_[i];
  }

  // Implicit conversion: copies all stored elements, in order, into a
  // freshly built std::vector.
  operator std::vector<T>() const {
    std::vector<T> copied;
    copied.reserve(size_);
    for (size_t idx = 0; idx != size_; ++idx) {
      copied.push_back((*this)[idx]);
    }
    return copied;
  }

 private:
  T head_[N];            // inline storage for the first N elements
  size_t size_{0};       // total number of elements held
  std::vector<T> tail_;  // overflow storage for elements past the N-th
};
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/inlined_vector_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/inlined_vector.h"
#include <vector>
#include "gtest/gtest.h"
namespace
paddle
{
namespace
framework
{
// Pushes 10 values into an InlinedVector whose inline capacity is 5 (so half
// of them overflow), then checks size tracking, conversion to std::vector,
// indexed access, and back()/pop_back() in reverse order.
TEST(inlined_stack, inlined_stack) {
  const size_t kElementCount = 10;

  InlinedVector<size_t, 5> stack;

  // Fill: size must grow by exactly one per push.
  for (size_t value = 0; value < kElementCount; ++value) {
    ASSERT_EQ(stack.size(), value);
    stack.push_back(value);
    ASSERT_EQ(stack.size(), value + 1);
  }

  // The converted std::vector must mirror the container element by element.
  std::vector<size_t> vec = stack;
  ASSERT_EQ(stack.size(), vec.size());
  for (size_t idx = 0; idx < vec.size(); ++idx) {
    ASSERT_EQ(stack[idx], vec[idx]);
  }

  // Element idx holds the value idx.
  for (size_t idx = 0; idx < kElementCount; ++idx) {
    ASSERT_EQ(stack[idx], idx);
  }

  // Drain from the back: `remaining` is the size before each pop.
  for (size_t remaining = kElementCount; remaining > 0; --remaining) {
    ASSERT_EQ(stack.back(), remaining - 1);
    stack.pop_back();
    ASSERT_EQ(stack.size(), remaining - 1);
  }
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
c20db635
...
...
@@ -3,18 +3,9 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library
(
best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator
)
cc_library
(
locked_allocator SRCS locked_allocator.cc DEPS allocator
)
cc_library
(
buffered_allocator SRCS buffered_allocator.cc DEPS allocator
)
cc_library
(
multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator gflags
)
cc_library
(
legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
cc_test
(
multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator
)
cc_library
(
auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator
)
cc_test
(
auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator
)
if
(
NOT WIN32
)
cc_test
(
test_multi_bin_buffered_allocator_division_plan SRCS test_multi_bin_buffered_allocator_division_plan.cc DEPS multi_bin_buffered_allocator
)
endif
()
if
(
WITH_GPU
)
nv_library
(
cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard
)
...
...
@@ -47,7 +38,7 @@ else ()
set
(
AllocatorFacadeDeps
)
endif
()
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator
multi_bin_buffered_allocator auto_growth_best_fit_allocator
legacy_allocator zero_size_allocator
)
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator
)
cc_library
(
aligned_allocator SRCS aligned_allocator.cc DEPS allocator
)
cc_library
(
auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator
)
...
...
@@ -59,8 +50,8 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat
cc_test
(
retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator
)
cc_test
(
allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade
)
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
c20db635
...
...
@@ -17,7 +17,6 @@
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/inlined_vector.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
...
...
@@ -50,7 +49,9 @@ class Allocator;
class
Allocation
{
public:
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
:
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{}
:
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{
decorated_allocators_
.
reserve
(
8
);
}
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
...
...
@@ -80,8 +81,8 @@ class Allocation {
virtual
~
Allocation
();
private:
std
::
vector
<
Allocator
*>
DecoratedAllocators
()
const
{
return
static_cast
<
std
::
vector
<
Allocator
*>>
(
decorated_allocators_
)
;
const
std
::
vector
<
Allocator
*>&
DecoratedAllocators
()
const
{
return
decorated_allocators_
;
}
inline
void
RegisterDecoratedAllocator
(
Allocator
*
allocator
)
{
...
...
@@ -98,7 +99,7 @@ class Allocation {
void
*
ptr_
;
size_t
size_
;
platform
::
Place
place_
;
framework
::
InlinedVector
<
Allocator
*
,
8
>
decorated_allocators_
;
std
::
vector
<
Allocator
*
>
decorated_allocators_
;
friend
class
Allocator
;
friend
class
AllocationDeleter
;
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
c20db635
...
...
@@ -22,14 +22,12 @@
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
...
...
@@ -47,24 +45,18 @@ DEFINE_int64(
"The retry time (milliseconds) when allocator fails "
"to allocate memory. No retry if this value is not greater than 0"
);
DEFINE_bool
(
enable_buffered_allocator
,
false
,
"Enable buffered_allocator"
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAndBufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
,
bool
enable_buffered
)
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
)
{
if
(
retry_time
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time
);
allocator
.
reset
(
retry_allocator
);
}
if
(
enable_buffered
)
{
allocator
.
reset
(
new
MultiBinBufferedAllocator
(
allocator
));
}
return
allocator
;
}
...
...
@@ -134,8 +126,7 @@ class ChunkedAllocator : public Allocator {
std
::
shared_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
shared_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
allocator
=
WrapRetryAndBufferedAllocator
(
allocator
,
retry_time_
,
FLAGS_enable_buffered_allocator
);
allocator
=
WrapRetryAllocator
(
allocator
,
retry_time_
);
return
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
move
(
allocator
));
}
...
...
@@ -219,13 +210,6 @@ class AllocatorFacadePrivate {
WrapZeroSizeAllocator
();
break
;
}
case
AllocatorStrategy
::
kAutoGrowthBestFit
:
{
InitAutoGrowthCPUAllocator
();
InitAutoGrowthCUDAAllocator
();
InitAutoGrowthCUDAPinnedAllocator
();
WrapZeroSizeAllocator
();
break
;
}
default:
{
PADDLE_THROW
(
"Unsupported allocator strategy: %d"
,
static_cast
<
int
>
(
strategy
));
...
...
@@ -234,39 +218,6 @@ class AllocatorFacadePrivate {
}
private:
void
InitAutoGrowthCPUAllocator
()
{
auto
cpu_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CPUAllocator
>
());
allocators_
[
platform
::
CPUPlace
()]
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cpu_allocator
,
platform
::
CpuMaxChunkSize
(),
4096
);
}
// Registers one allocator per CUDA device for the auto-growth strategy.
// Compiles to an empty function unless PADDLE_WITH_CUDA is defined.
void InitAutoGrowthCUDAAllocator() {
#ifdef PADDLE_WITH_CUDA
  const int device_count = platform::GetCUDADeviceCount();
  for (int dev_id = 0; dev_id != device_count; ++dev_id) {
    // CUDAAllocator -> AlignedAllocator<4096> -> AutoGrowthBestFitAllocator.
    auto aligned_cuda_allocator = std::make_shared<AlignedAllocator<4096>>(
        std::make_shared<CUDAAllocator>(platform::CUDAPlace(dev_id)));
    auto auto_growth_allocator = std::make_shared<AutoGrowthBestFitAllocator>(
        aligned_cuda_allocator, platform::GpuMaxChunkSize(), 4096);
    // Retry time comes from FLAGS_gpu_allocator_retry_time; the buffered
    // wrapper is explicitly disabled (last argument is false).
    allocators_[platform::CUDAPlace(dev_id)] = WrapRetryAndBufferedAllocator(
        auto_growth_allocator, FLAGS_gpu_allocator_retry_time, false);
  }
#endif
}
// Registers the CUDA pinned-memory allocator for the auto-growth strategy:
// CPUPinnedAllocator -> AlignedAllocator<4096> -> AutoGrowthBestFitAllocator
// with platform::CUDAPinnedMaxChunkSize() as the chunk size.
// Compiles to an empty function unless PADDLE_WITH_CUDA is defined.
void InitAutoGrowthCUDAPinnedAllocator() {
#ifdef PADDLE_WITH_CUDA
  auto aligned_pinned_allocator = std::make_shared<AlignedAllocator<4096>>(
      std::make_shared<CPUPinnedAllocator>());
  auto auto_growth_allocator = std::make_shared<AutoGrowthBestFitAllocator>(
      aligned_pinned_allocator, platform::CUDAPinnedMaxChunkSize(), 4096);
  allocators_[platform::CUDAPinnedPlace()] = auto_growth_allocator;
#endif
}
void
InitLegacyAllocator
()
{
std
::
vector
<
platform
::
Place
>
places
{
platform
::
CPUPlace
()};
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/memory/allocation/allocator_strategy.cc
浏览文件 @
c20db635
...
...
@@ -20,8 +20,7 @@ DEFINE_string(
allocator_strategy
,
"legacy"
,
"The allocation strategy. Legacy means the original allocator of Fluid."
"naive_best_fit means the experimental best fit allocator. "
"auto_growth_best_fit means the experimental auto growth best fit "
"allocator. Enum in [legacy, naive_best_fit, auto_growth_best_fit]."
);
"allocator. Enum in [legacy, naive_best_fit]."
);
namespace
paddle
{
namespace
memory
{
...
...
@@ -32,8 +31,6 @@ static AllocatorStrategy GetStrategyFromFlag() {
return
AllocatorStrategy
::
kLegacy
;
}
else
if
(
FLAGS_allocator_strategy
==
"naive_best_fit"
)
{
return
AllocatorStrategy
::
kNaiveBestFit
;
}
else
if
(
FLAGS_allocator_strategy
==
"auto_growth_best_fit"
)
{
return
AllocatorStrategy
::
kAutoGrowthBestFit
;
}
else
{
PADDLE_THROW
(
"Unsupported allocator strategy: %s"
,
FLAGS_allocator_strategy
);
...
...
paddle/fluid/memory/allocation/allocator_strategy.h
浏览文件 @
c20db635
...
...
@@ -18,7 +18,7 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
enum
class
AllocatorStrategy
{
kLegacy
,
kNaiveBestFit
,
kAutoGrowthBestFit
};
enum
class
AllocatorStrategy
{
kLegacy
,
kNaiveBestFit
};
extern
AllocatorStrategy
GetAllocatorStrategy
();
...
...
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include <algorithm>
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <unordered_map>
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Rounds `size` up to the nearest multiple of `alignment`. A `size` that is
// already a multiple is returned unchanged. `alignment` must be non-zero
// (it is used as the divisor of a modulo).
static size_t align(size_t size, size_t alignment) {
  const size_t excess = size % alignment;
  if (excess == 0) {
    return size;
  }
  return size + alignment - excess;
}
AutoGrowthBestFitAllocator
::
AutoGrowthBestFitAllocator
(
const
std
::
shared_ptr
<
Allocator
>
&
underlying_allocator
,
size_t
chunk_size
,
size_t
alignment
)
:
underlying_allocator_
(
underlying_allocator
),
chunk_size_
(
align
(
chunk_size
,
alignment
)),
alignment_
(
alignment
)
{}
// Allocates `size` bytes (rounded up to a multiple of alignment_).
//
// The whole operation runs under mtx_. free_blocks_ is ordered by
// (size, address), so lower_bound yields the smallest free block that is at
// least `size` bytes. If one exists it is reused and, when larger than
// needed, split: the low part stays free and the high part is handed out.
// Otherwise a new chunk is requested from the underlying allocator, using
// chunk_size_ when that is larger than `size` and the underlying allocator
// has not yet been marked exhausted. If that larger request throws BadAlloc,
// the allocator is marked exhausted and exactly `size` bytes are requested
// instead.
//
// Returns a heap-allocated Chunk::BlockAllocation describing the block.
// Propagates BadAlloc when the underlying allocator cannot serve even the
// exact size.
Allocation* AutoGrowthBestFitAllocator::AllocateImpl(size_t size, Attr attr) {
  size = align(size, alignment_);
  std::lock_guard<std::mutex> guard(mtx_);
  auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr));
  BlockIt block_it;
  if (iter != free_blocks_.end()) {
    VLOG(2) << "Found " << iter->second->size_ << " for " << size;
    block_it = iter->second;
    free_blocks_.erase(iter);
    auto* chunk = block_it->chunk_;
    size_t remaining_size = block_it->size_ - size;
    if (remaining_size == 0) {
      // Exact fit: hand out the block as it is.
      block_it->is_free_ = false;
      VLOG(2) << "Found and no remaining";
    } else {
      // Split: insert a free block covering the low `remaining_size` bytes
      // in front of block_it, then shrink block_it to the high `size` bytes.
      auto remaining_free_block = chunk->blocks_.insert(
          block_it, Chunk::Block(block_it->ptr_, remaining_size, true, chunk));
      free_blocks_.emplace(std::make_pair(remaining_size, block_it->ptr_),
                           remaining_free_block);
      block_it->ptr_ =
          reinterpret_cast<uint8_t*>(block_it->ptr_) + remaining_size;
      block_it->size_ = size;
      block_it->is_free_ = false;
      VLOG(2) << "Found and remaining " << remaining_size;
    }
  } else {
    size_t alloc_size = size;
    if (!underlying_allocator_exhaustive_ && chunk_size_ > size) {
      alloc_size = chunk_size_;
    }
    try {
      chunks_.emplace_back(underlying_allocator_->Allocate(alloc_size, attr));
    } catch (BadAlloc&) {
      // Nothing smaller to fall back to: rethrow the original exception
      // object (`throw;` avoids copying/slicing it).
      if (size == alloc_size) throw;
      underlying_allocator_exhaustive_ = true;
      alloc_size = size;
      chunks_.emplace_back(underlying_allocator_->Allocate(alloc_size, attr));
    }
    auto* chunk = &(*chunks_.rbegin());
    uint8_t* p = reinterpret_cast<uint8_t*>(chunk->allocation_->ptr());
    auto& blocks = chunk->blocks_;
    size_t remaining_size = alloc_size - size;
    if (remaining_size > 0) {
      // The low part of the new chunk becomes a free block.
      blocks.emplace_back(p, remaining_size, true, chunk);
      free_blocks_.emplace(std::make_pair(remaining_size, p),
                           --(blocks.end()));
    }
    // The high `size` bytes of the new chunk are handed out.
    blocks.emplace_back(p + remaining_size, size, false, chunk);
    block_it = --(blocks.end());
    VLOG(2) << "Not found and allocate " << alloc_size << ", and remaining "
            << remaining_size;
  }
  VLOG(2) << "After allocate, free blocks " << free_blocks_.size();
  return new Chunk::BlockAllocation(block_it);
}
// Returns the block behind `allocation` to the free set and deletes
// `allocation`. Under mtx_, the block is marked free and merged with the
// preceding and/or following block of the same chunk when those are free;
// the resulting block is then registered in free_blocks_.
void AutoGrowthBestFitAllocator::FreeImpl(Allocation* allocation) {
  auto& cur =
      static_cast<Chunk::BlockAllocation*>(allocation)->block_it_;
  auto& chunk_blocks = cur->chunk_->blocks_;

  std::lock_guard<std::mutex> guard(mtx_);
  cur->is_free_ = true;

  // Merge into the left neighbour when there is one and it is free.
  const bool has_left = (cur != chunk_blocks.begin());
  if (has_left) {
    auto left = cur;
    --left;
    if (left->is_free_) {
      // The left block's key changes with its size, so drop the old entry.
      free_blocks_.erase(std::make_pair(left->size_, left->ptr_));
      left->size_ += cur->size_;
      chunk_blocks.erase(cur);
      cur = left;
    }
  }

  // Absorb the right neighbour when there is one and it is free.
  auto right = cur;
  ++right;
  if (right != chunk_blocks.end() && right->is_free_) {
    free_blocks_.erase(std::make_pair(right->size_, right->ptr_));
    cur->size_ += right->size_;
    chunk_blocks.erase(right);
  }

  free_blocks_.emplace(std::make_pair(cur->size_, cur->ptr_), cur);

  VLOG(2) << "Combine " << cur->size_ << ", " << chunk_blocks.size() << ", "
          << free_blocks_.size();
  delete allocation;
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
class
AutoGrowthBestFitAllocator
:
public
Allocator
{
public:
explicit
AutoGrowthBestFitAllocator
(
const
std
::
shared_ptr
<
Allocator
>
&
underlying_allocator
,
size_t
chunk_size
,
size_t
alignment
);
bool
IsAllocThreadSafe
()
const
override
{
return
true
;
}
using
AllocationList
=
std
::
list
<
AllocationPtr
>
;
using
AllocationListIt
=
AllocationList
::
iterator
;
struct
Chunk
{
struct
Block
{
Block
(
void
*
ptr
,
size_t
size
,
bool
is_free
,
Chunk
*
chunk
)
:
ptr_
(
ptr
),
size_
(
size
),
is_free_
(
is_free
),
chunk_
(
chunk
)
{}
void
*
ptr_
;
size_t
size_
;
bool
is_free_
;
Chunk
*
chunk_
;
// which chunk it is from
};
explicit
Chunk
(
AllocationPtr
allocation
)
:
allocation_
(
std
::
move
(
allocation
))
{}
AllocationPtr
allocation_
;
std
::
list
<
Block
>
blocks_
;
// std::mutex mtx_;
struct
BlockAllocation
:
public
Allocation
{
explicit
BlockAllocation
(
const
std
::
list
<
Block
>::
iterator
&
it
)
:
Allocation
(
it
->
ptr_
,
it
->
size_
,
it
->
chunk_
->
allocation_
->
place
()),
block_it_
(
it
)
{}
std
::
list
<
Block
>::
iterator
block_it_
;
};
};
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Attr
attr
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
private:
using
BlockIt
=
std
::
list
<
Chunk
::
Block
>::
iterator
;
std
::
shared_ptr
<
Allocator
>
underlying_allocator_
;
std
::
list
<
Chunk
>
chunks_
;
std
::
map
<
std
::
pair
<
size_t
,
void
*>
,
BlockIt
>
free_blocks_
;
size_t
chunk_size_
;
size_t
alignment_
;
bool
underlying_allocator_exhaustive_
{
false
};
mutable
std
::
mutex
mtx_
;
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Rounds `size` up to the nearest multiple of `alignment` (4096 when not
// given). A `size` that is already a multiple is returned unchanged.
static inline size_t AlignTo(size_t size, size_t alignment = 4096) {
  const size_t excess = size % alignment;
  if (excess == 0) {
    return size;
  }
  return size + alignment - excess;
}
// Exercises AllocatorFacade with FLAGS_allocator_strategy set to
// "auto_growth_best_fit": allocates on the CPU place and, when built with
// PADDLE_WITH_CUDA, on CUDA device 0 (a small and a 2GB request) and on the
// CUDA pinned place, checking pointer, place and size of each allocation.
TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
  FLAGS_gpu_allocator_retry_time = 500;
  FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif

  FLAGS_allocator_strategy = "auto_growth_best_fit";

  auto& facade = AllocatorFacade::Instance();

  {
    // CPU: the reported size must equal the request rounded up by AlignTo.
    const platform::Place cpu_place = platform::CPUPlace();
    const size_t bytes = 1024;
    auto cpu_allocation = facade.Alloc(cpu_place, bytes);
    ASSERT_NE(cpu_allocation, nullptr);
    ASSERT_NE(cpu_allocation->ptr(), nullptr);
    ASSERT_EQ(cpu_allocation->place(), cpu_place);
    ASSERT_EQ(cpu_allocation->size(), AlignTo(bytes));
  }

#ifdef PADDLE_WITH_CUDA
  {
    // GPU, small request: the reported size may exceed the aligned request.
    const platform::Place gpu_place = platform::CUDAPlace(0);
    const size_t bytes = 1024;
    auto gpu_allocation = facade.Alloc(gpu_place, bytes);
    ASSERT_NE(gpu_allocation, nullptr);
    ASSERT_NE(gpu_allocation->ptr(), nullptr);
    ASSERT_EQ(gpu_allocation->place(), gpu_place);
    ASSERT_GE(gpu_allocation->size(), AlignTo(bytes));
  }

  {
    // Allocate 2GB gpu memory
    const platform::Place gpu_place = platform::CUDAPlace(0);
    const size_t bytes = 2 * static_cast<size_t>(1 << 30);
    auto gpu_allocation = facade.Alloc(gpu_place, bytes);
    ASSERT_NE(gpu_allocation, nullptr);
    ASSERT_NE(gpu_allocation->ptr(), nullptr);
    ASSERT_EQ(gpu_allocation->place(), gpu_place);
    ASSERT_GE(gpu_allocation->size(), AlignTo(bytes));
  }

  {
    // CUDA pinned memory, 1MB request.
    const platform::Place pinned_place = platform::CUDAPinnedPlace();
    const size_t bytes = (1 << 20);
    auto cuda_pinned_allocation = facade.Alloc(pinned_place, bytes);
    ASSERT_NE(cuda_pinned_allocation, nullptr);
    ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
    ASSERT_EQ(cuda_pinned_allocation->place(), pinned_place);
    ASSERT_GE(cuda_pinned_allocation->size(), AlignTo(bytes));
  }
#endif
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <condition_variable> // NOLINT
#include <mutex> // NOLINT
#include <thread> // NOLINT
#include <vector>
#include <iostream>
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Concurrency smoke test for AutoGrowthBestFitAllocator on top of a
// CPUAllocator (chunk size 0, alignment 4096). Ten worker threads block on a
// condition variable until `flag` is set, then each performs ten allocations
// of decreasing size. The test passes when all threads finish.
TEST(allocator, auto_growth_best_fit_allocator) {
  auto cpu_allocator = std::make_shared<CPUAllocator>();

  auto allocator =
      std::make_shared<AutoGrowthBestFitAllocator>(cpu_allocator, 0, 4096);

  std::mutex mtx;
  std::condition_variable cv;
  bool flag = false;

  auto thread_main = [&] {
    {
      // Wait for the start signal so that all workers allocate concurrently.
      std::unique_lock<std::mutex> lock(mtx);
      cv.wait(lock, [&] { return flag; });
    }
    for (size_t i = 10; i > 0; --i) {
      // The returned allocation is a temporary and is discarded right away.
      allocator->Allocate((i + 1) * 1000);
    }
  };

  // Start the workers. The counter must begin at 0: starting it at 10 made
  // the `i < 10` condition false immediately, so no thread was ever created.
  std::vector<std::thread> ths;
  for (size_t i = 0; i < 10; ++i) {
    ths.emplace_back(thread_main);
  }

  {
    std::lock_guard<std::mutex> lock(mtx);
    flag = true;
  }
  cv.notify_all();

  for (auto& th : ths) {
    th.join();
  }
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cpu_allocator.cc
浏览文件 @
c20db635
...
...
@@ -35,9 +35,9 @@ void CPUAllocator::FreeImpl(Allocation *allocation) {
Allocation
*
CPUAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
void
*
p
;
#ifdef _WIN32
p
=
_aligned_malloc
(
size
,
4096
);
p
=
_aligned_malloc
(
size
,
kAlignment
);
#else
PADDLE_ENFORCE_EQ
(
posix_memalign
(
&
p
,
4096
,
size
),
0
,
"Alloc %ld error!"
,
PADDLE_ENFORCE_EQ
(
posix_memalign
(
&
p
,
kAlignment
,
size
),
0
,
"Alloc %ld error!"
,
size
);
#endif
return
new
Allocation
(
p
,
size
,
platform
::
CPUPlace
());
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
c20db635
...
...
@@ -33,7 +33,7 @@ namespace allocation {
// an open-sourced allocator into Paddle.
class
CPUAllocator
:
public
Allocator
{
public:
constexpr
static
size_t
kAlignment
=
64u
;
constexpr
static
size_t
kAlignment
=
4096UL
;
bool
IsAllocThreadSafe
()
const
override
;
protected:
...
...
paddle/fluid/memory/allocation/legacy_allocator.cc
浏览文件 @
c20db635
...
...
@@ -148,12 +148,18 @@ class GPUBuddyAllocatorList {
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
dev_id
)),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE: each GPU device use "
<<
FLAGS_fraction_of_gpu_memory_to_use
*
100
<<
"% of GPU memory.
\n
"
<<
"You can set GFlags environment variable '"
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"' to change the fraction of GPU usage.
\n\n
"
;
VLOG
(
10
)
<<
"
\n\n
NOTE:
\n
"
<<
"You can set GFlags environment variable "
<<
"'FLAGS_fraction_of_gpu_memory_to_use' "
<<
"or 'FLAGS_initial_gpu_memory_in_mb' "
<<
"or 'FLAGS_reallocate_gpu_memory_in_mb' "
<<
"to change the memory size for GPU usage.
\n
"
<<
"Current 'FLAGS_fraction_of_gpu_memory_to_use' value is "
<<
FLAGS_fraction_of_gpu_memory_to_use
<<
". Current 'FLAGS_initial_gpu_memory_in_mb' value is "
<<
FLAGS_initial_gpu_memory_in_mb
<<
". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<<
FLAGS_reallocate_gpu_memory_in_mb
<<
"
\n\n
"
;
});
return
allocators_
[
dev_id
];
}
...
...
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <algorithm>
#include <cctype>
#include <fstream>
#include <limits>
#include <mutex> // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include "paddle/fluid/platform/lock_guard_ptr.h"
// Multiplier applied to a requested size to get the largest cached allocation
// that may still be handed back for that request (see TolerantUpperSize).
DEFINE_double
(
buffered_allocator_excess_times
,
2
,
"Excess memory size times of buffered_allocator. BufferedAllocator"
" would try to reuse memory freed previously, but the size of freed"
" allocation may not be exactly the same as the requested. Here, we"
" use a flag to control the excess times of reused memory size. "
"Not quite sure what is the best excess times value."
);
// Optional path of a text file listing the bin boundaries, one size per line.
// When the flag is empty the built-in default plan is used instead.
DEFINE_string
(
buffered_allocator_division_plan_path
,
""
,
"The file path which "
"determines the memory size division plans of BufferedAllocator."
"If it is empty, use the default division plan. The file must be a "
"text file which each lines indicates the bound of division plan. "
"For example, if the text file has 3 lines, which are '500M', '1G', "
" '2G', the division plan would be [0, 500M), [500M, 1G), [1G, 2G) "
"and [2G, +inf). Allocation request whose requested memory size is "
"inside the last interval of division plan would be dispatched to "
" underlying_allocator directly without caching when freed."
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Strips leading and trailing whitespace from `str` and upper-cases the rest.
// Returns an empty string when `str` is empty or consists only of whitespace.
static std::string TrimStringAndToUpperCase(const std::string &str) {
  // The <cctype> functions have undefined behavior when given a negative
  // value other than EOF, which a plain `char` holding a non-ASCII byte can
  // be. Route every character through unsigned char first.
  auto not_space = [](char ch) {
    return std::isspace(static_cast<unsigned char>(ch)) == 0;
  };

  const auto first = std::find_if(str.begin(), str.end(), not_space);
  if (first == str.end()) return "";

  // base() of the reverse iterator points one past the last non-space char.
  const auto last = std::find_if(str.rbegin(), str.rend(), not_space).base();

  std::string ret(first, last);
  std::for_each(ret.begin(), ret.end(), [](char &ch) {
    ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
  });
  return ret;
}
namespace {
// Result of parsing a single line of a division plan file.
enum DivisionPlanFileStatus {
  kEOF = 0,        // the line was empty after trimming; the reader stops here
  kException = 1,  // the line could not be parsed as a size
  kNormal = 2      // the line was parsed successfully
};
}  // NOLINT
// Parses one line of a division plan file (for example "500M", "1.02gB" or
// "100b") into a number of bytes.
//
// The text is trimmed and upper-cased. An optional trailing 'B' is dropped,
// then an optional 'K', 'M' or 'G' suffix selects a multiplier of 2^10, 2^20
// or 2^30. What remains must be exactly one non-negative decimal number.
//
// On return *ret_code is:
//   kEOF       - the line is empty after trimming;
//   kException - the line is malformed or the value does not fit in size_t;
//   kNormal    - the line was parsed, and the byte count is returned.
// The return value is 0 whenever *ret_code is not kNormal.
static size_t ParseStringToBytes(const std::string &original_str,
                                 DivisionPlanFileStatus *ret_code) {
  std::string str = TrimStringAndToUpperCase(original_str);
  if (str.empty()) {
    *ret_code = kEOF;
    return 0;
  }

  // "KB", "MB", "GB" and plain "B" are accepted; strip the 'B' first.
  if (str.back() == 'B') {
    str.pop_back();
    if (str.empty()) {
      *ret_code = kException;
      return 0;
    }
  }

  size_t multiples = 1;
  switch (str.back()) {
    case 'G':
      multiples *= (static_cast<size_t>(1) << 30);
      break;
    case 'M':
      multiples *= (static_cast<size_t>(1) << 20);
      break;
    case 'K':
      multiples *= (static_cast<size_t>(1) << 10);
      break;
    default:
      break;
  }

  // A unit suffix was recognised; remove it so only the number is left.
  if (multiples != 1) {
    str.pop_back();
    if (str.empty()) {
      *ret_code = kException;
      return 0;
    }
  }

  // Whitespace between the number and the unit ("1 G") is tolerated.
  str = TrimStringAndToUpperCase(str);

  double mem_val = -1.0;
  std::stringstream ss(str);
  if (!(ss >> mem_val) || mem_val < 0) {
    *ret_code = kException;
    return 0;
  }

  // The whole string must have been consumed. Without this check inputs
  // such as "100 K B" or "12abc" would be accepted as 100 and 12 bytes.
  if (ss.peek() != std::stringstream::traits_type::eof()) {
    *ret_code = kException;
    return 0;
  }

  // Converting a double that is out of range for size_t is undefined
  // behavior, so reject values that do not fit.
  const double bytes = mem_val * multiples;
  if (bytes >= static_cast<double>(std::numeric_limits<size_t>::max())) {
    *ret_code = kException;
    return 0;
  }

  *ret_code = kNormal;
  return static_cast<size_t>(bytes);
}
// Formats `plan` for logging as "[<size>, <size>, ]". Every entry, including
// the last one, is followed by ", ".
static std::string GetDebugStringOfPlan(const std::vector<size_t> &plan) {
  std::string text = "[";
  for (size_t idx = 0; idx < plan.size(); ++idx) {
    text += string::HumanReadableSize(plan[idx]);
    text += ", ";
  }
  text += "]";
  return text;
}
// Reads a division plan from the text file at `filepath`, one size per line
// (see ParseStringToBytes for the accepted syntax).
//
// Reading stops at the end of the file or at the first line that is empty
// after trimming. Raises via PADDLE_ENFORCE when the file cannot be opened
// and via PADDLE_THROW when a line is malformed. Returns the parsed sizes in
// file order; no sorting or validation of the plan is done here.
std::vector<size_t> ReadBufferedAllocatorDivisionPlanFromFile(
    const std::string &filepath) {
  std::ifstream is(filepath.c_str());
  PADDLE_ENFORCE(is.good(), "File %s not exist", filepath);

  std::string str;
  std::vector<size_t> plan;
  size_t line_num = 1;
  // Test the stream itself rather than good(): when the last line has no
  // trailing newline, getline reads it successfully but also sets eofbit,
  // so a good() check would silently drop that line.
  while (std::getline(is, str)) {
    DivisionPlanFileStatus status;
    size_t ret = ParseStringToBytes(str, &status);
    if (status == kEOF) {
      break;
    }
    if (status == kException) {
      PADDLE_THROW(
          "Invalid format in line %d of file %s: '%s'. Only support B, KB, MB, "
          "GB.",
          line_num, filepath, str);
    }
    plan.push_back(ret);
    ++line_num;
  }
  return plan;
}
// Validates `*division_plan` and normalizes it in place:
//   - the entries must be strictly increasing, otherwise an error is raised;
//   - a leading 0 is inserted when the plan is empty or does not start at 0;
//   - a trailing numeric_limits<size_t>::max() entry is removed.
static void CheckAndModifyMemoryDivisionPlan(
    std::vector<size_t> *division_plan) {
  auto &plan = *division_plan;

  // The plan is strictly sorted iff no neighbouring pair has lhs >= rhs.
  const auto out_of_order =
      std::adjacent_find(plan.begin(), plan.end(),
                         [](size_t lhs, size_t rhs) { return lhs >= rhs; });
  bool is_strictly_sorted = (out_of_order == plan.end());
  PADDLE_ENFORCE(is_strictly_sorted, "Divison plan must be stricted sorted");

  // Insert 0 into the division plan to keep the binary search code simple
  if (plan.empty() || plan.front() != 0) {
    plan.insert(plan.begin(), 0);
  }

  // Remove MAX from the division plan to keep the binary search code simple
  constexpr auto kSizeTypeMax = std::numeric_limits<size_t>::max();
  if (plan.back() == kSizeTypeMax) {
    plan.pop_back();
  }

  PADDLE_ENFORCE(division_plan->size() >= 1, "Division plan cannot be empty");
}
// Returns the division plan used when the caller does not supply one.
// If FLAGS_buffered_allocator_division_plan_path is set, the plan is read
// from that file; otherwise it is the powers of two from 2^12 to 2^30.
static std::vector<size_t> GetDefaultDivisionPlan() {
  const auto &plan_path = FLAGS_buffered_allocator_division_plan_path;
  if (!plan_path.empty()) {
    return ReadBufferedAllocatorDivisionPlanFromFile(plan_path);
  }

  // Default division plan is 4K, 8K, 16K, ..., 512M, 1G
  constexpr size_t kMinLogSize = 12;
  constexpr size_t kMaxLogSize = 30;
  std::vector<size_t> plan;
  size_t shift = kMinLogSize;
  while (shift <= kMaxLogSize) {
    plan.push_back(static_cast<size_t>(1) << shift);
    ++shift;
  }
  return plan;
}
// Returns the index of the last entry of `bins` that is <= `size`, i.e. the
// bin whose lower bound covers `size`. `bins` must be sorted ascending. When
// no entry is <= `size` (including an empty `bins`) the result wraps around
// to static_cast<size_t>(-1).
inline static size_t FindDivisionPlanBinIndex(const std::vector<size_t> &bins,
                                              size_t size) {
  const auto first = bins.begin();
  // First boundary strictly greater than `size`; the bin sits just before it.
  const auto next_boundary = std::upper_bound(first, bins.end(), size);
  return static_cast<size_t>((next_boundary - first) - 1);
}
// Largest cached allocation size that may be reused for a request of `size`
// bytes: `size` scaled by FLAGS_buffered_allocator_excess_times, truncated.
inline static size_t TolerantUpperSize(size_t size) {
  const auto tolerated = size * FLAGS_buffered_allocator_excess_times;
  return static_cast<size_t>(tolerated);
}
// Convenience constructor: delegates to the two-argument constructor with the
// plan returned by GetDefaultDivisionPlan() (read from the flag-specified
// file when that flag is set, otherwise the built-in powers of two).
MultiBinBufferedAllocator
::
MultiBinBufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
)
:
MultiBinBufferedAllocator
(
std
::
move
(
underlying_allocator
),
GetDefaultDivisionPlan
())
{}
// Builds an allocator that caches freed allocations in bins delimited by
// `division_plan`. The plan is copied, validated and normalized; one cache
// (plus byte counter and mutex slot) is created per bin except the last,
// open-ended interval. Mutexes are only instantiated when the underlying
// allocator reports itself as thread safe.
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
    std::shared_ptr<Allocator> underlying_allocator,
    const std::vector<size_t> &division_plan)
    : underlying_allocator_(std::move(underlying_allocator)),
      division_plan_(division_plan) {
  CheckAndModifyMemoryDivisionPlan(&division_plan_);

  // The last boundary opens the uncached interval, hence size() - 1 bins.
  const size_t bin_count = division_plan_.size() - 1;
  allocations_.resize(bin_count);
  accumulated_cache_size_.assign(bin_count, 0UL);
  mtx_.resize(bin_count);

  if (underlying_allocator_->IsAllocThreadSafe()) {
    for (size_t bin = 0; bin < mtx_.size(); ++bin) {
      mtx_[bin].reset(new std::mutex());
    }
  }

  VLOG(1) << "Division plan is: " << GetDebugStringOfPlan(division_plan_);
  VLOG(1) << "FLAGS_buffered_allocator_excess_times = "
          << FLAGS_buffered_allocator_excess_times;
}
// Takes back `allocation`. If its size falls into one of the cached bins it
// is stored there, keyed by size, for later reuse; otherwise (last interval
// of the plan) it is released to the underlying allocator immediately.
void MultiBinBufferedAllocator::FreeImpl(Allocation *allocation) {
  const auto bin_index =
      FindDivisionPlanBinIndex(division_plan_, allocation->size());

  if (bin_index >= allocations_.size()) {
    underlying_allocator_->Free(allocation);
    return;
  }

  platform::LockGuardPtr<std::mutex> guard(mtx_[bin_index]);
  auto &bin = allocations_[bin_index];
  bin.emplace(allocation->size(), AllocationPtr(allocation));
  accumulated_cache_size_[bin_index] += allocation->size();
}
// Maybe we can design a more flexible FreeCache strategy based on bin_index
// and the requested size.
size_t
MultiBinBufferedAllocator
::
ClearCache
()
{
size_t
accumulated_size
=
0
;
// FIXME(zjl): free the largest first when there is no extra
for
(
size_t
i
=
allocations_
.
size
()
-
1
;
i
!=
static_cast
<
size_t
>
(
-
1
);
--
i
)
{
platform
::
LockGuardPtr
<
std
::
mutex
>
lock
(
mtx_
[
i
]);
allocations_
[
i
].
clear
();
accumulated_size
+=
accumulated_cache_size_
[
i
];
accumulated_cache_size_
[
i
]
=
0
;
}
return
accumulated_size
;
}
// Serves a request of `size` bytes.
//
// First the caches are searched, starting at the bin that contains `size`
// and moving up while the bin's lower bound does not exceed the tolerated
// upper size. The smallest cached allocation that is >= `size` and <= the
// tolerated upper size is reused. If none is found the request goes to the
// underlying allocator; on BadAlloc all caches are cleared and the request
// is retried, rethrowing once clearing frees nothing.
Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
  auto bin_index = FindDivisionPlanBinIndex(division_plan_, size);
  const auto upper_size = TolerantUpperSize(size);

  while (bin_index < allocations_.size() &&
         upper_size >= division_plan_[bin_index]) {
    auto &bin = allocations_[bin_index];
    platform::LockGuardPtr<std::mutex> lock(mtx_[bin_index]);
    auto candidate = bin.lower_bound(size);
    if (candidate != bin.end() && candidate->second->size() <= upper_size) {
      size_t sz = candidate->second->size();
      auto ret = std::move(candidate->second);
      bin.erase(candidate);
      accumulated_cache_size_[bin_index] -= sz;
      VLOG(3) << "Allocate " << sz << "(required " << size
              << ") from cache directly";
      return ret.release();
    }
    ++bin_index;
  }

  for (size_t retry_time = 1;; ++retry_time) {
    try {
      auto ret = underlying_allocator_->Allocate(size, attr).release();
      VLOG(2) << "Allocate " << size << " from underlying directly";
      return ret;
    } catch (BadAlloc &) {
      size_t actual_free_size = ClearCache();
      VLOG(1) << retry_time << "-th free " << actual_free_size
              << " bytes caches";
      // Nothing was cached, so retrying cannot succeed; propagate the error.
      if (actual_free_size == 0) throw;
    }
  }
}
// Intentionally empty.
// NOTE(review): presumably exists only so that other translation units can
// reference a symbol from this file, keeping its gflags definitions linked
// in - confirm against the callers.
void
UseMultiBinBufferedAllocatorGFlags
()
{}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
std
::
vector
<
size_t
>
ReadBufferedAllocatorDivisionPlanFromFile
(
const
std
::
string
&
filepath
);
class
MultiBinBufferedAllocator
:
public
Allocator
{
public:
explicit
MultiBinBufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
);
MultiBinBufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
underlying_allocator
,
const
std
::
vector
<
size_t
>&
division_plan
);
bool
IsAllocThreadSafe
()
const
override
{
return
mtx_
.
front
()
!=
nullptr
;
}
size_t
ClearCache
();
const
std
::
vector
<
size_t
>&
DivisionPlan
()
const
{
return
division_plan_
;
}
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Attr
attr
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
private:
std
::
shared_ptr
<
Allocator
>
underlying_allocator_
;
std
::
vector
<
std
::
multimap
<
size_t
,
AllocationPtr
>>
allocations_
;
std
::
vector
<
size_t
>
accumulated_cache_size_
;
std
::
vector
<
size_t
>
division_plan_
;
std
::
vector
<
std
::
unique_ptr
<
std
::
mutex
>>
mtx_
;
};
extern
void
UseMultiBinBufferedAllocatorGFlags
();
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <gtest/gtest.h>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Test helper: builds a MultiBinBufferedAllocator on top of a
// BestFitAllocator that manages `allocation`. When `thread_safe` is true the
// BestFitAllocator is first wrapped in a LockedAllocator.
inline std::shared_ptr<MultiBinBufferedAllocator> GetBufferedAllocator(
    Allocation *allocation, bool thread_safe) {
  std::shared_ptr<Allocator> underlying(new BestFitAllocator(allocation));
  if (thread_safe) {
    std::shared_ptr<Allocator> locked(
        new LockedAllocator(std::move(underlying)));
    underlying = std::move(locked);
  }
  return std::make_shared<MultiBinBufferedAllocator>(underlying);
}
// The buffered allocator must report the same thread safety as the allocator
// it wraps.
TEST(buffered_allocator, thread_safety) {
  std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
  auto chunk = cpu_allocator->Allocate(1 << 20, cpu_allocator->kDefault);
  auto *backing = chunk.get();

  // Wrapped in a LockedAllocator: thread safe.
  {
    auto buf_allocator = GetBufferedAllocator(backing, true);
    ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true);
  }

  // Bare BestFitAllocator: not thread safe.
  {
    auto buf_allocator = GetBufferedAllocator(backing, false);
    ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false);
  }
}
// Allocation subtype produced by StubAllocator. It adds no members and only
// inherits Allocation's constructors; StubAllocator::FreeImpl uses
// dynamic_cast to this type to check an allocation's origin.
class
StubAllocation
:
public
Allocation
{
public:
using
Allocation
::
Allocation
;
};
// Heap-backed allocator for tests that counts how many times its
// AllocateImpl and FreeImpl are invoked.
class StubAllocator : public Allocator {
 public:
  // Sets both call counters back to zero.
  void ResetCounter() {
    construct_count_ = 0;
    destruct_count_ = 0;
  }

  // Number of AllocateImpl calls since the last reset.
  size_t GetAllocCount() const { return construct_count_; }

  // Number of FreeImpl calls since the last reset.
  size_t GetFreeCount() const { return destruct_count_; }

 protected:
  // Releases the byte buffer (if any) and the allocation object itself.
  // The allocation must be a StubAllocation.
  void FreeImpl(Allocation *allocation) override {
    auto *alloc = dynamic_cast<StubAllocation *>(allocation);
    PADDLE_ENFORCE_NOT_NULL(alloc);
    if (alloc->ptr()) {
      delete[] static_cast<uint8_t *>(alloc->ptr());
    }
    ++destruct_count_;
    delete allocation;
  }

  // Returns a StubAllocation owning `size` bytes from new[], or one with a
  // null pointer when `size` is 0.
  Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override {
    ++construct_count_;
    uint8_t *buffer = (size == 0) ? nullptr : new uint8_t[size];
    return new StubAllocation(buffer, size, platform::CPUPlace());
  }

 private:
  size_t construct_count_ = 0;
  size_t destruct_count_ = 0;
};
// size_t-typed expected values for the ASSERT_EQ checks on the StubAllocator
// counters below.
// NOTE(review): presumably typed as size_t to avoid signed/unsigned
// comparison warnings from gtest - confirm.
constexpr
size_t
kZero
=
0
;
constexpr
size_t
kOne
=
1
;
constexpr
size_t
kTwo
=
2
;
// Checks that freed allocations are cached instead of being returned to the
// underlying allocator, that cached allocations are reused, that ClearCache
// releases them, and that requests in the last interval bypass the cache.
TEST(buffered_allocator, lazy_free) {
  const std::vector<int> original_alloc_size = {1022, 1023, 1024, 1025, 1026};
  for (int alloc_size : original_alloc_size) {
    auto stub = std::make_shared<StubAllocator>();
    StubAllocator *underlying_allocator = stub.get();
    auto allocator = std::make_shared<MultiBinBufferedAllocator>(stub);

    // A fresh request reaches the underlying allocator; freeing it does not.
    {
      underlying_allocator->ResetCounter();
      auto first = allocator->Allocate(alloc_size, allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      first.reset();
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }

    // A smaller request is served from the cache; a larger one is not.
    {
      underlying_allocator->ResetCounter();
      auto reused = allocator->Allocate(900, allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      auto fresh = allocator->Allocate(2048, allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      reused.reset();
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      fresh.reset();
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }

    // Clearing the cache frees both cached allocations.
    {
      underlying_allocator->ResetCounter();
      size_t cache_size = allocator->ClearCache();
      ASSERT_EQ(cache_size, static_cast<size_t>(alloc_size + 2048));
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo);
    }

    // A request at the last boundary is freed directly when released.
    {
      underlying_allocator->ResetCounter();
      auto uncached = allocator->Allocate(allocator->DivisionPlan().back(),
                                          allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    ASSERT_EQ(underlying_allocator->GetFreeCount(), kOne);

    // One byte below the last boundary is still cached on release.
    {
      underlying_allocator->ResetCounter();
      auto cached = allocator->Allocate(allocator->DivisionPlan().back() - 1,
                                        allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
  }
}
// With a 2048-byte backing chunk, a second 1600-byte request must still
// succeed after the first 1600-byte and a 400-byte allocation were released.
TEST(buffered_allocator, garbage_collection) {
  std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
  auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault);
  auto allocator = GetBufferedAllocator(chunk.get(), false);

  auto large = allocator->Allocate(1600, allocator->kDefault);
  auto small = allocator->Allocate(400, allocator->kDefault);
  large.reset();
  small.reset();

  auto x3 = allocator->Allocate(1600, allocator->kDefault);
  ASSERT_NE(x3, nullptr);
  ASSERT_NE(x3->ptr(), nullptr);
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
浏览文件 @
c20db635
...
...
@@ -22,8 +22,6 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_bool
(
enable_buffered_allocator
);
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
...
...
@@ -38,7 +36,6 @@ TEST(allocator, allocator) {
#endif
FLAGS_allocator_strategy
=
"naive_best_fit"
;
FLAGS_enable_buffered_allocator
=
true
;
auto
&
instance
=
AllocatorFacade
::
Instance
();
platform
::
Place
place
;
...
...
paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
DECLARE_string
(
buffered_allocator_division_plan_path
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Writes a plan file with mixed-case units and fractional values, reads it
// back through ReadBufferedAllocatorDivisionPlanFromFile and checks every
// parsed byte count.
TEST(buffered_allocator, division_plan) {
  const std::string path = "/tmp/buffered_allocator_divison_plan";
  FLAGS_buffered_allocator_division_plan_path = path;

  {
    const char *const lines[] = {"100b",   "300.7K", "500.3m",
                                 "1.02gB", "2g",     "4G"};
    std::ofstream os(path);
    for (const char *line : lines) {
      os << line << std::endl;
    }
    os.close();
  }

  auto plan = ReadBufferedAllocatorDivisionPlanFromFile(
      FLAGS_buffered_allocator_division_plan_path);
  ASSERT_EQ(plan.size(), 6UL);
  ASSERT_EQ(plan[0], 100UL);
  ASSERT_EQ(plan[1], static_cast<size_t>(300.7 * 1024));
  ASSERT_EQ(plan[2], static_cast<size_t>(500.3 * 1024 * 1024));
  ASSERT_EQ(plan[3], static_cast<size_t>(1.02 * 1024 * 1024 * 1024));
  ASSERT_EQ(plan[4], static_cast<size_t>(2.0 * 1024 * 1024 * 1024));
  ASSERT_EQ(plan[5], static_cast<size_t>(4.0 * 1024 * 1024 * 1024));
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/pybind/pybind.cc
浏览文件 @
c20db635
...
...
@@ -39,7 +39,6 @@ limitations under the License. */
#include "paddle/fluid/imperative/profiler.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/py_func_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
...
...
@@ -135,8 +134,6 @@ PYBIND11_MODULE(core, m) {
paddle
::
memory
::
allocation
::
UseAllocatorStrategyGFlag
();
paddle
::
memory
::
allocation
::
UseMultiBinBufferedAllocatorGFlags
();
m
.
doc
()
=
"C++ core of PaddlePaddle"
;
// using framework in this function. Since it is inside a function, it will
...
...
python/paddle/fluid/__init__.py
浏览文件 @
c20db635
...
...
@@ -129,9 +129,7 @@ def __bootstrap__():
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'memory_fraction_of_eager_deletion'
,
'allocator_strategy'
,
'enable_buffered_allocator'
,
'buffered_allocator_excess_times'
,
'buffered_allocator_division_plan_path'
,
'reader_queue_speed_test_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
,
'inner_op_parallelism'
,
'enable_parallel_graph'
,
'multiple_of_cupti_buffer_size'
,
'enable_subgraph_optimize'
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录