Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c20db635
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c20db635
编写于
3月 25, 2019
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
split PR
test=develop
上级
c75a8803
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
31 addition
and
1205 deletion
+31
-1205
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+0
-2
paddle/fluid/framework/inlined_vector.h
paddle/fluid/framework/inlined_vector.h
+0
-82
paddle/fluid/framework/inlined_vector_test.cc
paddle/fluid/framework/inlined_vector_test.cc
+0
-53
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+4
-13
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+6
-5
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+3
-52
paddle/fluid/memory/allocation/allocator_strategy.cc
paddle/fluid/memory/allocation/allocator_strategy.cc
+1
-4
paddle/fluid/memory/allocation/allocator_strategy.h
paddle/fluid/memory/allocation/allocator_strategy.h
+1
-1
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
...fluid/memory/allocation/auto_growth_best_fit_allocator.cc
+0
-134
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
.../fluid/memory/allocation/auto_growth_best_fit_allocator.h
+0
-87
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
.../allocation/auto_growth_best_fit_allocator_facade_test.cc
+0
-96
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
.../memory/allocation/auto_growth_best_fit_allocator_test.cc
+0
-70
paddle/fluid/memory/allocation/cpu_allocator.cc
paddle/fluid/memory/allocation/cpu_allocator.cc
+2
-2
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+1
-1
paddle/fluid/memory/allocation/legacy_allocator.cc
paddle/fluid/memory/allocation/legacy_allocator.cc
+12
-6
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
...e/fluid/memory/allocation/multi_bin_buffered_allocator.cc
+0
-300
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
...le/fluid/memory/allocation/multi_bin_buffered_allocator.h
+0
-62
paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
...id/memory/allocation/multi_bin_buffered_allocator_test.cc
+0
-170
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
...memory/allocation/naive_best_fit_allocator_facade_test.cc
+0
-3
paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc
...cation/test_multi_bin_buffered_allocator_division_plan.cc
+0
-56
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+0
-3
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-3
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
c20db635
...
...
@@ -202,8 +202,6 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
cc_test
(
tuple_test SRCS tuple_test.cc
)
cc_test
(
inlined_vector_test SRCS inlined_vector_test.cc
)
if
(
NOT WIN32
)
cc_test
(
rw_lock_test SRCS rw_lock_test.cc
)
endif
(
NOT WIN32
)
...
...
paddle/fluid/framework/inlined_vector.h
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
template
<
typename
T
,
size_t
N
>
class
InlinedVector
{
static_assert
(
N
>
0
,
"N must be larger than 0"
);
public:
inline
void
push_back
(
const
T
&
item
)
{
if
(
size_
<
N
)
{
head_
[
size_
]
=
item
;
}
else
{
tail_
.
emplace_back
(
item
);
}
++
size_
;
}
inline
void
pop_back
()
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to pop back element from empty vector."
);
if
(
size_
>
N
)
{
tail_
.
pop_back
();
}
--
size_
;
}
inline
const
T
&
back
()
const
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to get back element of empty vector."
);
return
size_
<=
N
?
head_
[
size_
-
1
]
:
tail_
.
back
();
}
inline
T
&
back
()
{
PADDLE_ENFORCE
(
!
empty
(),
"Try to get back element of empty vector."
);
return
size_
<=
N
?
head_
[
size_
-
1
]
:
tail_
.
back
();
}
inline
bool
empty
()
const
{
return
size_
==
0
;
}
inline
size_t
size
()
const
{
return
size_
;
}
// This API can only be used in unittest
T
&
operator
[](
size_t
i
)
{
return
i
<
N
?
head_
[
i
]
:
tail_
[
i
-
N
];
}
const
T
&
operator
[](
size_t
i
)
const
{
return
i
<
N
?
head_
[
i
]
:
tail_
[
i
-
N
];
}
operator
std
::
vector
<
T
>
()
const
{
std
::
vector
<
T
>
ret
;
ret
.
reserve
(
size_
);
for
(
size_t
i
=
0
;
i
<
size_
;
++
i
)
{
ret
.
emplace_back
((
*
this
)[
i
]);
}
return
ret
;
}
private:
T
head_
[
N
];
size_t
size_
{
0
};
std
::
vector
<
T
>
tail_
;
};
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/inlined_vector_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/inlined_vector.h"
#include <vector>
#include "gtest/gtest.h"
namespace
paddle
{
namespace
framework
{
TEST
(
inlined_stack
,
inlined_stack
)
{
size_t
max_num
=
10
;
InlinedVector
<
size_t
,
5
>
stack
;
for
(
size_t
i
=
0
;
i
<
max_num
;
++
i
)
{
ASSERT_EQ
(
stack
.
size
(),
i
);
stack
.
push_back
(
i
);
ASSERT_EQ
(
stack
.
size
(),
i
+
1
);
}
std
::
vector
<
size_t
>
vec
=
stack
;
ASSERT_EQ
(
stack
.
size
(),
vec
.
size
());
for
(
size_t
i
=
0
;
i
<
vec
.
size
();
++
i
)
{
ASSERT_EQ
(
stack
[
i
],
vec
[
i
]);
}
for
(
size_t
i
=
0
;
i
<
max_num
;
++
i
)
{
ASSERT_EQ
(
stack
[
i
],
i
);
}
for
(
size_t
i
=
0
;
i
<
max_num
;
++
i
)
{
ASSERT_EQ
(
stack
.
back
(),
max_num
-
1
-
i
);
stack
.
pop_back
();
ASSERT_EQ
(
stack
.
size
(),
max_num
-
1
-
i
);
}
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
c20db635
...
...
@@ -3,18 +3,9 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library
(
best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator
)
cc_library
(
locked_allocator SRCS locked_allocator.cc DEPS allocator
)
cc_library
(
buffered_allocator SRCS buffered_allocator.cc DEPS allocator
)
cc_library
(
multi_bin_buffered_allocator SRCS multi_bin_buffered_allocator.cc DEPS allocator gflags
)
cc_library
(
legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator profiler
)
cc_library
(
zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
cc_test
(
multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator
)
cc_library
(
auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator
)
cc_test
(
auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator
)
if
(
NOT WIN32
)
cc_test
(
test_multi_bin_buffered_allocator_division_plan SRCS test_multi_bin_buffered_allocator_division_plan.cc DEPS multi_bin_buffered_allocator
)
endif
()
if
(
WITH_GPU
)
nv_library
(
cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard
)
...
...
@@ -47,7 +38,7 @@ else ()
set
(
AllocatorFacadeDeps
)
endif
()
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator
multi_bin_buffered_allocator auto_growth_best_fit_allocator
legacy_allocator zero_size_allocator
)
list
(
APPEND AllocatorFacadeDeps cpu_allocator locked_allocator best_fit_allocator aligned_allocator auto_increment_allocator conditional_allocator retry_allocator buffered_allocator legacy_allocator zero_size_allocator
)
cc_library
(
aligned_allocator SRCS aligned_allocator.cc DEPS allocator
)
cc_library
(
auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator
)
...
...
@@ -59,8 +50,8 @@ nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocat
cc_test
(
retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator
)
cc_test
(
allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
naive_best_fit_allocator_facade_test SRCS naive_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade
)
cc_test
(
allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade
)
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
c20db635
...
...
@@ -17,7 +17,6 @@
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/inlined_vector.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
...
...
@@ -50,7 +49,9 @@ class Allocator;
class
Allocation
{
public:
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
:
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{}
:
ptr_
(
ptr
),
size_
(
size
),
place_
(
place
)
{
decorated_allocators_
.
reserve
(
8
);
}
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
...
...
@@ -80,8 +81,8 @@ class Allocation {
virtual
~
Allocation
();
private:
std
::
vector
<
Allocator
*>
DecoratedAllocators
()
const
{
return
static_cast
<
std
::
vector
<
Allocator
*>>
(
decorated_allocators_
)
;
const
std
::
vector
<
Allocator
*>&
DecoratedAllocators
()
const
{
return
decorated_allocators_
;
}
inline
void
RegisterDecoratedAllocator
(
Allocator
*
allocator
)
{
...
...
@@ -98,7 +99,7 @@ class Allocation {
void
*
ptr_
;
size_t
size_
;
platform
::
Place
place_
;
framework
::
InlinedVector
<
Allocator
*
,
8
>
decorated_allocators_
;
std
::
vector
<
Allocator
*
>
decorated_allocators_
;
friend
class
Allocator
;
friend
class
AllocationDeleter
;
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
c20db635
...
...
@@ -22,14 +22,12 @@
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
...
...
@@ -47,24 +45,18 @@ DEFINE_int64(
"The retry time (milliseconds) when allocator fails "
"to allocate memory. No retry if this value is not greater than 0"
);
DEFINE_bool
(
enable_buffered_allocator
,
false
,
"Enable buffered_allocator"
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAndBufferedAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
,
bool
enable_buffered
)
{
static
inline
std
::
shared_ptr
<
Allocator
>
WrapRetryAllocator
(
std
::
shared_ptr
<
Allocator
>
allocator
,
int64_t
retry_time
)
{
if
(
retry_time
>
0
)
{
auto
*
retry_allocator
=
new
RetryAllocator
(
std
::
move
(
allocator
),
retry_time
);
allocator
.
reset
(
retry_allocator
);
}
if
(
enable_buffered
)
{
allocator
.
reset
(
new
MultiBinBufferedAllocator
(
allocator
));
}
return
allocator
;
}
...
...
@@ -134,8 +126,7 @@ class ChunkedAllocator : public Allocator {
std
::
shared_ptr
<
Allocator
>
allocator
(
new
LockedAllocator
(
std
::
shared_ptr
<
Allocator
>
(
new
BestFitAllocator
(
allocation
))));
allocator
=
WrapRetryAndBufferedAllocator
(
allocator
,
retry_time_
,
FLAGS_enable_buffered_allocator
);
allocator
=
WrapRetryAllocator
(
allocator
,
retry_time_
);
return
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
move
(
allocator
));
}
...
...
@@ -219,13 +210,6 @@ class AllocatorFacadePrivate {
WrapZeroSizeAllocator
();
break
;
}
case
AllocatorStrategy
::
kAutoGrowthBestFit
:
{
InitAutoGrowthCPUAllocator
();
InitAutoGrowthCUDAAllocator
();
InitAutoGrowthCUDAPinnedAllocator
();
WrapZeroSizeAllocator
();
break
;
}
default:
{
PADDLE_THROW
(
"Unsupported allocator strategy: %d"
,
static_cast
<
int
>
(
strategy
));
...
...
@@ -234,39 +218,6 @@ class AllocatorFacadePrivate {
}
private:
void
InitAutoGrowthCPUAllocator
()
{
auto
cpu_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CPUAllocator
>
());
allocators_
[
platform
::
CPUPlace
()]
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cpu_allocator
,
platform
::
CpuMaxChunkSize
(),
4096
);
}
void
InitAutoGrowthCUDAAllocator
()
{
#ifdef PADDLE_WITH_CUDA
int
dev_cnt
=
platform
::
GetCUDADeviceCount
();
for
(
int
dev_id
=
0
;
dev_id
<
dev_cnt
;
++
dev_id
)
{
auto
cuda_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CUDAAllocator
>
(
platform
::
CUDAPlace
(
dev_id
)));
auto
allocator
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cuda_allocator
,
platform
::
GpuMaxChunkSize
(),
4096
);
allocators_
[
platform
::
CUDAPlace
(
dev_id
)]
=
WrapRetryAndBufferedAllocator
(
allocator
,
FLAGS_gpu_allocator_retry_time
,
false
);
}
#endif
}
void
InitAutoGrowthCUDAPinnedAllocator
()
{
#ifdef PADDLE_WITH_CUDA
auto
cuda_pinned_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CPUPinnedAllocator
>
());
allocators_
[
platform
::
CUDAPinnedPlace
()]
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cuda_pinned_allocator
,
platform
::
CUDAPinnedMaxChunkSize
(),
4096
);
#endif
}
void
InitLegacyAllocator
()
{
std
::
vector
<
platform
::
Place
>
places
{
platform
::
CPUPlace
()};
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/memory/allocation/allocator_strategy.cc
浏览文件 @
c20db635
...
...
@@ -20,8 +20,7 @@ DEFINE_string(
allocator_strategy
,
"legacy"
,
"The allocation strategy. Legacy means the original allocator of Fluid."
"naive_best_fit means the experimental best fit allocator. "
"auto_growth_best_fit means the experimental auto growth best fit "
"allocator. Enum in [legacy, naive_best_fit, auto_growth_best_fit]."
);
"allocator. Enum in [legacy, naive_best_fit]."
);
namespace
paddle
{
namespace
memory
{
...
...
@@ -32,8 +31,6 @@ static AllocatorStrategy GetStrategyFromFlag() {
return
AllocatorStrategy
::
kLegacy
;
}
else
if
(
FLAGS_allocator_strategy
==
"naive_best_fit"
)
{
return
AllocatorStrategy
::
kNaiveBestFit
;
}
else
if
(
FLAGS_allocator_strategy
==
"auto_growth_best_fit"
)
{
return
AllocatorStrategy
::
kAutoGrowthBestFit
;
}
else
{
PADDLE_THROW
(
"Unsupported allocator strategy: %s"
,
FLAGS_allocator_strategy
);
...
...
paddle/fluid/memory/allocation/allocator_strategy.h
浏览文件 @
c20db635
...
...
@@ -18,7 +18,7 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
enum
class
AllocatorStrategy
{
kLegacy
,
kNaiveBestFit
,
kAutoGrowthBestFit
};
enum
class
AllocatorStrategy
{
kLegacy
,
kNaiveBestFit
};
extern
AllocatorStrategy
GetAllocatorStrategy
();
...
...
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include <algorithm>
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <unordered_map>
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
size_t
align
(
size_t
size
,
size_t
alignment
)
{
auto
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
alignment
-
remaining
;
}
AutoGrowthBestFitAllocator
::
AutoGrowthBestFitAllocator
(
const
std
::
shared_ptr
<
Allocator
>
&
underlying_allocator
,
size_t
chunk_size
,
size_t
alignment
)
:
underlying_allocator_
(
underlying_allocator
),
chunk_size_
(
align
(
chunk_size
,
alignment
)),
alignment_
(
alignment
)
{}
Allocation
*
AutoGrowthBestFitAllocator
::
AllocateImpl
(
size_t
size
,
Attr
attr
)
{
size
=
align
(
size
,
alignment_
);
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
auto
iter
=
free_blocks_
.
lower_bound
(
std
::
make_pair
(
size
,
nullptr
));
BlockIt
block_it
;
if
(
iter
!=
free_blocks_
.
end
())
{
VLOG
(
2
)
<<
"Found "
<<
iter
->
second
->
size_
<<
" for "
<<
size
;
block_it
=
iter
->
second
;
free_blocks_
.
erase
(
iter
);
auto
*
chunk
=
block_it
->
chunk_
;
size_t
remaining_size
=
block_it
->
size_
-
size
;
if
(
remaining_size
==
0
)
{
block_it
->
is_free_
=
false
;
VLOG
(
2
)
<<
"Found and no remaining"
;
}
else
{
auto
remaining_free_block
=
chunk
->
blocks_
.
insert
(
block_it
,
Chunk
::
Block
(
block_it
->
ptr_
,
remaining_size
,
true
,
chunk
));
free_blocks_
.
emplace
(
std
::
make_pair
(
remaining_size
,
block_it
->
ptr_
),
remaining_free_block
);
block_it
->
ptr_
=
reinterpret_cast
<
uint8_t
*>
(
block_it
->
ptr_
)
+
remaining_size
;
block_it
->
size_
=
size
;
block_it
->
is_free_
=
false
;
VLOG
(
2
)
<<
"Found and remaining "
<<
remaining_size
;
}
}
else
{
size_t
alloc_size
=
size
;
if
(
!
underlying_allocator_exhaustive_
&&
chunk_size_
>
size
)
{
alloc_size
=
chunk_size_
;
}
try
{
chunks_
.
emplace_back
(
underlying_allocator_
->
Allocate
(
alloc_size
,
attr
));
}
catch
(
BadAlloc
&
ex
)
{
if
(
size
==
alloc_size
)
throw
ex
;
underlying_allocator_exhaustive_
=
true
;
alloc_size
=
size
;
chunks_
.
emplace_back
(
underlying_allocator_
->
Allocate
(
alloc_size
,
attr
));
}
auto
*
chunk
=
&
(
*
chunks_
.
rbegin
());
uint8_t
*
p
=
reinterpret_cast
<
uint8_t
*>
(
chunk
->
allocation_
->
ptr
());
auto
&
blocks
=
chunk
->
blocks_
;
size_t
remaining_size
=
alloc_size
-
size
;
if
(
remaining_size
>
0
)
{
blocks
.
emplace_back
(
p
,
remaining_size
,
true
,
chunk
);
free_blocks_
.
emplace
(
std
::
make_pair
(
remaining_size
,
p
),
--
(
blocks
.
end
()));
}
blocks
.
emplace_back
(
p
+
remaining_size
,
size
,
false
,
chunk
);
block_it
=
--
(
blocks
.
end
());
VLOG
(
2
)
<<
"Not found and allocate "
<<
alloc_size
<<
", and remaining "
<<
remaining_size
;
}
VLOG
(
2
)
<<
"After allocate, free blocks "
<<
free_blocks_
.
size
();
return
new
Chunk
::
BlockAllocation
(
block_it
);
}
void
AutoGrowthBestFitAllocator
::
FreeImpl
(
Allocation
*
allocation
)
{
auto
&
block_it
=
static_cast
<
Chunk
::
BlockAllocation
*>
(
allocation
)
->
block_it_
;
auto
&
blocks
=
block_it
->
chunk_
->
blocks_
;
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
block_it
->
is_free_
=
true
;
if
(
block_it
!=
blocks
.
begin
())
{
auto
prev_it
=
block_it
;
--
prev_it
;
if
(
prev_it
->
is_free_
)
{
free_blocks_
.
erase
(
std
::
make_pair
(
prev_it
->
size_
,
prev_it
->
ptr_
));
prev_it
->
size_
+=
block_it
->
size_
;
blocks
.
erase
(
block_it
);
block_it
=
prev_it
;
}
}
auto
next_it
=
block_it
;
++
next_it
;
if
(
next_it
!=
blocks
.
end
()
&&
next_it
->
is_free_
)
{
free_blocks_
.
erase
(
std
::
make_pair
(
next_it
->
size_
,
next_it
->
ptr_
));
block_it
->
size_
+=
next_it
->
size_
;
blocks
.
erase
(
next_it
);
}
free_blocks_
.
emplace
(
std
::
make_pair
(
block_it
->
size_
,
block_it
->
ptr_
),
block_it
);
VLOG
(
2
)
<<
"Combine "
<<
block_it
->
size_
<<
", "
<<
blocks
.
size
()
<<
", "
<<
free_blocks_
.
size
();
delete
allocation
;
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
class
AutoGrowthBestFitAllocator
:
public
Allocator
{
public:
explicit
AutoGrowthBestFitAllocator
(
const
std
::
shared_ptr
<
Allocator
>
&
underlying_allocator
,
size_t
chunk_size
,
size_t
alignment
);
bool
IsAllocThreadSafe
()
const
override
{
return
true
;
}
using
AllocationList
=
std
::
list
<
AllocationPtr
>
;
using
AllocationListIt
=
AllocationList
::
iterator
;
struct
Chunk
{
struct
Block
{
Block
(
void
*
ptr
,
size_t
size
,
bool
is_free
,
Chunk
*
chunk
)
:
ptr_
(
ptr
),
size_
(
size
),
is_free_
(
is_free
),
chunk_
(
chunk
)
{}
void
*
ptr_
;
size_t
size_
;
bool
is_free_
;
Chunk
*
chunk_
;
// which chunk it is from
};
explicit
Chunk
(
AllocationPtr
allocation
)
:
allocation_
(
std
::
move
(
allocation
))
{}
AllocationPtr
allocation_
;
std
::
list
<
Block
>
blocks_
;
// std::mutex mtx_;
struct
BlockAllocation
:
public
Allocation
{
explicit
BlockAllocation
(
const
std
::
list
<
Block
>::
iterator
&
it
)
:
Allocation
(
it
->
ptr_
,
it
->
size_
,
it
->
chunk_
->
allocation_
->
place
()),
block_it_
(
it
)
{}
std
::
list
<
Block
>::
iterator
block_it_
;
};
};
protected:
Allocation
*
AllocateImpl
(
size_t
size
,
Attr
attr
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
private:
using
BlockIt
=
std
::
list
<
Chunk
::
Block
>::
iterator
;
std
::
shared_ptr
<
Allocator
>
underlying_allocator_
;
std
::
list
<
Chunk
>
chunks_
;
std
::
map
<
std
::
pair
<
size_t
,
void
*>
,
BlockIt
>
free_blocks_
;
size_t
chunk_size_
;
size_t
alignment_
;
bool
underlying_allocator_exhaustive_
{
false
};
mutable
std
::
mutex
mtx_
;
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_double
(
fraction_of_cuda_pinned_memory_to_use
);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
static
inline
size_t
AlignTo
(
size_t
size
,
size_t
alignment
=
4096
)
{
auto
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
alignment
-
remaining
;
}
TEST
(
allocator
,
allocator
)
{
#ifdef PADDLE_WITH_CUDA
FLAGS_fraction_of_gpu_memory_to_use
=
0.01
;
FLAGS_gpu_allocator_retry_time
=
500
;
FLAGS_fraction_of_cuda_pinned_memory_to_use
=
0.5
;
#endif
FLAGS_allocator_strategy
=
"auto_growth_best_fit"
;
auto
&
instance
=
AllocatorFacade
::
Instance
();
size_t
size
=
1024
;
platform
::
Place
place
;
{
place
=
platform
::
CPUPlace
();
size
=
1024
;
auto
cpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
cpu_allocation
,
nullptr
);
ASSERT_NE
(
cpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cpu_allocation
->
place
(),
place
);
ASSERT_EQ
(
cpu_allocation
->
size
(),
AlignTo
(
size
));
}
#ifdef PADDLE_WITH_CUDA
{
place
=
platform
::
CUDAPlace
(
0
);
size
=
1024
;
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
AlignTo
(
size
));
}
{
// Allocate 2GB gpu memory
place
=
platform
::
CUDAPlace
(
0
);
size
=
2
*
static_cast
<
size_t
>
(
1
<<
30
);
auto
gpu_allocation
=
instance
.
Alloc
(
place
,
size
);
ASSERT_NE
(
gpu_allocation
,
nullptr
);
ASSERT_NE
(
gpu_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
gpu_allocation
->
place
(),
place
);
ASSERT_GE
(
gpu_allocation
->
size
(),
AlignTo
(
size
));
}
{
place
=
platform
::
CUDAPinnedPlace
();
size
=
(
1
<<
20
);
auto
cuda_pinned_allocation
=
instance
.
Alloc
(
platform
::
CUDAPinnedPlace
(),
1
<<
20
);
ASSERT_NE
(
cuda_pinned_allocation
,
nullptr
);
ASSERT_NE
(
cuda_pinned_allocation
->
ptr
(),
nullptr
);
ASSERT_EQ
(
cuda_pinned_allocation
->
place
(),
place
);
ASSERT_GE
(
cuda_pinned_allocation
->
size
(),
AlignTo
(
size
));
}
#endif
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <condition_variable> // NOLINT
#include <mutex> // NOLINT
#include <thread> // NOLINT
#include <vector>
#include <iostream>
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
TEST
(
allocator
,
auto_growth_best_fit_allocator
)
{
auto
cpu_allocator
=
std
::
make_shared
<
CPUAllocator
>
();
auto
allocator
=
std
::
make_shared
<
AutoGrowthBestFitAllocator
>
(
cpu_allocator
,
0
,
4096
);
std
::
mutex
mtx
;
std
::
condition_variable
cv
;
bool
flag
=
false
;
auto
thread_main
=
[
&
]
{
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mtx
);
cv
.
wait
(
lock
,
[
&
]
{
return
flag
;
});
}
for
(
size_t
i
=
10
;
i
>
0
;
--
i
)
{
allocator
->
Allocate
((
i
+
1
)
*
1000
);
}
};
std
::
vector
<
std
::
thread
>
ths
;
for
(
size_t
i
=
10
;
i
<
10
;
++
i
)
{
ths
.
emplace_back
(
thread_main
);
}
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mtx
);
flag
=
true
;
}
cv
.
notify_all
();
for
(
auto
&
th
:
ths
)
{
th
.
join
();
}
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cpu_allocator.cc
浏览文件 @
c20db635
...
...
@@ -35,9 +35,9 @@ void CPUAllocator::FreeImpl(Allocation *allocation) {
Allocation
*
CPUAllocator
::
AllocateImpl
(
size_t
size
,
Allocator
::
Attr
attr
)
{
void
*
p
;
#ifdef _WIN32
p
=
_aligned_malloc
(
size
,
4096
);
p
=
_aligned_malloc
(
size
,
kAlignment
);
#else
PADDLE_ENFORCE_EQ
(
posix_memalign
(
&
p
,
4096
,
size
),
0
,
"Alloc %ld error!"
,
PADDLE_ENFORCE_EQ
(
posix_memalign
(
&
p
,
kAlignment
,
size
),
0
,
"Alloc %ld error!"
,
size
);
#endif
return
new
Allocation
(
p
,
size
,
platform
::
CPUPlace
());
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
c20db635
...
...
@@ -33,7 +33,7 @@ namespace allocation {
// an open-sourced allocator into Paddle.
class
CPUAllocator
:
public
Allocator
{
public:
constexpr
static
size_t
kAlignment
=
64u
;
constexpr
static
size_t
kAlignment
=
4096UL
;
bool
IsAllocThreadSafe
()
const
override
;
protected:
...
...
paddle/fluid/memory/allocation/legacy_allocator.cc
浏览文件 @
c20db635
...
...
@@ -148,12 +148,18 @@ class GPUBuddyAllocatorList {
std
::
unique_ptr
<
detail
::
SystemAllocator
>
(
new
detail
::
GPUAllocator
(
dev_id
)),
platform
::
GpuMinChunkSize
(),
platform
::
GpuMaxChunkSize
());
VLOG
(
10
)
<<
"
\n\n
NOTE: each GPU device use "
<<
FLAGS_fraction_of_gpu_memory_to_use
*
100
<<
"% of GPU memory.
\n
"
<<
"You can set GFlags environment variable '"
<<
"FLAGS_fraction_of_gpu_memory_to_use"
<<
"' to change the fraction of GPU usage.
\n\n
"
;
VLOG
(
10
)
<<
"
\n\n
NOTE:
\n
"
<<
"You can set GFlags environment variable "
<<
"'FLAGS_fraction_of_gpu_memory_to_use' "
<<
"or 'FLAGS_initial_gpu_memory_in_mb' "
<<
"or 'FLAGS_reallocate_gpu_memory_in_mb' "
<<
"to change the memory size for GPU usage.
\n
"
<<
"Current 'FLAGS_fraction_of_gpu_memory_to_use' value is "
<<
FLAGS_fraction_of_gpu_memory_to_use
<<
". Current 'FLAGS_initial_gpu_memory_in_mb' value is "
<<
FLAGS_initial_gpu_memory_in_mb
<<
". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
<<
FLAGS_reallocate_gpu_memory_in_mb
<<
"
\n\n
"
;
});
return
allocators_
[
dev_id
];
}
...
...
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <algorithm>
#include <cctype>
#include <fstream>
#include <limits>
#include <mutex> // NOLINT
#include <sstream>
#include <string>
#include <utility>
#include "paddle/fluid/platform/lock_guard_ptr.h"
DEFINE_double
(
buffered_allocator_excess_times
,
2
,
"Excess memory size times of buffered_allocator. BufferedAllocator"
" would try to reuse memory freed previously, but the size of freed"
" allocation may not be exactly the same as the requested. Here, we"
" use a flag to control the excess times of reused memory size. "
"Not quite sure what is the best excess times value."
);
DEFINE_string
(
buffered_allocator_division_plan_path
,
""
,
"The file path which "
"determines the memory size division plans of BufferedAllocator."
"If it is empty, use the default division plan. The file must be a "
"text file which each lines indicates the bound of division plan. "
"For example, if the text file has 3 lines, which are '500M', '1G', "
" '2G', the division plan would be [0, 500M), [500M, 1G), [1G, 2G) "
"and [2G, +inf). Allocation request whose requested memory size is "
"inside the last interval of division plan would be dispatched to "
" underlying_allocator directly without caching when freed."
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Returns a copy of `str` with leading/trailing whitespace removed and all
// remaining characters converted to upper case. Returns "" when `str` is
// empty or contains only whitespace.
static std::string TrimStringAndToUpperCase(const std::string &str) {
  // Convert to unsigned char before calling <cctype> functions: passing a
  // plain char with a negative value (possible for non-ASCII bytes) is
  // undefined behavior.
  auto not_space = [](char ch) {
    return std::isspace(static_cast<unsigned char>(ch)) == 0;
  };
  auto first_idx = static_cast<size_t>(
      std::find_if(str.begin(), str.end(), not_space) - str.begin());
  auto last_idx = static_cast<size_t>(
      std::find_if(str.rbegin(), str.rend(), not_space) - str.rbegin());
  if (first_idx == str.size() || last_idx == str.size()) return "";
  // last_idx was measured from the back; convert it into a forward offset
  // one past the last non-space character.
  last_idx = str.size() - last_idx;
  auto ret = str.substr(first_idx, last_idx - first_idx);
  std::for_each(ret.begin(), ret.end(), [](char &ch) {
    ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
  });
  return ret;
}

namespace {
// Per-line parsing status for division-plan files.
enum DivisionPlanFileStatus { kEOF, kException, kNormal };
}  // NOLINT

// Parses one division-plan entry such as "100b", "300.7K", "500.3m", "1.02gB"
// into a byte count. Suffixes are case-insensitive: optional K/M/G scale
// factor followed by an optional 'B'. *ret_code reports kEOF for a blank
// line, kException for a malformed entry, kNormal on success.
static size_t ParseStringToBytes(const std::string &original_str,
                                 DivisionPlanFileStatus *ret_code) {
  std::string str = TrimStringAndToUpperCase(original_str);
  if (str.empty()) {
    *ret_code = kEOF;
    return 0;
  }
  // Strip the optional trailing 'B' ("100B", "1GB").
  if (str.back() == 'B') {
    str.pop_back();
    if (str.empty()) {
      *ret_code = kException;
      return 0;
    }
  }
  size_t multiples = 1;
  switch (str.back()) {
    case 'G':
      multiples *= (static_cast<size_t>(1) << 30);
      break;
    case 'M':
      multiples *= (static_cast<size_t>(1) << 20);
      break;
    case 'K':
      multiples *= (static_cast<size_t>(1) << 10);
      break;
    default:
      break;
  }
  if (multiples != 1) {
    str.pop_back();
    if (str.empty()) {
      *ret_code = kException;
      return 0;
    }
  }
  str = TrimStringAndToUpperCase(str);
  double mem_val = -1.0;
  std::stringstream ss(str);
  // Require the whole numeric part to be consumed (ss.eof()) so that inputs
  // like "12x34M" are rejected instead of being silently parsed as "12".
  if (!(ss >> mem_val) || mem_val < 0 || !ss.eof()) {
    *ret_code = kException;
    return 0;
  }
  *ret_code = kNormal;
  return static_cast<size_t>(mem_val * multiples);
}
// Builds a human-readable representation of a division plan for logging,
// e.g. "[4.0KB, 8.0KB, 16.0KB]". Fixed: the separator is now only emitted
// between elements, so there is no trailing ", " before the closing bracket.
static std::string GetDebugStringOfPlan(const std::vector<size_t> &plan) {
  std::string ret("[");
  bool first = true;
  for (auto sz : plan) {
    if (!first) ret += ", ";
    first = false;
    ret += string::HumanReadableSize(sz);
  }
  return ret + "]";
}
// Reads a division plan from a text file containing one size per line
// (e.g. "500M", "1G"). Parsing stops at the first blank line or at end of
// file. Throws when the file is missing or a line is malformed.
std::vector<size_t> ReadBufferedAllocatorDivisionPlanFromFile(
    const std::string &filepath) {
  std::ifstream is(filepath.c_str());
  PADDLE_ENFORCE(is.good(), "File %s not exist", filepath);
  std::string str;
  std::vector<size_t> plan;
  size_t line_num = 1;
  // Test the stream's bool conversion instead of .good(): std::getline sets
  // eofbit when the final line has no trailing newline, and a .good() check
  // would silently drop that last (successfully read) line.
  while (std::getline(is, str)) {
    DivisionPlanFileStatus status;
    size_t ret = ParseStringToBytes(str, &status);
    if (status == kEOF) {
      break;
    }
    if (status == kException) {
      PADDLE_THROW(
          "Invalid format in line %d of file %s: '%s'. Only support B, KB, MB, "
          "GB.",
          line_num, filepath, str);
    }
    plan.push_back(ret);
    ++line_num;
  }
  return plan;
}
// Validates that the division plan is strictly increasing, then normalizes
// it in place: a leading 0 is inserted and a trailing SIZE_MAX removed so
// that the binary search in FindDivisionPlanBinIndex stays branch-free.
static void CheckAndModifyMemoryDivisionPlan(
    std::vector<size_t> *division_plan) {
  // Check whether the division plan is strictly sorted:
  // adjacent_find locates the first non-increasing adjacent pair, if any.
  bool is_strictly_sorted =
      std::adjacent_find(division_plan->begin(), division_plan->end(),
                         [](size_t prev, size_t cur) { return prev >= cur; }) ==
      division_plan->end();
  PADDLE_ENFORCE(is_strictly_sorted, "Divison plan must be stricted sorted");

  // Insert 0 to disivion plan for clean binary searching code
  if (division_plan->empty() || division_plan->front() != 0) {
    division_plan->insert(division_plan->begin(), 0);
  }

  // Remove MAX from disivion plan for clean binary searching code
  constexpr auto kSizeTypeMax = std::numeric_limits<size_t>::max();
  if (division_plan->back() == kSizeTypeMax) {
    division_plan->pop_back();
  }

  PADDLE_ENFORCE(division_plan->size() >= 1, "Division plan cannot be empty");
}
static
std
::
vector
<
size_t
>
GetDefaultDivisionPlan
()
{
if
(
!
FLAGS_buffered_allocator_division_plan_path
.
empty
())
{
return
ReadBufferedAllocatorDivisionPlanFromFile
(
FLAGS_buffered_allocator_division_plan_path
);
}
// Default division plan is 4K, 8K, 16K, ..., 500M, 1G
constexpr
size_t
kMaxLogSize
=
30
;
std
::
vector
<
size_t
>
plan
;
for
(
size_t
i
=
12
;
i
<=
kMaxLogSize
;
++
i
)
{
plan
.
push_back
(
static_cast
<
size_t
>
(
1
)
<<
i
);
}
return
plan
;
}
// Returns the index i of the bin whose interval [bins[i], bins[i+1]) contains
// `size`. `bins` must be sorted and begin with 0 (guaranteed by
// CheckAndModifyMemoryDivisionPlan), so the subtraction never underflows.
inline static size_t FindDivisionPlanBinIndex(const std::vector<size_t> &bins,
                                              size_t size) {
  auto pos = std::upper_bound(bins.begin(), bins.end(), size);
  return static_cast<size_t>(pos - bins.begin()) - 1;
}
// Scales a requested size by FLAGS_buffered_allocator_excess_times, giving
// the largest cached allocation size that may be reused for this request.
inline static size_t TolerantUpperSize(size_t size) {
  const double scaled = size * FLAGS_buffered_allocator_excess_times;
  return static_cast<size_t>(scaled);
}
// Constructs the allocator with the default division plan: powers of two
// from 4KB to 1GB, or the plan loaded from the file named by
// FLAGS_buffered_allocator_division_plan_path when that flag is set.
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
    std::shared_ptr<Allocator> underlying_allocator)
    : MultiBinBufferedAllocator(std::move(underlying_allocator),
                                GetDefaultDivisionPlan()) {}
// Constructs the allocator with an explicit division plan. The plan is
// normalized (leading 0 inserted, trailing SIZE_MAX removed), and one cache
// bin is created per interval except the last: sizes that fall into the
// final interval bypass the cache entirely.
MultiBinBufferedAllocator::MultiBinBufferedAllocator(
    std::shared_ptr<Allocator> underlying_allocator,
    const std::vector<size_t> &division_plan)
    : underlying_allocator_(std::move(underlying_allocator)),
      division_plan_(division_plan) {
  CheckAndModifyMemoryDivisionPlan(&division_plan_);
  // N plan boundaries define N-1 cacheable bins.
  allocations_.resize(division_plan_.size() - 1);
  accumulated_cache_size_.assign(division_plan_.size() - 1, 0UL);
  mtx_.resize(division_plan_.size() - 1);
  // Per-bin mutexes are only created when the underlying allocator is
  // thread-safe; otherwise the entries stay null (presumably LockGuardPtr
  // treats a null mutex as "no locking needed" — confirm in lock_guard_ptr.h).
  if (underlying_allocator_->IsAllocThreadSafe()) {
    for (auto &mtx : mtx_) {
      mtx.reset(new std::mutex());
    }
  }
  VLOG(1) << "Division plan is: " << GetDebugStringOfPlan(division_plan_);
  VLOG(1) << "FLAGS_buffered_allocator_excess_times = "
          << FLAGS_buffered_allocator_excess_times;
}
// Intercepts a free: if the allocation's size falls into one of the cache
// bins it is stored (keyed by size) for later reuse instead of being
// released; allocations in the last division-plan interval go straight back
// to the underlying allocator.
void MultiBinBufferedAllocator::FreeImpl(Allocation *allocation) {
  auto bin_index = FindDivisionPlanBinIndex(division_plan_, allocation->size());
  if (bin_index < allocations_.size()) {
    // Cacheable: take over ownership and record the cached bytes.
    platform::LockGuardPtr<std::mutex> guard(mtx_[bin_index]);
    allocations_[bin_index].emplace(allocation->size(),
                                    AllocationPtr(allocation));
    accumulated_cache_size_[bin_index] += allocation->size();
  } else {
    // Size is in the last interval: not cached, free immediately.
    underlying_allocator_->Free(allocation);
  }
}
// Maybe we can design more flexible FreeCache strategy based on bin_index
// and require size.
//
// Releases every cached allocation back to the underlying allocator and
// returns the total number of bytes that were cached. Bins are processed
// from largest to smallest.
size_t MultiBinBufferedAllocator::ClearCache() {
  size_t accumulated_size = 0;
  // FIXME(zjl): free the largest first when there is no extra
  // Descending unsigned loop: terminates when i wraps around to SIZE_MAX
  // after decrementing past 0.
  for (size_t i = allocations_.size() - 1; i != static_cast<size_t>(-1); --i) {
    platform::LockGuardPtr<std::mutex> lock(mtx_[i]);
    // Clearing the multimap destroys the AllocationPtrs, which returns the
    // memory to the underlying allocator.
    allocations_[i].clear();
    accumulated_size += accumulated_cache_size_[i];
    accumulated_cache_size_[i] = 0;
  }
  return accumulated_size;
}
// Serves an allocation request, preferring the cache. First scans the cache
// bins starting at the bin for `size`, continuing into larger bins while the
// bin's lower bound is still within TolerantUpperSize(size); a cached entry
// is reused when its size is in [size, upper_size]. On a cache miss the
// request goes to the underlying allocator, and on BadAlloc the whole cache
// is cleared and the allocation retried until no cached bytes remain.
Allocation *MultiBinBufferedAllocator::AllocateImpl(size_t size, Attr attr) {
  auto bin_index = FindDivisionPlanBinIndex(division_plan_, size);
  auto upper_size = TolerantUpperSize(size);

  for (; bin_index < allocations_.size() &&
         upper_size >= division_plan_[bin_index];
       ++bin_index) {
    auto &allocation = allocations_[bin_index];
    platform::LockGuardPtr<std::mutex> lock(mtx_[bin_index]);
    // Smallest cached entry that is at least `size`.
    auto it = allocation.lower_bound(size);
    if (it != allocation.end() && it->second->size() <= upper_size) {
      size_t sz = it->second->size();
      auto ret = std::move(it->second);
      allocation.erase(it);
      accumulated_cache_size_[bin_index] -= sz;
      VLOG(3) << "Allocate " << sz << "(required " << size
              << ") from cache directly";
      // Ownership transfers to the caller (raw pointer per Allocator API).
      return ret.release();
    }
  }

  // Cache miss: allocate from the underlying allocator, clearing the cache
  // and retrying on out-of-memory until nothing is left to reclaim.
  size_t retry_time = 1;
  while (true) {
    try {
      auto ret = underlying_allocator_->Allocate(size, attr).release();
      VLOG(2) << "Allocate " << size << " from underlying directly";
      return ret;
    } catch (BadAlloc &) {
      size_t actual_free_size = ClearCache();
      VLOG(1) << retry_time << "-th free " << actual_free_size
              << " bytes caches";
      // Nothing reclaimed: give up and propagate the failure.
      if (actual_free_size == 0) throw;
    }
    ++retry_time;
  }
}
void
UseMultiBinBufferedAllocatorGFlags
()
{}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Parses a division-plan text file (one size per line, e.g. "500M", "1G")
// into a list of byte counts. Throws when the file is missing or malformed.
std::vector<size_t> ReadBufferedAllocatorDivisionPlanFromFile(
    const std::string &filepath);

// A caching allocator that groups freed allocations into size bins defined
// by a "division plan" and reuses them for later requests, falling back to
// (and retrying against) the underlying allocator on cache misses.
class MultiBinBufferedAllocator : public Allocator {
 public:
  // Uses the default division plan (or the plan file named by
  // FLAGS_buffered_allocator_division_plan_path).
  explicit MultiBinBufferedAllocator(
      std::shared_ptr<Allocator> underlying_allocator);

  MultiBinBufferedAllocator(std::shared_ptr<Allocator> underlying_allocator,
                            const std::vector<size_t>& division_plan);

  // Thread-safe iff per-bin mutexes were created, which happens exactly when
  // the underlying allocator reported itself thread-safe.
  bool IsAllocThreadSafe() const override { return mtx_.front() != nullptr; }

  // Releases all cached allocations; returns the total bytes released.
  size_t ClearCache();

  // The normalized division plan (starts with 0).
  const std::vector<size_t>& DivisionPlan() const { return division_plan_; }

 protected:
  Allocation* AllocateImpl(size_t size, Attr attr) override;
  void FreeImpl(Allocation* allocation) override;

 private:
  std::shared_ptr<Allocator> underlying_allocator_;
  // Per-bin caches of freed allocations, keyed by allocation size.
  std::vector<std::multimap<size_t, AllocationPtr>> allocations_;
  // Per-bin running totals (bytes) of the cached allocations above.
  std::vector<size_t> accumulated_cache_size_;
  std::vector<size_t> division_plan_;
  // One mutex per bin; entries stay null when the underlying allocator is
  // not thread-safe.
  std::vector<std::unique_ptr<std::mutex>> mtx_;
};

// Referenced by other modules (e.g. pybind.cc); see the definition for the
// linkage rationale.
extern void UseMultiBinBufferedAllocatorGFlags();
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/multi_bin_buffered_allocator_test.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include <gtest/gtest.h>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Builds a MultiBinBufferedAllocator backed by a BestFitAllocator over the
// given chunk, optionally wrapped in a LockedAllocator when `thread_safe`
// is requested.
inline std::shared_ptr<MultiBinBufferedAllocator> GetBufferedAllocator(
    Allocation *allocation, bool thread_safe) {
  std::shared_ptr<Allocator> allocator(new BestFitAllocator(allocation));
  if (thread_safe) {
    allocator.reset(new LockedAllocator(std::move(allocator)));
  }
  return std::make_shared<MultiBinBufferedAllocator>(allocator);
}
// IsAllocThreadSafe() must mirror the thread-safety of the underlying
// allocator chain (locked vs. unlocked BestFitAllocator).
TEST(buffered_allocator, thread_safety) {
  std::unique_ptr<CPUAllocator> allocator(new CPUAllocator());
  auto chunk = allocator->Allocate(1 << 20, allocator->kDefault);
  {
    auto buf_allocator = GetBufferedAllocator(chunk.get(), true);
    ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true);
  }
  {
    auto buf_allocator = GetBufferedAllocator(chunk.get(), false);
    ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false);
  }
}
// Minimal Allocation subclass: lets StubAllocator verify via dynamic_cast
// that it only ever frees allocations it created itself.
class StubAllocation : public Allocation {
 public:
  using Allocation::Allocation;
};
// Counting allocator: records how many allocations and frees reach it so
// tests can observe whether the buffered allocator served a request from
// its cache (count unchanged) or passed it through (count incremented).
class StubAllocator : public Allocator {
 public:
  void ResetCounter() {
    construct_count_ = 0;
    destruct_count_ = 0;
  }

  // Number of AllocateImpl calls since the last reset.
  size_t GetAllocCount() const { return construct_count_; }

  // Number of FreeImpl calls since the last reset.
  size_t GetFreeCount() const { return destruct_count_; }

 protected:
  void FreeImpl(Allocation *allocation) override {
    // Only StubAllocations created below may be freed here.
    auto *alloc = dynamic_cast<StubAllocation *>(allocation);
    PADDLE_ENFORCE_NOT_NULL(alloc);
    if (alloc->ptr()) delete[] static_cast<uint8_t *>(alloc->ptr());
    ++destruct_count_;
    delete allocation;
  }

  Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override {
    ++construct_count_;
    if (size == 0) {
      // Zero-sized requests are counted but carry no backing memory.
      return new StubAllocation(nullptr, 0, platform::CPUPlace());
    } else {
      return new StubAllocation(new uint8_t[size], size, platform::CPUPlace());
    }
  }

 private:
  size_t construct_count_ = 0;
  size_t destruct_count_ = 0;
};
// Expected counter values, typed as size_t to match the getters.
constexpr size_t kZero = 0;
constexpr size_t kOne = 1;
constexpr size_t kTwo = 2;

// Freed allocations must be cached (lazily freed) rather than returned to
// the underlying allocator — except those falling into the last
// division-plan interval, which bypass the cache.
TEST(buffered_allocator, lazy_free) {
  std::vector<int> original_alloc_size({1022, 1023, 1024, 1025, 1026});
  for (auto alloc_size : original_alloc_size) {
    auto stub_allocator = std::make_shared<StubAllocator>();
    auto *underlying_allocator = stub_allocator.get();
    auto allocator =
        std::make_shared<MultiBinBufferedAllocator>(stub_allocator);
    {
      // First allocation must hit the underlying allocator; freeing it
      // (x = nullptr) should populate the cache, not free the memory.
      underlying_allocator->ResetCounter();
      auto x = allocator->Allocate(alloc_size, allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      x = nullptr;
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    {
      // 900 bytes fits the cached ~1KB block → no underlying allocation;
      // 2048 bytes does not fit → one new underlying allocation.
      underlying_allocator->ResetCounter();
      auto x = allocator->Allocate(900, allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      auto y = allocator->Allocate(2048, allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      x = nullptr;
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
      y = nullptr;
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    {
      // ClearCache must report both cached blocks and free them through the
      // underlying allocator.
      underlying_allocator->ResetCounter();
      size_t cache_size = allocator->ClearCache();
      ASSERT_EQ(cache_size, static_cast<size_t>(alloc_size + 2048));
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kTwo);
    }
    {
      // A request exactly at the last division-plan bound falls into the
      // final (uncached) interval; its free passes straight through.
      underlying_allocator->ResetCounter();
      auto p = allocator->Allocate(allocator->DivisionPlan().back(),
                                   allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    ASSERT_EQ(underlying_allocator->GetFreeCount(), kOne);
    {
      // One byte below the last bound is cacheable again: freeing it does
      // not reach the underlying allocator.
      underlying_allocator->ResetCounter();
      auto p = allocator->Allocate(allocator->DivisionPlan().back() - 1,
                                   allocator->kDefault);
      ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
      ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
    }
    ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
  }
}
// When the underlying allocator is exhausted, the buffered allocator must
// clear its cache and retry instead of failing immediately.
TEST(buffered_allocator, garbage_collection) {
  std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
  auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault);
  auto allocator = GetBufferedAllocator(chunk.get(), false);
  // Consume the whole 2048-byte chunk, then return both blocks to the cache.
  auto x1 = allocator->Allocate(1600, allocator->kDefault);
  auto x2 = allocator->Allocate(400, allocator->kDefault);
  x1 = nullptr;
  x2 = nullptr;
  // This request can only succeed after the cached blocks are reclaimed.
  auto x3 = allocator->Allocate(1600, allocator->kDefault);
  ASSERT_NE(x3, nullptr);
  ASSERT_NE(x3->ptr(), nullptr);
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/naive_best_fit_allocator_facade_test.cc
浏览文件 @
c20db635
...
...
@@ -22,8 +22,6 @@ DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
DECLARE_bool
(
enable_buffered_allocator
);
DECLARE_string
(
allocator_strategy
);
namespace
paddle
{
...
...
@@ -38,7 +36,6 @@ TEST(allocator, allocator) {
#endif
FLAGS_allocator_strategy
=
"naive_best_fit"
;
FLAGS_enable_buffered_allocator
=
true
;
auto
&
instance
=
AllocatorFacade
::
Instance
();
platform
::
Place
place
;
...
...
paddle/fluid/memory/allocation/test_multi_bin_buffered_allocator_division_plan.cc
已删除
100644 → 0
浏览文件 @
c75a8803
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
DECLARE_string
(
buffered_allocator_division_plan_path
);
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// End-to-end check of the division-plan file parser: entries may use
// fractional values, case-insensitive K/M/G suffixes, and an optional
// trailing 'b'/'B'.
TEST(buffered_allocator, division_plan) {
  std::string path = "/tmp/buffered_allocator_divison_plan";
  FLAGS_buffered_allocator_division_plan_path = path;
  {
    // Write a plan file with one size per line.
    std::vector<std::string> plan(
        {"100b", "300.7K", "500.3m", "1.02gB", "2g", "4G"});
    std::ofstream os(path);
    for (auto &p : plan) {
      os << p << std::endl;
    }
    os.close();
  }
  auto plan = ReadBufferedAllocatorDivisionPlanFromFile(
      FLAGS_buffered_allocator_division_plan_path);
  ASSERT_EQ(plan.size(), 6UL);
  ASSERT_EQ(plan[0], 100UL);
  ASSERT_EQ(plan[1], static_cast<size_t>(300.7 * 1024));
  ASSERT_EQ(plan[2], static_cast<size_t>(500.3 * 1024 * 1024));
  ASSERT_EQ(plan[3], static_cast<size_t>(1.02 * 1024 * 1024 * 1024));
  ASSERT_EQ(plan[4], static_cast<size_t>(2.0 * 1024 * 1024 * 1024));
  ASSERT_EQ(plan[5], static_cast<size_t>(4.0 * 1024 * 1024 * 1024));
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/pybind/pybind.cc
浏览文件 @
c20db635
...
...
@@ -39,7 +39,6 @@ limitations under the License. */
#include "paddle/fluid/imperative/profiler.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/memory/allocation/multi_bin_buffered_allocator.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/py_func_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
...
...
@@ -135,8 +134,6 @@ PYBIND11_MODULE(core, m) {
paddle
::
memory
::
allocation
::
UseAllocatorStrategyGFlag
();
paddle
::
memory
::
allocation
::
UseMultiBinBufferedAllocatorGFlags
();
m
.
doc
()
=
"C++ core of PaddlePaddle"
;
// using framework in this function. Since it is inside a function, it will
...
...
python/paddle/fluid/__init__.py
浏览文件 @
c20db635
...
...
@@ -129,9 +129,7 @@ def __bootstrap__():
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'memory_fraction_of_eager_deletion'
,
'allocator_strategy'
,
'enable_buffered_allocator'
,
'buffered_allocator_excess_times'
,
'buffered_allocator_division_plan_path'
,
'reader_queue_speed_test_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
,
'inner_op_parallelism'
,
'enable_parallel_graph'
,
'multiple_of_cupti_buffer_size'
,
'enable_subgraph_optimize'
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录