Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
e893cbd2
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e893cbd2
编写于
3月 18, 2019
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add auto increment best fit allocator
test=develop
上级
a7a4f053
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
361 addition
and
11 deletion
+361
-11
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+4
-0
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+48
-7
paddle/fluid/memory/allocation/allocator_strategy.cc
paddle/fluid/memory/allocation/allocator_strategy.cc
+11
-3
paddle/fluid/memory/allocation/allocator_strategy.h
paddle/fluid/memory/allocation/allocator_strategy.h
+1
-1
paddle/fluid/memory/allocation/auto_increment_best_fit_allocator.cc
...id/memory/allocation/auto_increment_best_fit_allocator.cc
+136
-0
paddle/fluid/memory/allocation/auto_increment_best_fit_allocator.h
...uid/memory/allocation/auto_increment_best_fit_allocator.h
+87
-0
paddle/fluid/memory/allocation/auto_increment_best_fit_allocator_test.cc
...mory/allocation/auto_increment_best_fit_allocator_test.cc
+74
-0
未找到文件。
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
e893cbd2
...
@@ -8,6 +8,9 @@ cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_alloca
...
@@ -8,6 +8,9 @@ cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_alloca
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
cc_test
(
buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator
)
cc_test
(
multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator
)
cc_test
(
multi_bin_buffered_allocator_test SRCS multi_bin_buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator multi_bin_buffered_allocator cpu_allocator
)
cc_library
(
auto_increment_best_fit_allocator SRCS auto_increment_best_fit_allocator.cc DEPS allocator
)
cc_test
(
auto_increment_best_fit_allocator_test SRCS auto_increment_best_fit_allocator_test.cc DEPS cpu_allocator auto_increment_best_fit_allocator
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
nv_library
(
cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard
)
nv_library
(
cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard
)
endif
()
endif
()
...
@@ -56,6 +59,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
...
@@ -56,6 +59,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
retry_allocator
retry_allocator
buffered_allocator
buffered_allocator
multi_bin_buffered_allocator
multi_bin_buffered_allocator
auto_increment_best_fit_allocator
allocator_strategy
allocator_strategy
legacy_allocator
legacy_allocator
)
)
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
e893cbd2
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/auto_increment_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
...
@@ -195,17 +196,57 @@ class AllocatorFacadePrivate {
...
@@ -195,17 +196,57 @@ class AllocatorFacadePrivate {
~
AllocatorFacadePrivate
()
=
default
;
~
AllocatorFacadePrivate
()
=
default
;
  // Selects and builds the allocator set according to the strategy read from
  // FLAGS_allocator_strategy (see GetAllocatorStrategy()).
  AllocatorFacadePrivate() {
    auto strategy = GetAllocatorStrategy();
    switch (strategy) {
      case AllocatorStrategy::kLegacy: {
        InitLegacyAllocator();
        break;
      }
      case AllocatorStrategy::kNaiveBestFit: {
        InitCPUAllocator();
        InitCUDAAllocator();
        InitCUDAPinnedAllocator();
        WrapZeroSizeAllocator();
        break;
      }
      case AllocatorStrategy::kAutoGrowthBestFit: {
        // NOTE(review): the CPU path still uses the plain CPU allocator here;
        // only the CUDA and CUDA-pinned paths get the auto-growth wrapper —
        // confirm this is intentional.
        InitCPUAllocator();
        InitAutoGrowthCUDAAllocator();
        InitAutoGrowthCUDAPinnedAllocator();
        WrapZeroSizeAllocator();
        break;
      }
      default: {
        PADDLE_THROW("Unsupported allocator strategy: %d",
                     static_cast<int>(strategy));
      }
    }
  }
private:
private:
void
InitAutoGrowthCUDAAllocator
()
{
#ifdef PADDLE_WITH_CUDA
int
dev_cnt
=
platform
::
GetCUDADeviceCount
();
for
(
int
dev_id
=
0
;
dev_id
<
dev_cnt
;
++
dev_id
)
{
auto
cuda_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CUDAAllocator
>
(
platform
::
CUDAPlace
(
dev_id
)));
allocators_
[
platform
::
CUDAPlace
(
dev_id
)]
=
std
::
make_shared
<
AutoIncrementBestFitAllocator
>
(
cuda_allocator
,
platform
::
GpuMaxChunkSize
(),
4096
);
}
#endif
}
void
InitAutoGrowthCUDAPinnedAllocator
()
{
#ifdef PADDLE_WITH_CUDA
auto
cuda_pinned_allocator
=
std
::
make_shared
<
AlignedAllocator
<
4096
>>
(
std
::
make_shared
<
CPUPinnedAllocator
>
());
allocators_
[
platform
::
CUDAPinnedPlace
()]
=
std
::
make_shared
<
AutoIncrementBestFitAllocator
>
(
cuda_pinned_allocator
,
platform
::
CUDAPinnedMaxChunkSize
(),
4096
);
#endif
}
void
InitLegacyAllocator
()
{
void
InitLegacyAllocator
()
{
std
::
vector
<
platform
::
Place
>
places
{
platform
::
CPUPlace
()};
std
::
vector
<
platform
::
Place
>
places
{
platform
::
CPUPlace
()};
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/memory/allocation/allocator_strategy.cc
浏览文件 @
e893cbd2
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "gflags/gflags.h"
#include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string
(
DEFINE_string
(
allocator_strategy
,
"legacy"
,
allocator_strategy
,
"legacy"
,
...
@@ -25,9 +26,16 @@ namespace memory {
...
@@ -25,9 +26,16 @@ namespace memory {
namespace
allocation
{
namespace
allocation
{
// Maps the FLAGS_allocator_strategy string to an AllocatorStrategy value.
// Throws (PADDLE_THROW) on an unrecognized value.
static AllocatorStrategy GetStrategyFromFlag() {
  if (FLAGS_allocator_strategy == "legacy") {
    return AllocatorStrategy::kLegacy;
  }
  // BUG fix: the flag value was misspelled "navie_best_fit". Accept the
  // correct spelling, and keep the historical typo for backward
  // compatibility with anyone already passing it.
  if (FLAGS_allocator_strategy == "naive_best_fit" ||
      FLAGS_allocator_strategy == "navie_best_fit") {
    return AllocatorStrategy::kNaiveBestFit;
  }
  if (FLAGS_allocator_strategy == "auto_growth_best_fit") {
    return AllocatorStrategy::kAutoGrowthBestFit;
  }
  PADDLE_THROW("Unsupported allocator strategy: %s", FLAGS_allocator_strategy);
}
AllocatorStrategy
GetAllocatorStrategy
()
{
AllocatorStrategy
GetAllocatorStrategy
()
{
...
...
paddle/fluid/memory/allocation/allocator_strategy.h
浏览文件 @
e893cbd2
...
@@ -18,7 +18,7 @@ namespace paddle {
...
@@ -18,7 +18,7 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
enum
class
AllocatorStrategy
{
kLegacy
,
kNaiveBestFit
};
enum
class
AllocatorStrategy
{
kLegacy
,
kNaiveBestFit
,
kAutoGrowthBestFit
};
extern
AllocatorStrategy
GetAllocatorStrategy
();
extern
AllocatorStrategy
GetAllocatorStrategy
();
...
...
paddle/fluid/memory/allocation/auto_increment_best_fit_allocator.cc
0 → 100644
浏览文件 @
e893cbd2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/auto_increment_best_fit_allocator.h"
#include <algorithm>
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <unordered_map>
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Rounds `size` up to the nearest multiple of `alignment`.
static size_t align(size_t size, size_t alignment) {
  const size_t padding = size % alignment;
  if (padding == 0) {
    return size;
  }
  return size + (alignment - padding);
}
// Wraps `underlying_allocator` with best-fit sub-allocation. The preferred
// chunk size is rounded up to a multiple of `alignment` before being stored.
AutoIncrementBestFitAllocator::AutoIncrementBestFitAllocator(
    const std::shared_ptr<Allocator> &underlying_allocator, size_t chunk_size,
    size_t alignment)
    : underlying_allocator_(underlying_allocator),
      chunk_size_(align(chunk_size, alignment)),
      alignment_(alignment) {}
// Best-fit allocation: serve the request from the smallest existing free
// block that fits; otherwise grab a fresh chunk from the underlying
// allocator and split it. Thread-safe via mtx_.
Allocation *AutoIncrementBestFitAllocator::AllocateImpl(size_t size,
                                                        Attr attr) {
  // NOTE(review): a zero-size request returns nullptr rather than an empty
  // Allocation — confirm callers of Allocator::Allocate tolerate this.
  if (size == 0) return nullptr;

  size = align(size, alignment_);
  std::lock_guard<std::mutex> guard(mtx_);

  // free_blocks_ is keyed by (size, ptr), so lower_bound finds the smallest
  // free block whose size is >= the request (best fit).
  auto iter = free_blocks_.lower_bound(std::make_pair(size, nullptr));
  BlockIt block_it;
  if (iter != free_blocks_.end()) {
    VLOG(2) << "Found " << iter->second->size_ << " for " << size;
    block_it = iter->second;
    free_blocks_.erase(iter);
    auto *chunk = block_it->chunk_;
    size_t remaining_size = block_it->size_ - size;
    if (remaining_size == 0) {
      block_it->is_free_ = false;
      VLOG(2) << "Found and no remaining";
    } else {
      // Split: the front `remaining_size` bytes stay free, the tail is
      // handed out. Insert the free remainder before block_it so that
      // block order inside the chunk matches address order.
      auto remaining_free_block = chunk->blocks_.insert(
          block_it, Chunk::Block(block_it->ptr_, remaining_size, true, chunk));
      free_blocks_.emplace(std::make_pair(remaining_size, block_it->ptr_),
                           remaining_free_block);
      block_it->ptr_ =
          reinterpret_cast<uint8_t *>(block_it->ptr_) + remaining_size;
      block_it->size_ = size;
      block_it->is_free_ = false;
      VLOG(2) << "Found and remaining " << remaining_size;
    }
  } else {
    // No block fits: allocate a new chunk. Prefer a full chunk_size_ chunk
    // unless the underlying allocator has already failed to serve one.
    size_t alloc_size = size;
    if (!underlying_allocator_exhaustive_ && chunk_size_ > size) {
      alloc_size = chunk_size_;
    }
    try {
      chunks_.emplace_back(underlying_allocator_->Allocate(alloc_size, attr));
    } catch (BadAlloc &ex) {
      // BUG fix: was `throw ex;`, which copies the caught object and slices
      // any type derived from BadAlloc; `throw;` rethrows the original
      // exception unchanged.
      if (size == alloc_size) throw;
      // A full chunk failed — remember that and retry with the exact size.
      underlying_allocator_exhaustive_ = true;
      alloc_size = size;
      chunks_.emplace_back(underlying_allocator_->Allocate(alloc_size, attr));
    }
    auto *chunk = &(*chunks_.rbegin());
    uint8_t *p = reinterpret_cast<uint8_t *>(chunk->allocation_->ptr());
    auto &blocks = chunk->blocks_;
    size_t remaining_size = alloc_size - size;
    if (remaining_size > 0) {
      // Front of the chunk stays free; register it in the free map.
      blocks.emplace_back(p, remaining_size, true, chunk);
      free_blocks_.emplace(std::make_pair(remaining_size, p),
                           --(blocks.end()));
    }
    // Tail of the chunk is the returned block.
    blocks.emplace_back(p + remaining_size, size, false, chunk);
    block_it = --(blocks.end());
    VLOG(2) << "Not found and allocate " << alloc_size << ", and remaining "
            << remaining_size;
  }
  VLOG(2) << "After allocate, free blocks " << free_blocks_.size();
  return new Chunk::BlockAllocation(block_it);
}
// Returns a block to the free pool, coalescing it with any free neighbour
// on either side (within the same chunk) so larger requests can be served
// later. Thread-safe via mtx_.
void AutoIncrementBestFitAllocator::FreeImpl(Allocation *allocation) {
  auto &cur = static_cast<Chunk::BlockAllocation *>(allocation)->block_it_;
  auto &blocks = cur->chunk_->blocks_;

  std::lock_guard<std::mutex> guard(mtx_);
  cur->is_free_ = true;

  // Merge with the preceding block when it is also free.
  if (cur != blocks.begin()) {
    auto left = cur;
    --left;
    if (left->is_free_) {
      free_blocks_.erase(std::make_pair(left->size_, left->ptr_));
      left->size_ += cur->size_;
      blocks.erase(cur);
      cur = left;
    }
  }

  // Merge with the following block when it is also free.
  auto right = cur;
  ++right;
  if (right != blocks.end() && right->is_free_) {
    free_blocks_.erase(std::make_pair(right->size_, right->ptr_));
    cur->size_ += right->size_;
    blocks.erase(right);
  }

  // Re-register the (possibly grown) block in the free map.
  free_blocks_.emplace(std::make_pair(cur->size_, cur->ptr_), cur);
  VLOG(2) << "Combine " << cur->size_ << ", " << blocks.size() << ", "
          << free_blocks_.size();
  delete allocation;
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_increment_best_fit_allocator.h
0 → 100644
浏览文件 @
e893cbd2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <list>
#include <map>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Thread-safe best-fit sub-allocator that grows on demand: requests are
// served from free blocks inside previously obtained chunks; when none fits,
// a new chunk is requested from `underlying_allocator` (hence the
// "auto increment" in the name).
class AutoIncrementBestFitAllocator : public Allocator {
 public:
  // underlying_allocator: source of chunks.
  // chunk_size: preferred size of each new chunk (rounded up to `alignment`).
  // alignment: every allocation size is rounded up to a multiple of this.
  explicit AutoIncrementBestFitAllocator(
      const std::shared_ptr<Allocator> &underlying_allocator, size_t chunk_size,
      size_t alignment);

  bool IsAllocThreadSafe() const override { return true; }

  using AllocationList = std::list<AllocationPtr>;
  using AllocationListIt = AllocationList::iterator;

  // One allocation obtained from the underlying allocator, subdivided into a
  // list of blocks (free or in use) kept in address order.
  struct Chunk {
    struct Block {
      Block(void *ptr, size_t size, bool is_free, Chunk *chunk)
          : ptr_(ptr), size_(size), is_free_(is_free), chunk_(chunk) {}
      void *ptr_;
      size_t size_;
      bool is_free_;
      Chunk *chunk_;  // which chunk it is from
    };

    explicit Chunk(AllocationPtr allocation)
        : allocation_(std::move(allocation)) {}

    AllocationPtr allocation_;
    std::list<Block> blocks_;
    // std::mutex mtx_;

    // The Allocation handed back to callers; it carries an iterator to its
    // block so FreeImpl can locate and coalesce it without a search.
    struct BlockAllocation : public Allocation {
      explicit BlockAllocation(const std::list<Block>::iterator &it)
          : Allocation(it->ptr_, it->size_, it->chunk_->allocation_->place()),
            block_it_(it) {}
      std::list<Block>::iterator block_it_;
    };
  };

 protected:
  Allocation *AllocateImpl(size_t size, Attr attr) override;
  void FreeImpl(Allocation *allocation) override;

 private:
  using BlockIt = std::list<Chunk::Block>::iterator;

  std::shared_ptr<Allocator> underlying_allocator_;
  std::list<Chunk> chunks_;
  // Free blocks across all chunks, keyed by (size, address) so that
  // lower_bound(size) yields the smallest block that fits (best fit).
  std::map<std::pair<size_t, void *>, BlockIt> free_blocks_;
  size_t chunk_size_;
  size_t alignment_;
  // Set once the underlying allocator fails to supply a full chunk; after
  // that, only exact-size allocations are attempted.
  bool underlying_allocator_exhaustive_{false};
  mutable std::mutex mtx_;  // guards chunks_, free_blocks_, and block lists
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/auto_increment_best_fit_allocator_test.cc
0 → 100644
浏览文件 @
e893cbd2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <condition_variable> // NOLINT
#include <mutex> // NOLINT
#include <thread> // NOLINT
#include <vector>
#include <iostream>
#include "paddle/fluid/memory/allocation/auto_increment_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Smoke test: many threads concurrently allocate a descending sequence of
// sizes from one shared AutoIncrementBestFitAllocator backed by the CPU
// allocator. Passes if no crash/deadlock occurs.
TEST(allocator, auto_increment_best_fit_allocator) {
  auto cpu_allocator = std::make_shared<CPUAllocator>();
  // chunk_size = 0 means every request falls through to the underlying
  // allocator at its exact (aligned) size.
  auto allocator =
      std::make_shared<AutoIncrementBestFitAllocator>(cpu_allocator, 0, 4096);

  std::mutex mtx;
  std::condition_variable cv;
  bool flag = false;

  auto thread_main = [&] {
    {
      // Hold all workers at the barrier until the main thread releases them,
      // so the allocations really run concurrently.
      std::unique_lock<std::mutex> lock(mtx);
      cv.wait(lock, [&] { return flag; });
    }
    for (size_t i = 10; i > 0; --i) {
      allocator->Allocate((i + 1) * 1000);
    }
  };

  std::vector<std::thread> ths;
  // BUG fix: the loop was `for (size_t i = 10; i < 10; ++i)`, which never
  // iterates — zero worker threads were spawned, so the test exercised no
  // concurrency at all. Start at 0 to spawn the intended 10 threads.
  for (size_t i = 0; i < 10; ++i) {
    ths.emplace_back(thread_main);
  }

  {
    std::lock_guard<std::mutex> lock(mtx);
    flag = true;
  }
  cv.notify_all();

  // The main thread also participates as one more worker.
  thread_main();

  for (auto &th : ths) {
    th.join();
  }
  std::cout << "test ends" << std::endl;
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录