Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
55b15db5
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
55b15db5
编写于
4月 08, 2019
作者:
C
chengduo
提交者:
GitHub
4月 08, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add unit test for fuse all_reduce ops (#16699)
* test fuse all_reduce
上级
ad4a1bd1
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
405 addition
and
308 deletion
+405
-308
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc
...framework/details/alloc_continuous_space_for_grad_pass.cc
+317
-306
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h
.../framework/details/alloc_continuous_space_for_grad_pass.h
+79
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+6
-0
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py
...fluid/tests/unittests/test_parallel_executor_seresnext.py
+3
-2
未找到文件。
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc
浏览文件 @
55b15db5
...
@@ -12,17 +12,18 @@
...
@@ -12,17 +12,18 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h"
#include <algorithm>
#include <algorithm>
#include <string>
#include <string>
#include <unordered_map>
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
DEFINE_uint
32
(
fuse_parameter_memory_size
,
0
,
// 0 KB
DEFINE_uint
64
(
fuse_parameter_memory_size
,
0
,
// 0 KB
"fuse_parameter_memory_size is up limited memory size "
"fuse_parameter_memory_size is up limited memory size "
"of one group parameters' gradient which is the input "
"of one group parameters' gradient which is the input "
"of communication calling(e.g NCCLAllReduce). "
"of communication calling(e.g NCCLAllReduce). "
...
@@ -40,355 +41,365 @@ DEFINE_int32(
...
@@ -40,355 +41,365 @@ DEFINE_int32(
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
namespace
details
{
namespace
details
{
// SetFuseParameterGroupsSize and SetFuseParameterMemorySize are used in unit
// tests, because setting 'FLAGS_fuse_parameter_memory_size' and
// 'FLAGS_fuse_parameter_groups_size' directly has no effect in a unit test.
void
SetFuseParameterGroupsSize
(
int
group_size
)
{
FLAGS_fuse_parameter_groups_size
=
group_size
;
}
static
const
char
kUnKnow
[]
=
"@UNKNOW@"
;
int
GetFuseParameterGroupsSize
()
{
return
FLAGS_fuse_parameter_groups_size
;
}
static
framework
::
proto
::
VarType
::
Type
kDefaultDtype
=
framework
::
proto
::
VarType
::
Type
::
VarType_Type_BOOL
;
class
AllocContinuousSpaceForGradPass
:
public
ir
::
Pass
{
void
SetFuseParameterMemorySize
(
uint64_t
memory_size
)
{
protected:
FLAGS_fuse_parameter_memory_size
=
memory_size
;
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
{
}
ir
::
Graph
&
result
=
*
graph
;
auto
&
places
=
Get
<
const
std
::
vector
<
platform
::
Place
>>
(
kPlaces
);
uint64_t
GetFuseParameterMemorySize
()
{
auto
&
local_scopes
=
Get
<
const
std
::
vector
<
Scope
*>>
(
kLocalScopes
);
return
FLAGS_fuse_parameter_memory_size
;
}
ResetAttribute
<
ParamsAndGrads
>
(
kParamsAndGrads
,
&
result
);
static
const
char
kUnKnow
[]
=
"@UNKNOW@"
;
ResetAttribute
<
GroupGradsAndParams
>
(
kGroupGradsAndParams
,
&
result
);
static
framework
::
proto
::
VarType
::
Type
kDefaultDtype
=
framework
::
proto
::
VarType
::
Type
::
VarType_Type_BOOL
;
// NOTE: The operator nodes should be in topology order.
void
AllocContinuousSpaceForGradPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
std
::
vector
<
ir
::
Node
*>
topo_nodes
=
ir
::
TopologySortOperations
(
result
);
ir
::
Graph
&
result
=
*
graph
;
auto
&
params_grads
=
result
.
Get
<
ParamsAndGrads
>
(
kParamsAndGrads
);
for
(
auto
&
node
:
topo_nodes
)
{
RecordParamsAndGrads
(
node
,
&
params_grads
);
}
if
(
params_grads
.
size
()
==
0
)
{
auto
&
places
=
Get
<
const
std
::
vector
<
platform
::
Place
>>
(
kPlaces
);
VLOG
(
10
)
<<
"Doesn't find gradients"
;
auto
&
local_scopes
=
Get
<
const
std
::
vector
<
Scope
*>>
(
kLocalScopes
);
return
;
}
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
vars
;
ResetAttribute
<
ParamsAndGrads
>
(
kParamsAndGrads
,
&
result
);
for
(
ir
::
Node
*
node
:
result
.
Nodes
())
{
ResetAttribute
<
GroupGradsAndParams
>
(
kGroupGradsAndParams
,
&
result
);
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
// Note: The graph may contain several nodes with the same name. For example,
// a parameter is the input of an operator and is also the output of the optimizer.
vars
.
emplace
(
node
->
Var
()
->
Name
(),
node
);
}
}
auto
&
group_grads_params
=
// NOTE: The operator nodes should be in topology order.
result
.
Get
<
GroupGradsAndParams
>
(
kGroupGradsAndParams
);
std
::
vector
<
ir
::
Node
*>
topo_nodes
=
ir
::
TopologySortOperations
(
result
);
auto
&
params_grads
=
result
.
Get
<
ParamsAndGrads
>
(
kParamsAndGrads
);
for
(
auto
&
node
:
topo_nodes
)
{
RecordParamsAndGrads
(
node
,
&
params_grads
);
}
// Note: the order of params_grads may be changed by SetGroupGradsAndParams.
if
(
params_grads
.
size
()
==
0
)
{
SetGroupGradsAndParams
(
vars
,
params_grads
,
&
group_grads_params
);
VLOG
(
10
)
<<
"Doesn't find gradients"
;
return
;
}
params_grads
.
clear
();
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
vars
;
for
(
auto
&
group_p_g
:
group_grads_params
)
{
for
(
ir
::
Node
*
node
:
result
.
Nodes
())
{
params_grads
.
insert
(
params_grads
.
begin
(),
group_p_g
.
begin
(),
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
group_p_g
.
end
());
// Note: The graph may have the same name node. For example, parameter
}
// is the input of operator and it also is the output of optimizer;
for
(
auto
&
p_g
:
params_grads
)
{
vars
.
emplace
(
node
->
Var
()
->
Name
(),
node
);
std
::
swap
(
p_g
.
first
,
p_g
.
second
);
}
}
}
// Set Gradients as Persistable to prevent this var becoming reusable.
auto
&
group_grads_params
=
auto
dtype
=
kDefaultDtype
;
result
.
Get
<
GroupGradsAndParams
>
(
kGroupGradsAndParams
);
for
(
auto
&
p_g
:
params_grads
)
{
// Get gradient var
auto
iter
=
vars
.
find
(
p_g
.
second
);
PADDLE_ENFORCE
(
iter
!=
vars
.
end
(),
"%s is not found."
,
p_g
.
second
);
iter
->
second
->
Var
()
->
SetPersistable
(
true
);
PADDLE_ENFORCE
(
IsSupportedVarType
(
iter
->
second
->
Var
()
->
GetType
()));
// Get Dtype
// Note: the order of params_grads may be changed by SetGroupGradsAndParams.
auto
ele_dtype
=
iter
->
second
->
Var
()
->
GetDataType
();
SetGroupGradsAndParams
(
vars
,
params_grads
,
&
group_grads_params
);
if
(
dtype
==
kDefaultDtype
)
{
dtype
=
ele_dtype
;
PADDLE_ENFORCE_NE
(
ele_dtype
,
kDefaultDtype
,
"The data type should not be bool."
);
}
PADDLE_ENFORCE_EQ
(
ele_dtype
,
dtype
,
"The data type of input is not consistent."
);
}
// Create a FusedVarsSet to avoid duplicating names for fused_var in other
params_grads
.
clear
();
// pass.
for
(
auto
&
group_p_g
:
group_grads_params
)
{
if
(
!
result
.
Has
(
kFusedVars
))
{
params_grads
.
insert
(
params_grads
.
begin
(),
group_p_g
.
begin
(),
result
.
Set
(
kFusedVars
,
new
FusedVars
);
group_p_g
.
end
());
}
}
// kFusedGrads is used by fuse_optimizer_op_pass.
for
(
auto
&
p_g
:
params_grads
)
{
result
.
Set
(
kFusedGrads
,
new
FusedGrads
);
std
::
swap
(
p_g
.
first
,
p_g
.
second
);
// the fused_var_name should be unique, so it appends
// params_grads.begin()->second.
auto
fused_var_name
=
std
::
string
(
kFusedVarNamePrefix
)
+
"@GRAD@"
+
params_grads
.
begin
()
->
second
;
result
.
Get
<
FusedGrads
>
(
kFusedGrads
)
=
fused_var_name
;
auto
&
fused_var_set
=
result
.
Get
<
FusedVars
>
(
kFusedVars
);
PADDLE_ENFORCE_EQ
(
fused_var_set
.
count
(
fused_var_name
),
0
,
"%s is duplicate in FusedVars."
,
fused_var_name
);
fused_var_set
.
insert
(
fused_var_name
);
InitFusedVarsAndAllocSpaceForVars
(
places
,
local_scopes
,
vars
,
fused_var_name
,
params_grads
);
}
}
template
<
typename
AttrType
>
// Set Gradients as Persistable to prevent this var becoming reusable.
void
ResetAttribute
(
const
std
::
string
&
attr_name
,
ir
::
Graph
*
graph
)
const
{
auto
dtype
=
kDefaultDtype
;
if
(
graph
->
Has
(
attr_name
))
{
for
(
auto
&
p_g
:
params_grads
)
{
VLOG
(
10
)
<<
attr_name
<<
" is reset."
;
// Get gradient var
graph
->
Erase
(
attr_name
);
auto
iter
=
vars
.
find
(
p_g
.
second
);
PADDLE_ENFORCE
(
iter
!=
vars
.
end
(),
"%s is not found."
,
p_g
.
second
);
iter
->
second
->
Var
()
->
SetPersistable
(
true
);
PADDLE_ENFORCE
(
IsSupportedVarType
(
iter
->
second
->
Var
()
->
GetType
()));
// Get Dtype
auto
ele_dtype
=
iter
->
second
->
Var
()
->
GetDataType
();
if
(
dtype
==
kDefaultDtype
)
{
dtype
=
ele_dtype
;
PADDLE_ENFORCE_NE
(
ele_dtype
,
kDefaultDtype
,
"The data type should not be bool."
);
}
}
graph
->
Set
(
attr_name
,
new
AttrType
);
PADDLE_ENFORCE_EQ
(
ele_dtype
,
dtype
,
"The data type of input is not consistent."
);
}
}
void
SetGroupGradsAndParams
(
// Create a FusedVarsSet to avoid duplicating names for fused_var in other
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
// pass.
const
ParamsAndGrads
&
params_grads
,
if
(
!
result
.
Has
(
kFusedVars
))
{
GroupGradsAndParams
*
group_grads_params
)
const
{
result
.
Set
(
kFusedVars
,
new
FusedVars
);
SetGroupAccordingToLayers
(
var_nodes
,
params_grads
,
group_grads_params
);
SetGroupAccordingToMemorySize
(
var_nodes
,
group_grads_params
);
SetGroupAccordingToGroupSize
(
var_nodes
,
group_grads_params
);
}
}
// kFusedGrads is used by fuse_optimizer_op_pass.
void
SetGroupAccordingToLayers
(
result
.
Set
(
kFusedGrads
,
new
FusedGrads
);
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
const
ParamsAndGrads
&
params_grads
,
// the fused_var_name should be unique, so it appends
GroupGradsAndParams
*
group_grads_params
)
const
{
// params_grads.begin()->second.
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int
>>
layer_params
;
auto
fused_var_name
=
std
::
string
(
kFusedVarNamePrefix
)
+
"@GRAD@"
+
params_grads
.
begin
()
->
second
;
for
(
size_t
i
=
0
;
i
<
params_grads
.
size
();
++
i
)
{
result
.
Get
<
FusedGrads
>
(
kFusedGrads
)
=
fused_var_name
;
auto
pos
=
params_grads
[
i
].
first
.
find_first_of
(
"."
);
auto
&
fused_var_set
=
result
.
Get
<
FusedVars
>
(
kFusedVars
);
if
(
pos
==
std
::
string
::
npos
)
{
PADDLE_ENFORCE_EQ
(
fused_var_set
.
count
(
fused_var_name
),
0
,
layer_params
[
std
::
string
(
kUnKnow
)].
emplace_back
(
i
);
"%s is duplicate in FusedVars."
,
fused_var_name
);
}
else
{
fused_var_set
.
insert
(
fused_var_name
);
layer_params
[
params_grads
[
i
].
first
.
substr
(
0
,
pos
)].
emplace_back
(
i
);
}
InitFusedVarsAndAllocSpaceForVars
(
places
,
local_scopes
,
vars
,
fused_var_name
,
params_grads
);
}
// Installs a freshly constructed AttrType on `graph` under `attr_name`.
// If the graph already carries an attribute with that name, the old one is
// erased first (and the reset is logged at verbosity 10) so that the new
// attribute always starts out default-constructed.
template <typename AttrType>
void AllocContinuousSpaceForGradPass::ResetAttribute(
    const std::string &attr_name, ir::Graph *graph) const {
  const bool has_previous_attr = graph->Has(attr_name);
  if (has_previous_attr) {
    VLOG(10) << attr_name << " is reset.";
    graph->Erase(attr_name);
  }
  // The graph receives the raw pointer to the newly created attribute.
  graph->Set(attr_name, new AttrType);
}
// Builds the gradient/parameter groups in three successive stages. The first
// stage fills `group_grads_params` from `params_grads`; the two later stages
// take `group_grads_params` as an in/out argument and regroup what the
// previous stage produced, so the call order below must be preserved.
//   var_nodes:          variable name -> graph node lookup table.
//   params_grads:       (parameter, gradient) name pairs to be grouped.
//   group_grads_params: output; receives the resulting groups.
void
AllocContinuousSpaceForGradPass
::
SetGroupGradsAndParams
(
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
const
ParamsAndGrads
&
params_grads
,
GroupGradsAndParams
*
group_grads_params
)
const
{
// Stage 1: initial grouping by layer.
SetGroupAccordingToLayers
(
var_nodes
,
params_grads
,
group_grads_params
);
// Stage 2: regroup according to the configured memory size.
SetGroupAccordingToMemorySize
(
var_nodes
,
group_grads_params
);
// Stage 3: regroup according to the configured group size.
SetGroupAccordingToGroupSize
(
var_nodes
,
group_grads_params
);
}
void
AllocContinuousSpaceForGradPass
::
SetGroupAccordingToLayers
(
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
const
ParamsAndGrads
&
params_grads
,
GroupGradsAndParams
*
group_grads_params
)
const
{
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int
>>
layer_params
;
for
(
size_t
i
=
0
;
i
<
params_grads
.
size
();
++
i
)
{
auto
pos
=
params_grads
[
i
].
first
.
find_first_of
(
"."
);
if
(
pos
==
std
::
string
::
npos
)
{
layer_params
[
std
::
string
(
kUnKnow
)].
emplace_back
(
i
);
}
else
{
layer_params
[
params_grads
[
i
].
first
.
substr
(
0
,
pos
)].
emplace_back
(
i
);
}
}
}
group_grads_params
->
reserve
(
layer_params
.
size
());
group_grads_params
->
reserve
(
layer_params
.
size
());
for
(
size_t
i
=
0
;
i
<
params_grads
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
params_grads
.
size
();
++
i
)
{
auto
pos
=
params_grads
[
i
].
first
.
find_first_of
(
"."
);
auto
pos
=
params_grads
[
i
].
first
.
find_first_of
(
"."
);
std
::
string
key
=
kUnKnow
;
std
::
string
key
=
kUnKnow
;
if
(
pos
!=
std
::
string
::
npos
)
{
if
(
pos
!=
std
::
string
::
npos
)
{
key
=
params_grads
[
i
].
first
.
substr
(
0
,
pos
);
key
=
params_grads
[
i
].
first
.
substr
(
0
,
pos
);
}
auto
iter
=
layer_params
.
find
(
key
);
if
(
iter
==
layer_params
.
end
())
continue
;
group_grads_params
->
emplace_back
();
auto
&
local_group_grads_params
=
group_grads_params
->
back
();
for
(
auto
&
idx
:
iter
->
second
)
{
local_group_grads_params
.
emplace_back
(
std
::
make_pair
(
params_grads
[
idx
].
second
,
params_grads
[
idx
].
first
));
}
layer_params
.
erase
(
iter
);
}
}
auto
iter
=
layer_params
.
find
(
key
);
VLOG
(
10
)
<<
"SetGroupAccordingToLayers: "
;
if
(
iter
==
layer_params
.
end
())
continue
;
for
(
size_t
i
=
0
;
i
<
group_grads_params
->
size
();
++
i
)
{
VLOG
(
10
)
<<
"group "
<<
i
;
group_grads_params
->
emplace_back
();
std
::
stringstream
out
;
auto
&
local_group_grads_params
=
group_grads_params
->
back
();
for
(
auto
&
p_g
:
group_grads_params
->
at
(
i
))
{
for
(
auto
&
idx
:
iter
->
second
)
{
out
<<
"("
<<
p_g
.
second
<<
", "
<<
p_g
.
first
<<
"), "
;
local_group_grads_params
.
emplace_back
(
}
std
::
make_pair
(
params_grads
[
idx
].
second
,
params_grads
[
idx
].
first
));
VLOG
(
10
)
<<
out
.
str
();
}
}
layer_params
.
erase
(
iter
);
}
}
void
SetGroupAccordingToMemorySize
(
VLOG
(
10
)
<<
"SetGroupAccordingToLayers: "
;
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
for
(
size_t
i
=
0
;
i
<
group_grads_params
->
size
();
++
i
)
{
GroupGradsAndParams
*
group_grads_params
)
const
{
VLOG
(
10
)
<<
"group "
<<
i
;
if
(
FLAGS_fuse_parameter_memory_size
==
0
)
{
std
::
stringstream
out
;
return
;
for
(
auto
&
p_g
:
group_grads_params
->
at
(
i
))
{
out
<<
"("
<<
p_g
.
second
<<
", "
<<
p_g
.
first
<<
"), "
;
}
}
size_t
group_memory_size
=
VLOG
(
10
)
<<
out
.
str
();
static_cast
<
size_t
>
(
FLAGS_fuse_parameter_memory_size
);
}
GroupGradsAndParams
local_group_grads_params
;
}
size_t
j
=
0
;
void
AllocContinuousSpaceForGradPass
::
SetGroupAccordingToMemorySize
(
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
GroupGradsAndParams
*
group_grads_params
)
const
{
const
uint64_t
group_memory_size
=
GetFuseParameterMemorySize
();
if
(
group_memory_size
==
0
)
{
return
;
}
GroupGradsAndParams
local_group_grads_params
;
size_t
j
=
0
;
while
(
j
<
group_grads_params
->
size
())
{
local_group_grads_params
.
emplace_back
();
auto
&
group_p_g
=
local_group_grads_params
.
back
();
size_t
local_group_memory_size
=
0
;
while
(
j
<
group_grads_params
->
size
())
{
while
(
j
<
group_grads_params
->
size
())
{
local_group_grads_params
.
emplace_back
();
std
::
for_each
(
auto
&
group_p_g
=
local_group_grads_params
.
back
();
group_grads_params
->
at
(
j
).
begin
(),
group_grads_params
->
at
(
j
).
end
(),
size_t
local_group_memory_size
=
0
;
[
&
local_group_memory_size
,
while
(
j
<
group_grads_params
->
size
())
{
&
var_nodes
](
const
std
::
pair
<
std
::
string
,
std
::
string
>
&
g_p
)
{
std
::
for_each
(
auto
iter
=
var_nodes
.
find
(
g_p
.
second
);
group_grads_params
->
at
(
j
).
begin
(),
group_grads_params
->
at
(
j
).
end
(),
PADDLE_ENFORCE
(
iter
!=
var_nodes
.
end
(),
"%s is not found."
,
[
&
local_group_memory_size
,
g_p
.
second
);
&
var_nodes
](
const
std
::
pair
<
std
::
string
,
std
::
string
>
&
g_p
)
{
auto
shape
=
iter
->
second
->
Var
()
->
GetShape
();
auto
iter
=
var_nodes
.
find
(
g_p
.
second
);
size_t
size
=
PADDLE_ENFORCE
(
iter
!=
var_nodes
.
end
(),
"%s is not found."
,
framework
::
SizeOfType
(
iter
->
second
->
Var
()
->
GetDataType
());
g_p
.
second
);
std
::
for_each
(
shape
.
begin
(),
shape
.
end
(),
auto
shape
=
iter
->
second
->
Var
()
->
GetShape
();
[
&
size
](
const
int64_t
&
n
)
{
size
*=
n
;
});
size_t
size
=
local_group_memory_size
+=
size
;
framework
::
SizeOfType
(
iter
->
second
->
Var
()
->
GetDataType
());
});
std
::
for_each
(
shape
.
begin
(),
shape
.
end
(),
group_p_g
.
insert
(
group_p_g
.
end
(),
group_grads_params
->
at
(
j
).
begin
(),
[
&
size
](
const
int64_t
&
n
)
{
size
*=
n
;
});
group_grads_params
->
at
(
j
).
end
());
local_group_memory_size
+=
size
;
++
j
;
});
if
(
local_group_memory_size
>=
group_memory_size
)
{
group_p_g
.
insert
(
group_p_g
.
end
(),
group_grads_params
->
at
(
j
).
begin
(),
break
;
group_grads_params
->
at
(
j
).
end
());
++
j
;
if
(
local_group_memory_size
>=
group_memory_size
)
{
break
;
}
}
}
std
::
swap
(
*
group_grads_params
,
local_group_grads_params
);
VLOG
(
10
)
<<
string
::
Sprintf
(
"SetGroupAccordingToMemorySize(memory_size: %d):"
,
FLAGS_fuse_parameter_memory_size
);
for
(
size_t
i
=
0
;
i
<
group_grads_params
->
size
();
++
i
)
{
VLOG
(
10
)
<<
"group "
<<
i
;
std
::
stringstream
out
;
for
(
auto
&
g_p
:
group_grads_params
->
at
(
i
))
{
auto
iter
=
var_nodes
.
find
(
g_p
.
second
);
PADDLE_ENFORCE
(
iter
!=
var_nodes
.
end
(),
"%s is not found."
,
g_p
.
second
);
auto
shape
=
iter
->
second
->
Var
()
->
GetShape
();
size_t
size
=
framework
::
SizeOfType
(
iter
->
second
->
Var
()
->
GetDataType
());
std
::
for_each
(
shape
.
begin
(),
shape
.
end
(),
[
&
size
](
const
int64_t
&
n
)
{
size
*=
n
;
});
out
<<
string
::
Sprintf
(
"(%s(%d), %s)"
,
g_p
.
second
,
size
,
g_p
.
first
);
}
}
VLOG
(
10
)
<<
out
.
str
();
}
}
}
}
void
SetGroupAccordingToGroupSize
(
std
::
swap
(
*
group_grads_params
,
local_group_grads_params
);
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
GroupGradsAndParams
*
group_grads_params
)
const
{
VLOG
(
10
)
<<
string
::
Sprintf
(
"SetGroupAccordingToMemorySize(memory_size: %d):"
,
if
(
FLAGS_fuse_parameter_groups_size
==
1
)
{
group_memory_size
);
return
;
for
(
size_t
i
=
0
;
i
<
group_grads_params
->
size
();
++
i
)
{
}
VLOG
(
10
)
<<
"group "
<<
i
;
size_t
group_size
=
static_cast
<
size_t
>
(
FLAGS_fuse_parameter_groups_size
);
std
::
stringstream
out
;
if
(
FLAGS_fuse_parameter_groups_size
==
-
1
)
{
for
(
auto
&
g_p
:
group_grads_params
->
at
(
i
))
{
group_size
=
group_grads_params
->
size
();
auto
iter
=
var_nodes
.
find
(
g_p
.
second
);
}
PADDLE_ENFORCE
(
iter
!=
var_nodes
.
end
(),
"%s is not found."
,
g_p
.
second
);
PADDLE_ENFORCE_GT
(
group_size
,
1
);
auto
shape
=
iter
->
second
->
Var
()
->
GetShape
();
size_t
groups
=
(
group_grads_params
->
size
()
+
group_size
-
1
)
/
group_size
;
size_t
size
=
framework
::
SizeOfType
(
iter
->
second
->
Var
()
->
GetDataType
());
GroupGradsAndParams
local_group_grads_params
;
std
::
for_each
(
shape
.
begin
(),
shape
.
end
(),
local_group_grads_params
.
reserve
(
groups
);
[
&
size
](
const
int64_t
&
n
)
{
size
*=
n
;
});
out
<<
string
::
Sprintf
(
"(%s(%d), %s)"
,
g_p
.
second
,
size
,
g_p
.
first
);
size_t
j
=
0
;
for
(
size_t
i
=
0
;
i
<
groups
;
++
i
)
{
local_group_grads_params
.
emplace_back
();
auto
&
group_p_g
=
local_group_grads_params
.
back
();
group_p_g
.
reserve
(
group_size
);
while
(
j
<
group_grads_params
->
size
())
{
group_p_g
.
insert
(
group_p_g
.
end
(),
group_grads_params
->
at
(
j
).
begin
(),
group_grads_params
->
at
(
j
).
end
());
++
j
;
if
(
j
%
group_size
==
0
)
break
;
}
}
std
::
swap
(
*
group_grads_params
,
local_group_grads_params
);
VLOG
(
10
)
<<
"SetGroupAccordingToGroupSize(group_size: "
<<
group_size
<<
"): "
;
for
(
size_t
i
=
0
;
i
<
group_grads_params
->
size
();
++
i
)
{
VLOG
(
10
)
<<
"group "
<<
i
;
std
::
stringstream
out
;
for
(
auto
&
p_g
:
group_grads_params
->
at
(
i
))
{
out
<<
"("
<<
p_g
.
second
<<
", "
<<
p_g
.
first
<<
"), "
;
}
VLOG
(
10
)
<<
out
.
str
();
}
}
VLOG
(
10
)
<<
out
.
str
();
}
}
}
private:
void
AllocContinuousSpaceForGradPass
::
SetGroupAccordingToGroupSize
(
bool
IsSupportedVarType
(
const
proto
::
VarType
::
Type
&
type
)
const
{
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
// Currently only LOD_TENSOR is supported.
GroupGradsAndParams
*
group_grads_params
)
const
{
return
type
==
proto
::
VarType
::
LOD_TENSOR
;
if
(
GetFuseParameterGroupsSize
()
==
1
)
{
return
;
}
}
const
int
group_size
=
GetFuseParameterGroupsSize
()
==
-
1
void
RecordParamsAndGrads
(
ir
::
Node
*
node
,
?
static_cast
<
int
>
(
group_grads_params
->
size
())
ParamsAndGrads
*
params_grads
)
const
{
:
GetFuseParameterGroupsSize
();
try
{
PADDLE_ENFORCE_GT
(
group_size
,
1
);
bool
is_bk_op
=
size_t
groups
=
(
group_grads_params
->
size
()
+
group_size
-
1
)
/
group_size
;
static_cast
<
bool
>
(
boost
::
get
<
int
>
(
node
->
Op
()
->
GetAttr
(
GroupGradsAndParams
local_group_grads_params
;
OpProtoAndCheckerMaker
::
OpRoleAttrName
()))
&
local_group_grads_params
.
reserve
(
groups
);
static_cast
<
int
>
(
OpRole
::
kBackward
));
if
(
!
is_bk_op
)
return
;
size_t
j
=
0
;
for
(
size_t
i
=
0
;
i
<
groups
;
++
i
)
{
// Currently, we assume that once gradient is generated, it can be
local_group_grads_params
.
emplace_back
();
// broadcast, and each gradient is only broadcast once.
auto
&
group_p_g
=
local_group_grads_params
.
back
();
auto
backward_vars
=
group_p_g
.
reserve
(
group_size
);
boost
::
get
<
std
::
vector
<
std
::
string
>>
(
node
->
Op
()
->
GetNullableAttr
(
while
(
j
<
group_grads_params
->
size
())
{
OpProtoAndCheckerMaker
::
OpRoleVarAttrName
()));
group_p_g
.
insert
(
group_p_g
.
end
(),
group_grads_params
->
at
(
j
).
begin
(),
PADDLE_ENFORCE_EQ
(
backward_vars
.
size
()
%
2
,
static_cast
<
size_t
>
(
0
));
group_grads_params
->
at
(
j
).
end
());
++
j
;
for
(
size_t
i
=
0
;
i
<
backward_vars
.
size
();
i
+=
2
)
{
if
(
j
%
group_size
==
0
)
break
;
VLOG
(
10
)
<<
"Trainable parameter: "
<<
backward_vars
[
i
]
<<
", gradient: "
<<
backward_vars
[
i
+
1
];
params_grads
->
emplace_back
(
std
::
make_pair
(
backward_vars
[
i
]
/*param*/
,
backward_vars
[
i
+
1
]
/*grad*/
));
}
}
catch
(
boost
::
bad_get
e
)
{
}
}
}
}
std
::
swap
(
*
group_grads_params
,
local_group_grads_params
);
void
InitFusedVarsAndAllocSpaceForVars
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
VLOG
(
10
)
<<
string
::
Sprintf
(
"SetGroupAccordingToGroupSize(group_size: %d):"
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
group_size
);
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
vars
,
for
(
size_t
i
=
0
;
i
<
group_grads_params
->
size
();
++
i
)
{
const
std
::
string
&
fused_var_name
,
VLOG
(
10
)
<<
"group "
<<
i
;
const
ParamsAndGrads
&
params_grads
)
const
{
std
::
stringstream
out
;
// Init Gradients and FusedVars
for
(
auto
&
p_g
:
group_grads_params
->
at
(
i
))
{
VLOG
(
10
)
<<
"Init FusedVars and Gradients."
;
out
<<
"("
<<
p_g
.
second
<<
", "
<<
p_g
.
first
<<
"), "
;
for
(
auto
it
=
local_scopes
.
rbegin
();
it
!=
local_scopes
.
rend
();
++
it
)
{
}
auto
&
scope
=
*
it
;
VLOG
(
10
)
<<
out
.
str
();
}
PADDLE_ENFORCE
(
scope
->
FindVar
(
fused_var_name
)
==
nullptr
,
}
"%s has existed in scope."
,
fused_var_name
);
scope
->
Var
(
fused_var_name
)
->
GetMutable
<
LoDTensor
>
();
bool
AllocContinuousSpaceForGradPass
::
IsSupportedVarType
(
const
proto
::
VarType
::
Type
&
type
)
const
{
for
(
auto
&
p_g
:
params_grads
)
{
// Currently only LOD_TENSOR is supported.
auto
iter
=
vars
.
find
(
p_g
.
second
);
return
type
==
proto
::
VarType
::
LOD_TENSOR
;
PADDLE_ENFORCE
(
iter
!=
vars
.
end
());
}
PADDLE_ENFORCE_NOT_NULL
(
iter
->
second
->
Var
());
PADDLE_ENFORCE_EQ
(
iter
->
second
->
Var
()
->
GetType
(),
void
AllocContinuousSpaceForGradPass
::
RecordParamsAndGrads
(
proto
::
VarType
::
LOD_TENSOR
);
ir
::
Node
*
node
,
ParamsAndGrads
*
params_grads
)
const
{
scope
->
Var
(
p_g
.
second
)
->
GetMutable
<
LoDTensor
>
();
try
{
}
bool
is_bk_op
=
static_cast
<
bool
>
(
boost
::
get
<
int
>
(
node
->
Op
()
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()))
&
static_cast
<
int
>
(
OpRole
::
kBackward
));
if
(
!
is_bk_op
)
return
;
// Currently, we assume that once gradient is generated, it can be
// broadcast, and each gradient is only broadcast once.
auto
backward_vars
=
boost
::
get
<
std
::
vector
<
std
::
string
>>
(
node
->
Op
()
->
GetNullableAttr
(
OpProtoAndCheckerMaker
::
OpRoleVarAttrName
()));
PADDLE_ENFORCE_EQ
(
backward_vars
.
size
()
%
2
,
static_cast
<
size_t
>
(
0
));
for
(
size_t
i
=
0
;
i
<
backward_vars
.
size
();
i
+=
2
)
{
VLOG
(
10
)
<<
"Trainable parameter: "
<<
backward_vars
[
i
]
<<
", gradient: "
<<
backward_vars
[
i
+
1
];
params_grads
->
emplace_back
(
std
::
make_pair
(
backward_vars
[
i
]
/*param*/
,
backward_vars
[
i
+
1
]
/*grad*/
));
}
}
}
catch
(
boost
::
bad_get
e
)
{
}
}
void
AllocContinuousSpaceForGradPass
::
InitFusedVarsAndAllocSpaceForVars
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
vars
,
const
std
::
string
&
fused_var_name
,
const
ParamsAndGrads
&
params_grads
)
const
{
// Init Gradients and FusedVars
VLOG
(
10
)
<<
"Init FusedVars and Gradients."
;
for
(
auto
it
=
local_scopes
.
rbegin
();
it
!=
local_scopes
.
rend
();
++
it
)
{
auto
&
scope
=
*
it
;
PADDLE_ENFORCE
(
scope
->
FindVar
(
fused_var_name
)
==
nullptr
,
"%s has existed in scope."
,
fused_var_name
);
scope
->
Var
(
fused_var_name
)
->
GetMutable
<
LoDTensor
>
();
// Alloc continuous space for vars.
std
::
vector
<
std
::
string
>
grads_name
;
std
::
vector
<
std
::
string
>
params_name
;
grads_name
.
reserve
(
params_grads
.
size
());
params_name
.
reserve
(
params_grads
.
size
());
for
(
auto
&
p_g
:
params_grads
)
{
for
(
auto
&
p_g
:
params_grads
)
{
params_name
.
emplace_back
(
p_g
.
first
);
auto
iter
=
vars
.
find
(
p_g
.
second
);
grads_name
.
emplace_back
(
p_g
.
second
);
PADDLE_ENFORCE
(
iter
!=
vars
.
end
());
}
PADDLE_ENFORCE_NOT_NULL
(
iter
->
second
->
Var
());
framework
::
ProgramDesc
program_desc
;
PADDLE_ENFORCE_EQ
(
iter
->
second
->
Var
()
->
GetType
(),
AppendAllocSpaceForVarsOp
(
params_name
,
grads_name
,
fused_var_name
,
proto
::
VarType
::
LOD_TENSOR
);
program_desc
.
MutableBlock
(
0
));
scope
->
Var
(
p_g
.
second
)
->
GetMutable
<
LoDTensor
>
();
for
(
size_t
i
=
0
;
i
<
local_scopes
.
size
();
++
i
)
{
for
(
auto
&
op_desc
:
program_desc
.
Block
(
0
).
AllOps
())
{
auto
op
=
OpRegistry
::
CreateOp
(
*
op_desc
);
op
->
Run
(
*
local_scopes
[
i
],
places
[
i
]);
}
}
}
}
}
void
AppendAllocSpaceForVarsOp
(
const
std
::
vector
<
std
::
string
>
&
params_name
,
// Alloc continuous space for vars.
const
std
::
vector
<
std
::
string
>
&
grads_name
,
std
::
vector
<
std
::
string
>
grads_name
;
const
std
::
string
&
fused_var_name
,
std
::
vector
<
std
::
string
>
params_name
;
BlockDesc
*
global_block
)
const
{
grads_name
.
reserve
(
params_grads
.
size
());
auto
op_desc
=
global_block
->
AppendOp
();
params_name
.
reserve
(
params_grads
.
size
());
op_desc
->
SetType
(
"alloc_continuous_space"
);
for
(
auto
&
p_g
:
params_grads
)
{
op_desc
->
SetInput
(
"Input"
,
params_name
);
params_name
.
emplace_back
(
p_g
.
first
);
op_desc
->
SetOutput
(
"Output"
,
grads_name
);
grads_name
.
emplace_back
(
p_g
.
second
);
op_desc
->
SetOutput
(
"FusedOutput"
,
{
fused_var_name
});
}
framework
::
ProgramDesc
program_desc
;
AppendAllocSpaceForVarsOp
(
params_name
,
grads_name
,
fused_var_name
,
program_desc
.
MutableBlock
(
0
));
for
(
size_t
i
=
0
;
i
<
local_scopes
.
size
();
++
i
)
{
for
(
auto
&
op_desc
:
program_desc
.
Block
(
0
).
AllOps
())
{
auto
op
=
OpRegistry
::
CreateOp
(
*
op_desc
);
op
->
Run
(
*
local_scopes
[
i
],
places
[
i
]);
}
}
}
};
}
// Appends one "alloc_continuous_space" operator description to
// `global_block`:
//   "Input"       <- params_name
//   "Output"      <- grads_name
//   "FusedOutput" <- the single name fused_var_name
void AllocContinuousSpaceForGradPass::AppendAllocSpaceForVarsOp(
    const std::vector<std::string> &params_name,
    const std::vector<std::string> &grads_name,
    const std::string &fused_var_name, BlockDesc *global_block) const {
  auto alloc_op = global_block->AppendOp();
  alloc_op->SetType("alloc_continuous_space");
  alloc_op->SetInput("Input", params_name);
  alloc_op->SetOutput("Output", grads_name);
  alloc_op->SetOutput("FusedOutput", {fused_var_name});
}
}
// namespace details
}
// namespace details
}
// namespace framework
}
// namespace framework
...
...
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h
0 → 100644
浏览文件 @
55b15db5
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
// Accessors for the fuse_parameter_groups_size / fuse_parameter_memory_size
// flags defined in the matching .cc file. They exist so that unit tests can
// change the values programmatically.

// Overwrites FLAGS_fuse_parameter_groups_size with `group_size`.
void
SetFuseParameterGroupsSize
(
int
group_size
);
// Returns the current value of FLAGS_fuse_parameter_groups_size.
int
GetFuseParameterGroupsSize
();
// Overwrites FLAGS_fuse_parameter_memory_size with `memory_size`.
void
SetFuseParameterMemorySize
(
uint64_t
memory_size
);
// Returns the current value of FLAGS_fuse_parameter_memory_size.
uint64_t
GetFuseParameterMemorySize
();
// Graph pass that collects the (parameter, gradient) pairs produced by
// backward operators, partitions them into groups, and creates a fused
// variable for the gradients by running an "alloc_continuous_space" operator
// on every local scope.
class
AllocContinuousSpaceForGradPass
:
public
ir
::
Pass
{
protected:
// Pass entry point; operates on `graph` in place.
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
// Erases the attribute `attr_name` from `graph` if it is present, then sets
// it to a newly constructed AttrType.
template
<
typename
AttrType
>
void
ResetAttribute
(
const
std
::
string
&
attr_name
,
ir
::
Graph
*
graph
)
const
;
// Runs the three grouping stages in order: by layer, by memory size, by
// group size. `group_grads_params` receives the result.
void
SetGroupGradsAndParams
(
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
const
ParamsAndGrads
&
params_grads
,
GroupGradsAndParams
*
group_grads_params
)
const
;
// Groups the pairs by the part of the parameter name that precedes the first
// '.'; names without a '.' share one "@UNKNOW@" group. Each stored entry is
// a (gradient, parameter) pair, i.e. swapped relative to `params_grads`.
void
SetGroupAccordingToLayers
(
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
const
ParamsAndGrads
&
params_grads
,
GroupGradsAndParams
*
group_grads_params
)
const
;
// Does nothing when GetFuseParameterMemorySize() is 0. Otherwise merges
// consecutive groups until the accumulated size of a merged group (element
// size times the product of the shape dimensions, summed over its variables)
// reaches that limit.
void
SetGroupAccordingToMemorySize
(
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
GroupGradsAndParams
*
group_grads_params
)
const
;
// Does nothing when GetFuseParameterGroupsSize() is 1. A value of -1 means
// "use the current number of groups" as the group size. Otherwise every
// `group size` consecutive groups are merged into one.
void
SetGroupAccordingToGroupSize
(
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
var_nodes
,
GroupGradsAndParams
*
group_grads_params
)
const
;
private:
// Returns true only for proto::VarType::LOD_TENSOR.
bool
IsSupportedVarType
(
const
proto
::
VarType
::
Type
&
type
)
const
;
// If `node` is a backward operator, reads its op-role-var attribute as
// consecutive (parameter, gradient) name pairs and appends them to
// `params_grads`. A boost::bad_get raised while reading the attributes is
// caught and ignored.
void
RecordParamsAndGrads
(
ir
::
Node
*
node
,
ParamsAndGrads
*
params_grads
)
const
;
// For every scope in `local_scopes`: checks that `fused_var_name` does not
// exist yet, creates it and the gradient variables as LoDTensor, then runs
// the alloc_continuous_space operator on scope i with places[i].
void
InitFusedVarsAndAllocSpaceForVars
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
&
vars
,
const
std
::
string
&
fused_var_name
,
const
ParamsAndGrads
&
params_grads
)
const
;
// Appends an "alloc_continuous_space" op to `global_block` with
// Input = params_name, Output = grads_name, FusedOutput = {fused_var_name}.
void
AppendAllocSpaceForVarsOp
(
const
std
::
vector
<
std
::
string
>
&
params_name
,
const
std
::
vector
<
std
::
string
>
&
grads_name
,
const
std
::
string
&
fused_var_name
,
BlockDesc
*
global_block
)
const
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/pybind/pybind.cc
浏览文件 @
55b15db5
...
@@ -21,6 +21,7 @@ limitations under the License. */
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/framework.pb.h"
...
@@ -165,6 +166,11 @@ PYBIND11_MODULE(core, m) {
...
@@ -165,6 +166,11 @@ PYBIND11_MODULE(core, m) {
// to enable eager deletion mode in unittest.
// to enable eager deletion mode in unittest.
m
.
def
(
"_set_eager_deletion_mode"
,
&
paddle
::
framework
::
SetEagerDeletionMode
);
m
.
def
(
"_set_eager_deletion_mode"
,
&
paddle
::
framework
::
SetEagerDeletionMode
);
m
.
def
(
"_set_fuse_parameter_group_size"
,
&
paddle
::
framework
::
details
::
SetFuseParameterGroupsSize
);
m
.
def
(
"_set_fuse_parameter_memory_size"
,
&
paddle
::
framework
::
details
::
SetFuseParameterMemorySize
);
m
.
add_object
(
"_cleanup"
,
m
.
add_object
(
"_cleanup"
,
py
::
capsule
([]()
{
ScopePool
::
Instance
().
Clear
();
}));
py
::
capsule
([]()
{
ScopePool
::
Instance
().
Clear
();
}));
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py
浏览文件 @
55b15db5
...
@@ -14,10 +14,11 @@
...
@@ -14,10 +14,11 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
os
import
os
os
.
environ
[
'FLAGS_fuse_parameter_memory_size'
]
=
"131072"
os
.
environ
[
'FLAGS_fuse_parameter_groups_size'
]
=
"3"
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
fluid
.
core
.
_set_fuse_parameter_group_size
(
3
)
fluid
.
core
.
_set_fuse_parameter_memory_size
(
131072
)
import
paddle.fluid.layers.ops
as
ops
import
paddle.fluid.layers.ops
as
ops
from
paddle.fluid.initializer
import
init_on_cpu
from
paddle.fluid.initializer
import
init_on_cpu
from
paddle.fluid.layers.learning_rate_scheduler
import
_decay_step_counter
from
paddle.fluid.layers.learning_rate_scheduler
import
_decay_step_counter
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录