Unverified commit 55b15db5
Author: chengduo
Committed via GitHub on Apr 08, 2019
Parent: ad4a1bd1

Add unit test for fuse all_reduce ops (#16699)

* test fuse all_reduce

Showing 4 changed files with 405 additions and 308 deletions (+405, -308)
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc   +317  -306
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h    +79   -0
paddle/fluid/pybind/pybind.cc                                            +6    -0
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py  +3    -2
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc (view file @ 55b15db5)
...
@@ -12,17 +12,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h"
 #include <algorithm>
 #include <string>
 #include <unordered_map>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/details/build_strategy.h"
 #include "paddle/fluid/framework/details/multi_devices_helper.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/op_registry.h"

-DEFINE_uint32(fuse_parameter_memory_size, 0,  // 0 KB
+DEFINE_uint64(fuse_parameter_memory_size, 0,  // 0 KB
               "fuse_parameter_memory_size is up limited memory size "
               "of one group parameters' gradient which is the input "
               "of communication calling(e.g NCCLAllReduce). "
...
@@ -40,355 +41,365 @@ DEFINE_int32(

The removed side of this hunk defined all of the pass logic inline inside the class body (class AllocContinuousSpaceForGradPass : public ir::Pass { ... };) and read FLAGS_fuse_parameter_memory_size and FLAGS_fuse_parameter_groups_size directly. The added side moves the class declaration into the new header, turns the members into out-of-class definitions, and adds setter/getter functions for the two flags so that unit tests can change them. The added side of the hunk:

namespace paddle {
namespace framework {
namespace details {
// SetFuseParameterGroupsSize and SetFuseParameterMemorySize are used in unit
// test, because it is invalid that seting 'FLAGS_fuse_parameter_memory_size'
// and 'FLAGS_fuse_parameter_groups_size' in unit test.
void SetFuseParameterGroupsSize(int group_size) {
  FLAGS_fuse_parameter_groups_size = group_size;
}

int GetFuseParameterGroupsSize() { return FLAGS_fuse_parameter_groups_size; }

void SetFuseParameterMemorySize(uint64_t memory_size) {
  FLAGS_fuse_parameter_memory_size = memory_size;
}

uint64_t GetFuseParameterMemorySize() {
  return FLAGS_fuse_parameter_memory_size;
}

static const char kUnKnow[] = "@UNKNOW@";
static framework::proto::VarType::Type kDefaultDtype =
    framework::proto::VarType::Type::VarType_Type_BOOL;

void AllocContinuousSpaceForGradPass::ApplyImpl(ir::Graph *graph) const {
  ir::Graph &result = *graph;

  auto &places = Get<const std::vector<platform::Place>>(kPlaces);
  auto &local_scopes = Get<const std::vector<Scope *>>(kLocalScopes);

  ResetAttribute<ParamsAndGrads>(kParamsAndGrads, &result);
  ResetAttribute<GroupGradsAndParams>(kGroupGradsAndParams, &result);

  // NOTE: The operator nodes should be in topology order.
  std::vector<ir::Node *> topo_nodes = ir::TopologySortOperations(result);
  auto &params_grads = result.Get<ParamsAndGrads>(kParamsAndGrads);
  for (auto &node : topo_nodes) {
    RecordParamsAndGrads(node, &params_grads);
  }

  if (params_grads.size() == 0) {
    VLOG(10) << "Doesn't find gradients";
    return;
  }

  std::unordered_map<std::string, ir::Node *> vars;
  for (ir::Node *node : result.Nodes()) {
    if (node->IsVar() && node->Var()) {
      // Note: The graph may have the same name node. For example, parameter
      // is the input of operator and it also is the output of optimizer;
      vars.emplace(node->Var()->Name(), node);
    }
  }

  auto &group_grads_params =
      result.Get<GroupGradsAndParams>(kGroupGradsAndParams);

  // Note: the order of params_grads may be changed by SetGroupGradsAndParams.
  SetGroupGradsAndParams(vars, params_grads, &group_grads_params);

  params_grads.clear();
  for (auto &group_p_g : group_grads_params) {
    params_grads.insert(params_grads.begin(), group_p_g.begin(),
                        group_p_g.end());
  }
  for (auto &p_g : params_grads) {
    std::swap(p_g.first, p_g.second);
  }

  // Set Gradients as Persistable to prevent this var becoming reusable.
  auto dtype = kDefaultDtype;
  for (auto &p_g : params_grads) {
    // Get gradient var
    auto iter = vars.find(p_g.second);
    PADDLE_ENFORCE(iter != vars.end(), "%s is not found.", p_g.second);
    iter->second->Var()->SetPersistable(true);

    PADDLE_ENFORCE(IsSupportedVarType(iter->second->Var()->GetType()));

    // Get Dtype
    auto ele_dtype = iter->second->Var()->GetDataType();
    if (dtype == kDefaultDtype) {
      dtype = ele_dtype;
      PADDLE_ENFORCE_NE(ele_dtype, kDefaultDtype,
                        "The data type should not be bool.");
    }
    PADDLE_ENFORCE_EQ(ele_dtype, dtype,
                      "The data type of input is not consistent.");
  }

  // Create a FusedVarsSet to avoid duplicating names for fused_var in other
  // pass.
  if (!result.Has(kFusedVars)) {
    result.Set(kFusedVars, new FusedVars);
  }
  // the kFusedGrads is used be fuse_optimizer_op_pass.
  result.Set(kFusedGrads, new FusedGrads);

  // the fused_var_name should be unique, so it appends
  // params_grads.begin()->second.
  auto fused_var_name = std::string(kFusedVarNamePrefix) + "@GRAD@" +
                        params_grads.begin()->second;
  result.Get<FusedGrads>(kFusedGrads) = fused_var_name;

  auto &fused_var_set = result.Get<FusedVars>(kFusedVars);
  PADDLE_ENFORCE_EQ(fused_var_set.count(fused_var_name), 0,
                    "%s is duplicate in FusedVars.", fused_var_name);
  fused_var_set.insert(fused_var_name);

  InitFusedVarsAndAllocSpaceForVars(places, local_scopes, vars, fused_var_name,
                                    params_grads);
}

template <typename AttrType>
void AllocContinuousSpaceForGradPass::ResetAttribute(
    const std::string &attr_name, ir::Graph *graph) const {
  if (graph->Has(attr_name)) {
    VLOG(10) << attr_name << " is reset.";
    graph->Erase(attr_name);
  }
  graph->Set(attr_name, new AttrType);
}

void AllocContinuousSpaceForGradPass::SetGroupGradsAndParams(
    const std::unordered_map<std::string, ir::Node *> &var_nodes,
    const ParamsAndGrads &params_grads,
    GroupGradsAndParams *group_grads_params) const {
  SetGroupAccordingToLayers(var_nodes, params_grads, group_grads_params);
  SetGroupAccordingToMemorySize(var_nodes, group_grads_params);
  SetGroupAccordingToGroupSize(var_nodes, group_grads_params);
}

void AllocContinuousSpaceForGradPass::SetGroupAccordingToLayers(
    const std::unordered_map<std::string, ir::Node *> &var_nodes,
    const ParamsAndGrads &params_grads,
    GroupGradsAndParams *group_grads_params) const {
  std::unordered_map<std::string, std::vector<int>> layer_params;

  for (size_t i = 0; i < params_grads.size(); ++i) {
    auto pos = params_grads[i].first.find_first_of(".");
    if (pos == std::string::npos) {
      layer_params[std::string(kUnKnow)].emplace_back(i);
    } else {
      layer_params[params_grads[i].first.substr(0, pos)].emplace_back(i);
    }
  }

  group_grads_params->reserve(layer_params.size());
  for (size_t i = 0; i < params_grads.size(); ++i) {
    auto pos = params_grads[i].first.find_first_of(".");
    std::string key = kUnKnow;
    if (pos != std::string::npos) {
      key = params_grads[i].first.substr(0, pos);
    }
    auto iter = layer_params.find(key);
    if (iter == layer_params.end()) continue;

    group_grads_params->emplace_back();
    auto &local_group_grads_params = group_grads_params->back();
    for (auto &idx : iter->second) {
      local_group_grads_params.emplace_back(
          std::make_pair(params_grads[idx].second, params_grads[idx].first));
    }
    layer_params.erase(iter);
  }

  VLOG(10) << "SetGroupAccordingToLayers: ";
  for (size_t i = 0; i < group_grads_params->size(); ++i) {
    VLOG(10) << "group " << i;
    std::stringstream out;
    for (auto &p_g : group_grads_params->at(i)) {
      out << "(" << p_g.second << ", " << p_g.first << "), ";
    }
    VLOG(10) << out.str();
  }
}

void AllocContinuousSpaceForGradPass::SetGroupAccordingToMemorySize(
    const std::unordered_map<std::string, ir::Node *> &var_nodes,
    GroupGradsAndParams *group_grads_params) const {
  const uint64_t group_memory_size = GetFuseParameterMemorySize();
  if (group_memory_size == 0) {
    return;
  }
  GroupGradsAndParams local_group_grads_params;
  size_t j = 0;
  while (j < group_grads_params->size()) {
    local_group_grads_params.emplace_back();
    auto &group_p_g = local_group_grads_params.back();
    size_t local_group_memory_size = 0;
    while (j < group_grads_params->size()) {
      std::for_each(
          group_grads_params->at(j).begin(), group_grads_params->at(j).end(),
          [&local_group_memory_size,
           &var_nodes](const std::pair<std::string, std::string> &g_p) {
            auto iter = var_nodes.find(g_p.second);
            PADDLE_ENFORCE(iter != var_nodes.end(), "%s is not found.",
                           g_p.second);
            auto shape = iter->second->Var()->GetShape();
            size_t size =
                framework::SizeOfType(iter->second->Var()->GetDataType());
            std::for_each(shape.begin(), shape.end(),
                          [&size](const int64_t &n) { size *= n; });
            local_group_memory_size += size;
          });
      group_p_g.insert(group_p_g.end(), group_grads_params->at(j).begin(),
                       group_grads_params->at(j).end());
      ++j;
      if (local_group_memory_size >= group_memory_size) {
        break;
      }
    }
  }

  std::swap(*group_grads_params, local_group_grads_params);

  VLOG(10) << string::Sprintf(
      "SetGroupAccordingToMemorySize(memory_size: %d):", group_memory_size);
  for (size_t i = 0; i < group_grads_params->size(); ++i) {
    VLOG(10) << "group " << i;
    std::stringstream out;
    for (auto &g_p : group_grads_params->at(i)) {
      auto iter = var_nodes.find(g_p.second);
      PADDLE_ENFORCE(iter != var_nodes.end(), "%s is not found.", g_p.second);
      auto shape = iter->second->Var()->GetShape();
      size_t size = framework::SizeOfType(iter->second->Var()->GetDataType());
      std::for_each(shape.begin(), shape.end(),
                    [&size](const int64_t &n) { size *= n; });
      out << string::Sprintf("(%s(%d), %s)", g_p.second, size, g_p.first);
    }
    VLOG(10) << out.str();
  }
}

void AllocContinuousSpaceForGradPass::SetGroupAccordingToGroupSize(
    const std::unordered_map<std::string, ir::Node *> &var_nodes,
    GroupGradsAndParams *group_grads_params) const {
  if (GetFuseParameterGroupsSize() == 1) {
    return;
  }
  const int group_size = GetFuseParameterGroupsSize() == -1
                             ? static_cast<int>(group_grads_params->size())
                             : GetFuseParameterGroupsSize();
  PADDLE_ENFORCE_GT(group_size, 1);
  size_t groups = (group_grads_params->size() + group_size - 1) / group_size;
  GroupGradsAndParams local_group_grads_params;
  local_group_grads_params.reserve(groups);

  size_t j = 0;
  for (size_t i = 0; i < groups; ++i) {
    local_group_grads_params.emplace_back();
    auto &group_p_g = local_group_grads_params.back();
    group_p_g.reserve(group_size);
    while (j < group_grads_params->size()) {
      group_p_g.insert(group_p_g.end(), group_grads_params->at(j).begin(),
                       group_grads_params->at(j).end());
      ++j;
      if (j % group_size == 0) break;
    }
  }
  std::swap(*group_grads_params, local_group_grads_params);

  VLOG(10) << string::Sprintf(
      "SetGroupAccordingToGroupSize(group_size: %d):", group_size);
  for (size_t i = 0; i < group_grads_params->size(); ++i) {
    VLOG(10) << "group " << i;
    std::stringstream out;
    for (auto &p_g : group_grads_params->at(i)) {
      out << "(" << p_g.second << ", " << p_g.first << "), ";
    }
    VLOG(10) << out.str();
  }
}

bool AllocContinuousSpaceForGradPass::IsSupportedVarType(
    const proto::VarType::Type &type) const {
  // Current only support LOD_TENSOR.
  return type == proto::VarType::LOD_TENSOR;
}

void AllocContinuousSpaceForGradPass::RecordParamsAndGrads(
    ir::Node *node, ParamsAndGrads *params_grads) const {
  try {
    bool is_bk_op =
        static_cast<bool>(boost::get<int>(node->Op()->GetAttr(
                              OpProtoAndCheckerMaker::OpRoleAttrName())) &
                          static_cast<int>(OpRole::kBackward));
    if (!is_bk_op) return;

    // Currently, we assume that once gradient is generated, it can be
    // broadcast, and each gradient is only broadcast once.
    auto backward_vars =
        boost::get<std::vector<std::string>>(node->Op()->GetNullableAttr(
            OpProtoAndCheckerMaker::OpRoleVarAttrName()));
    PADDLE_ENFORCE_EQ(backward_vars.size() % 2, static_cast<size_t>(0));

    for (size_t i = 0; i < backward_vars.size(); i += 2) {
      VLOG(10) << "Trainable parameter: " << backward_vars[i]
               << ", gradient: " << backward_vars[i + 1];

      params_grads->emplace_back(std::make_pair(backward_vars[i] /*param*/,
                                                backward_vars[i + 1] /*grad*/));
    }
  } catch (boost::bad_get e) {
  }
}

void AllocContinuousSpaceForGradPass::InitFusedVarsAndAllocSpaceForVars(
    const std::vector<platform::Place> &places,
    const std::vector<Scope *> &local_scopes,
    const std::unordered_map<std::string, ir::Node *> &vars,
    const std::string &fused_var_name,
    const ParamsAndGrads &params_grads) const {
  // Init Gradients and FusedVars
  VLOG(10) << "Init FusedVars and Gradients.";
  for (auto it = local_scopes.rbegin(); it != local_scopes.rend(); ++it) {
    auto &scope = *it;

    PADDLE_ENFORCE(scope->FindVar(fused_var_name) == nullptr,
                   "%s has existed in scope.", fused_var_name);
    scope->Var(fused_var_name)->GetMutable<LoDTensor>();

    for (auto &p_g : params_grads) {
      auto iter = vars.find(p_g.second);
      PADDLE_ENFORCE(iter != vars.end());
      PADDLE_ENFORCE_NOT_NULL(iter->second->Var());
      PADDLE_ENFORCE_EQ(iter->second->Var()->GetType(),
                        proto::VarType::LOD_TENSOR);
      scope->Var(p_g.second)->GetMutable<LoDTensor>();
    }
  }

  // Alloc continuous space for vars.
  std::vector<std::string> grads_name;
  std::vector<std::string> params_name;
  grads_name.reserve(params_grads.size());
  params_name.reserve(params_grads.size());
  for (auto &p_g : params_grads) {
    params_name.emplace_back(p_g.first);
    grads_name.emplace_back(p_g.second);
  }

  framework::ProgramDesc program_desc;
  AppendAllocSpaceForVarsOp(params_name, grads_name, fused_var_name,
                            program_desc.MutableBlock(0));

  for (size_t i = 0; i < local_scopes.size(); ++i) {
    for (auto &op_desc : program_desc.Block(0).AllOps()) {
      auto op = OpRegistry::CreateOp(*op_desc);
      op->Run(*local_scopes[i], places[i]);
    }
  }
}

void AllocContinuousSpaceForGradPass::AppendAllocSpaceForVarsOp(
    const std::vector<std::string> &params_name,
    const std::vector<std::string> &grads_name,
    const std::string &fused_var_name, BlockDesc *global_block) const {
  auto op_desc = global_block->AppendOp();
  op_desc->SetType("alloc_continuous_space");
  op_desc->SetInput("Input", params_name);
  op_desc->SetOutput("Output", grads_name);
  op_desc->SetOutput("FusedOutput", {fused_var_name});
}

}  // namespace details
}  // namespace framework
...
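The three grouping stages run in a fixed order: SetGroupAccordingToLayers buckets (gradient, parameter) pairs by the prefix before the first '.', SetGroupAccordingToMemorySize then merges neighbouring buckets until the GetFuseParameterMemorySize() budget is reached, and SetGroupAccordingToGroupSize finally merges every GetFuseParameterGroupsSize() buckets. The Python sketch below only illustrates that policy; it is not Paddle code, and the parameter names and sizes are made up.

# Illustration only: mimic how the pass groups (grad, param) pairs.
def group_by_layer(params_grads):
    # Stage 1: bucket by the prefix before the first '.', like kUnKnow handling.
    groups = {}
    for param, grad in params_grads:
        key = param.split(".")[0] if "." in param else "@UNKNOW@"
        groups.setdefault(key, []).append((grad, param))
    return list(groups.values())

def group_by_memory(groups, sizes, memory_budget):
    # Stage 2: merge consecutive buckets until the memory budget is reached.
    if memory_budget == 0:
        return groups
    merged, current, current_size = [], [], 0
    for g in groups:
        current.extend(g)
        current_size += sum(sizes[grad] for grad, _ in g)
        if current_size >= memory_budget:
            merged.append(current)
            current, current_size = [], 0
    if current:
        merged.append(current)
    return merged

def group_by_count(groups, group_size):
    # Stage 3: merge every `group_size` consecutive buckets into one.
    if group_size == 1:
        return groups
    return [[pg for g in groups[i:i + group_size] for pg in g]
            for i in range(0, len(groups), group_size)]

params_grads = [("fc_0.w", "fc_0.w@GRAD"), ("fc_0.b", "fc_0.b@GRAD"),
                ("fc_1.w", "fc_1.w@GRAD"), ("conv_0.w", "conv_0.w@GRAD")]
sizes = {g: 4096 for _, g in params_grads}  # pretend each gradient is 4 KB
groups = group_by_layer(params_grads)
groups = group_by_memory(groups, sizes, memory_budget=8192)
groups = group_by_count(groups, group_size=2)
print(groups)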
paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h (new file, mode 0 → 100644, view file @ 55b15db5)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph_helper.h"

namespace paddle {
namespace framework {
namespace details {

void SetFuseParameterGroupsSize(int group_size);
int GetFuseParameterGroupsSize();

void SetFuseParameterMemorySize(uint64_t memory_size);
uint64_t GetFuseParameterMemorySize();

class AllocContinuousSpaceForGradPass : public ir::Pass {
 protected:
  void ApplyImpl(ir::Graph *graph) const override;

  template <typename AttrType>
  void ResetAttribute(const std::string &attr_name, ir::Graph *graph) const;

  void SetGroupGradsAndParams(
      const std::unordered_map<std::string, ir::Node *> &var_nodes,
      const ParamsAndGrads &params_grads,
      GroupGradsAndParams *group_grads_params) const;

  void SetGroupAccordingToLayers(
      const std::unordered_map<std::string, ir::Node *> &var_nodes,
      const ParamsAndGrads &params_grads,
      GroupGradsAndParams *group_grads_params) const;

  void SetGroupAccordingToMemorySize(
      const std::unordered_map<std::string, ir::Node *> &var_nodes,
      GroupGradsAndParams *group_grads_params) const;

  void SetGroupAccordingToGroupSize(
      const std::unordered_map<std::string, ir::Node *> &var_nodes,
      GroupGradsAndParams *group_grads_params) const;

 private:
  bool IsSupportedVarType(const proto::VarType::Type &type) const;

  void RecordParamsAndGrads(ir::Node *node, ParamsAndGrads *params_grads) const;

  void InitFusedVarsAndAllocSpaceForVars(
      const std::vector<platform::Place> &places,
      const std::vector<Scope *> &local_scopes,
      const std::unordered_map<std::string, ir::Node *> &vars,
      const std::string &fused_var_name,
      const ParamsAndGrads &params_grads) const;

  void AppendAllocSpaceForVarsOp(const std::vector<std::string> &params_name,
                                 const std::vector<std::string> &grads_name,
                                 const std::string &fused_var_name,
                                 BlockDesc *global_block) const;
};

}  // namespace details
}  // namespace framework
}  // namespace paddle
paddle/fluid/pybind/pybind.cc (view file @ 55b15db5)
...
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
+#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/framework.pb.h"
...
@@ -165,6 +166,11 @@ PYBIND11_MODULE(core, m) {
   // to enable eager deletion mode in unittest.
   m.def("_set_eager_deletion_mode", &paddle::framework::SetEagerDeletionMode);
+
+  m.def("_set_fuse_parameter_group_size",
+        &paddle::framework::details::SetFuseParameterGroupsSize);
+  m.def("_set_fuse_parameter_memory_size",
+        &paddle::framework::details::SetFuseParameterMemorySize);
   m.add_object("_cleanup", py::capsule([]() { ScopePool::Instance().Clear(); }));
...
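With these two bindings exposed on fluid.core, a unit test can adjust the fusion flags from Python at import time instead of exporting FLAGS_* environment variables before the interpreter starts, which is exactly what the seresnext test below does. A minimal usage sketch (the concrete values are arbitrary):

import paddle.fluid as fluid

# Put at most 3 parameter gradients in one group, and close a group early
# once it reaches roughly 128 KB of gradient memory.
fluid.core._set_fuse_parameter_group_size(3)
fluid.core._set_fuse_parameter_memory_size(131072)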
python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py (view file @ 55b15db5)
...
@@ -14,10 +14,11 @@
 from __future__ import print_function
 import os
-os.environ['FLAGS_fuse_parameter_memory_size'] = "131072"
-os.environ['FLAGS_fuse_parameter_groups_size'] = "3"
+import paddle.fluid as fluid
+fluid.core._set_fuse_parameter_group_size(3)
+fluid.core._set_fuse_parameter_memory_size(131072)
 import paddle.fluid.layers.ops as ops
 from paddle.fluid.initializer import init_on_cpu
 from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
...
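For completeness, a hedged sketch of how a test might exercise the fused all_reduce path end to end. It assumes BuildStrategy exposes a fuse_all_reduce_ops switch and a compiled seresnext-style program; neither the helper name nor the exact ParallelExecutor arguments are taken from this diff.

import paddle.fluid as fluid

def run_model(main_program, loss, feed, fetch_list, use_fused_all_reduce):
    # Assumed switch: BuildStrategy.fuse_all_reduce_ops (not shown in this diff).
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = use_fused_all_reduce
    exe = fluid.ParallelExecutor(use_cuda=True,
                                 loss_name=loss.name,
                                 main_program=main_program,
                                 build_strategy=build_strategy)
    return exe.run(feed=feed, fetch_list=fetch_list)

# A test would then compare the fetched loss with and without fusion:
# baseline = run_model(prog, loss, feed, [loss.name], False)
# fused = run_model(prog, loss, feed, [loss.name], True)
# and assert the two results are numerically close.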