PaddlePaddle / Paddle, commit 101a2b61

Add dtype for coalesce_tensor_op (#20016)

Authored Sep 26, 2019 by chengduo; committed Sep 26, 2019 by gongweibao.
Parent: f04f2b23
Showing 5 changed files with 81 additions and 44 deletions:

paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc (+8, -2)
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc (+42, -17)
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h (+11, -2)
paddle/fluid/operators/coalesce_tensor_op.cc (+6, -19)
python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py (+14, -4)
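In outline, the commit moves dtype resolution for the fused buffer out of the CoalesceTensorOp kernel and into the graph passes: previously the kernel inferred the element type from its first initialized input, starting from a kDefaultDtype sentinel of VarType_Type_BOOL; now the passes read the type from the variable descriptors, enforce that every variable in a fused group has the same type, and store it on the op as an integer "dtype" attribute that the kernel casts back to framework::proto::VarType::Type. The following is a minimal, self-contained sketch of that validate-then-pass-explicitly pattern; every type and function in it is a stand-in invented for illustration, not Paddle's actual framework API.

// Sketch only: DType, Tensor, SizeOfType, and GetMemSize are hypothetical
// stand-ins that mirror the shape of the change, not Paddle's real classes.
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

enum class DType { FP32, FP64, INT64 };

struct Tensor {
  std::string name;
  DType dtype;
  size_t numel;
};

size_t SizeOfType(DType t) {
  switch (t) {
    case DType::FP32: return 4;
    case DType::FP64: return 8;
    case DType::INT64: return 8;
  }
  return 0;
}

// After the change: the expected dtype arrives as an explicit argument, so
// this helper only validates the inputs and accumulates the element count,
// instead of discovering the type from the first tensor via a sentinel.
size_t GetMemSize(const std::vector<Tensor> &inputs, DType dtype) {
  size_t numel = 0;
  for (const auto &t : inputs) {
    if (t.dtype != dtype) {
      throw std::runtime_error(t.name + ": input dtypes are not equal");
    }
    if (t.numel == 0) {
      throw std::runtime_error(t.name + " has no elements");
    }
    numel += t.numel;
  }
  return numel * SizeOfType(dtype);
}

int main() {
  std::vector<Tensor> grads = {{"w0@GRAD", DType::FP32, 256},
                               {"w1@GRAD", DType::FP32, 512}};
  // The pass side of the pattern: resolve the dtype once from the first
  // variable, check the rest against it, and hand it to the op (Paddle
  // stores static_cast<int>(dtype) as the op's "dtype" attribute).
  DType dtype = grads.front().dtype;
  std::cout << "fused buffer bytes: " << GetMemSize(grads, dtype) << "\n";
  return 0;
}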
paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc

@@ -276,7 +276,7 @@ class CoalesceGradTensorPass : public ir::Pass {
       }
       auto dtype =
-          GetDtypeOfVar(vars_info, group_params_grads->at(i).front().first);
+          GetDtypeOfVar(vars_info, group_params_grads->at(i).front().second);
       VLOG(10) << out.str()
                << ", group size:" << group_params_grads->at(i).size()
                << ", group memory size:" << static_cast<double>(gps_size) / kMB
@@ -465,28 +465,34 @@ class CoalesceGradTensorPass : public ir::Pass {
     std::vector<std::string> params_name;
     grads_name.reserve(params_grads.size());
     params_name.reserve(params_grads.size());
+    auto dtype = GetDtypeOfVar(vars_info, params_grads.front().second);
     for (auto &p_g : params_grads) {
       params_name.emplace_back(p_g.first);
       grads_name.emplace_back(p_g.second);
+      auto next_dtype = GetDtypeOfVar(vars_info, p_g.second);
+      PADDLE_ENFORCE_EQ(next_dtype, dtype);
     }

     result->Get<details::ProgramDescs>(details::kProgramDescs).emplace_back();
     ProgramDesc &program_desc =
         result->Get<details::ProgramDescs>(details::kProgramDescs).back();
     auto *global_block = program_desc.MutableBlock(0);
-    AppendAllocSpaceForVarsOp(params_name, grads_name, fused_var_name,
-                              global_block);
+    AppendAllocSpaceForVarsOp(params_name, grads_name, fused_var_name, dtype,
+                              global_block);
   }

   void AppendAllocSpaceForVarsOp(const std::vector<std::string> &params_name,
                                  const std::vector<std::string> &grads_name,
                                  const std::string &fused_var_name,
+                                 const proto::VarType::Type &dtype,
                                  BlockDesc *global_block) const {
     auto op_desc = global_block->AppendOp();
     op_desc->SetType("coalesce_tensor");
     op_desc->SetInput("Input", params_name);
     op_desc->SetOutput("Output", grads_name);
     op_desc->SetOutput("FusedOutput", {fused_var_name});
+    op_desc->SetAttr("dtype", static_cast<int>(dtype));
   }
 };
 }  // namespace ir
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc

@@ -162,17 +162,25 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
     }
   }

+  // Check dtype
+  auto dtype = GetDtypeOfVar(vars_info, aux_var_set.at(kParam).front());
+  for (auto vars : aux_var_set) {
+    for (auto &var_name : vars.second) {
+      PADDLE_ENFORCE_EQ(dtype, GetDtypeOfVar(vars_info, var_name));
+    }
+  }

   // Step 4: Alloc continuous space for Parameters and AuxiliaryVar(e.g.
   // Moment1, Moment2, Beta1Pow, Beta2Pow) of all the optimizer ops
   // separately.
   if (!grad_fused) {
-    InitFusedGradsAndAllocSpaceForGrads(aux_var_set.at(kParam),
-                                        aux_var_set.at(kGrad),
-                                        fused_vars_name.at(kGrad), &result);
+    InitFusedGradsAndAllocSpaceForGrads(aux_var_set.at(kParam),
+                                        aux_var_set.at(kGrad),
+                                        fused_vars_name.at(kGrad), dtype,
+                                        &result);
   }
   aux_var_names.pop_back();
-  InitFusedVarsAndAllocSpaceForVars(aux_var_names, aux_var_set, fused_vars_name,
-                                    &result);
+  InitFusedVarsAndAllocSpaceForVars(aux_var_names, aux_var_set, fused_vars_name,
+                                    dtype, &result);

   // Step 5: Fuse optimizer Ops and Scale Ops
   auto *fused_opt_node =
@@ -252,7 +260,7 @@ void FuseOptimizerOpPass::GradientsFilter(
 void FuseOptimizerOpPass::InitFusedGradsAndAllocSpaceForGrads(
     const std::vector<std::string> &params,
     const std::vector<std::string> &grads, const std::string &fused_grad_name,
-    ir::Graph *result) const {
+    const proto::VarType::Type &dtype, ir::Graph *result) const {
   auto &pinned_var_set =
       result->GetOrInit<details::PinnedVars>(details::kPinnedVars);
@@ -279,8 +287,8 @@ void FuseOptimizerOpPass::InitFusedGradsAndAllocSpaceForGrads(
   ProgramDesc &program_desc =
       result->Get<details::ProgramDescs>(details::kProgramDescs).back();
   auto *global_block = program_desc.MutableBlock(0);
-  AppendAllocContinuousSpace(params, grads, fused_grad_name, global_block,
-                             false, false);
+  AppendAllocContinuousSpace(params, grads, fused_grad_name, dtype,
+                             global_block, false, false);
 }

 std::unordered_map<std::string, std::vector<Node *>>
@@ -302,15 +310,30 @@ bool FuseOptimizerOpPass::IsLoDTensorType(
   return type == proto::VarType::LOD_TENSOR;
 }

+const VarDesc *FuseOptimizerOpPass::GetVarDescFromVarsInfo(
+    const std::unordered_map<std::string, std::vector<Node *>> &vars_info,
+    const std::string &var_name) const {
+  auto grad_iter = vars_info.find(var_name);
+  PADDLE_ENFORCE_EQ(grad_iter != vars_info.end(), true, "%s is not found.",
+                    var_name);
+  PADDLE_ENFORCE_EQ(!grad_iter->second.empty(), true, "%s is not found.",
+                    var_name);
+  PADDLE_ENFORCE_NOT_NULL(grad_iter->second.front()->Var());
+  return grad_iter->second.front()->Var();
+}
+
+proto::VarType::Type FuseOptimizerOpPass::GetDtypeOfVar(
+    const std::unordered_map<std::string, std::vector<ir::Node *>> &vars_info,
+    const std::string &name) const {
+  auto var_desc = GetVarDescFromVarsInfo(vars_info, name);
+  return var_desc->GetDataType();
+}
+
 proto::VarType::Type FuseOptimizerOpPass::GetTypeOfVar(
-    const std::unordered_map<std::string, std::vector<Node *>> &var_nodes,
+    const std::unordered_map<std::string, std::vector<Node *>> &vars_info,
     const std::string &name) const {
-  auto grad_iter = var_nodes.find(name);
-  PADDLE_ENFORCE_EQ(grad_iter != var_nodes.end(), true, "%s is not found.",
-                    name);
-  PADDLE_ENFORCE_GT(grad_iter->second.size(), 0);
-  PADDLE_ENFORCE_NOT_NULL(grad_iter->second.front()->Var());
-  return grad_iter->second.front()->Var()->GetType();
+  auto var_desc = GetVarDescFromVarsInfo(vars_info, name);
+  return var_desc->GetType();
 }

 void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars(
@@ -318,7 +341,7 @@ void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars(
     const std::unordered_map<std::string, std::vector<std::string>>
         &aux_var_set,
     const std::unordered_map<std::string, std::string> &fused_vars_name,
-    ir::Graph *result) const {
+    const proto::VarType::Type &dtype, ir::Graph *result) const {
   // Define Ops
   result->Get<details::ProgramDescs>(details::kProgramDescs).emplace_back();
   ProgramDesc &program_desc =
@@ -327,7 +350,7 @@ void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars(
   for (auto &var_name : aux_var_names) {
     AppendAllocContinuousSpace(aux_var_set.at(var_name),
                                aux_var_set.at(var_name),
-                               fused_vars_name.at(var_name), global_block, true);
+                               fused_vars_name.at(var_name), dtype, global_block, true);
   }
 }
@@ -393,7 +416,8 @@ void FuseOptimizerOpPass::GetSpecifiedOpsAndVars(
 void FuseOptimizerOpPass::AppendAllocContinuousSpace(
     const std::vector<std::string> &in_args,
     const std::vector<std::string> &out_args, const std::string &fused_out_arg,
-    BlockDesc *global_block, bool copy_data, bool check_name) const {
+    const proto::VarType::Type &dtype, BlockDesc *global_block, bool copy_data,
+    bool check_name) const {
   auto op_desc = global_block->AppendOp();
   op_desc->SetType("coalesce_tensor");
   op_desc->SetInput("Input", in_args);

@@ -401,6 +425,7 @@ void FuseOptimizerOpPass::AppendAllocContinuousSpace(
   op_desc->SetOutput("FusedOutput", {fused_out_arg});
   op_desc->SetAttr("copy_data", copy_data);
   op_desc->SetAttr("check_name", check_name);
+  op_desc->SetAttr("dtype", static_cast<int>(dtype));
 }

 void FuseOptimizerOpPass::InsertInputAndOutputForFusedOpNode(
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h

@@ -64,28 +64,37 @@ class FuseOptimizerOpPass : public ir::Pass {
   void AppendAllocContinuousSpace(const std::vector<std::string> &in_args,
                                   const std::vector<std::string> &out_args,
                                   const std::string &fused_out_arg,
+                                  const proto::VarType::Type &dtype,
                                   BlockDesc *global_block, bool copy_data,
                                   bool check_name = true) const;

   void InitFusedGradsAndAllocSpaceForGrads(
       const std::vector<std::string> &params,
       const std::vector<std::string> &grads, const std::string &fused_grad_name,
-      ir::Graph *result) const;
+      const proto::VarType::Type &dtype, ir::Graph *result) const;

   void InitFusedVarsAndAllocSpaceForVars(
       const std::vector<std::string> &aux_var_names,
       const std::unordered_map<std::string, std::vector<std::string>>
           &aux_var_set,
       const std::unordered_map<std::string, std::string> &fused_vars_name,
-      ir::Graph *result) const;
+      const proto::VarType::Type &dtype, ir::Graph *result) const;

   std::unordered_map<std::string, std::vector<Node *>> GetVarInfo(
       const Graph &result) const;

+  proto::VarType::Type GetDtypeOfVar(
+      const std::unordered_map<std::string, std::vector<ir::Node *>> &vars_info,
+      const std::string &name) const;
+
   proto::VarType::Type GetTypeOfVar(
       const std::unordered_map<std::string, std::vector<Node *>> &var_nodes,
       const std::string &name) const;

+  const VarDesc *GetVarDescFromVarsInfo(
+      const std::unordered_map<std::string, std::vector<Node *>> &vars_info,
+      const std::string &var_name) const;
+
   void GradientsFilter(const std::vector<size_t> &new_grad_idx,
                        std::vector<Node *> *opt_nodes,
                        std::unordered_map<std::string, std::vector<std::string>>
paddle/fluid/operators/coalesce_tensor_op.cc

@@ -23,9 +23,6 @@
 namespace paddle {
 namespace operators {

-static framework::proto::VarType::Type kDefaultDtype =
-    framework::proto::VarType::Type::VarType_Type_BOOL;
-
 template <typename DeviceContext, typename T>
 class CoalesceTensorOp : public framework::OpKernel<T> {
  public:
@@ -66,8 +63,10 @@ class CoalesceTensorOp : public framework::OpKernel<T> {
     // Get numel and dtype
     size_t numel = 0;
-    auto dtype = kDefaultDtype;
-    GetMemSizeAndDtype(in_tensors, in_var_names, &numel, &dtype,
+    auto dtype = static_cast<framework::proto::VarType::Type>(
+        context.Attr<int>("dtype"));
+    size_t size_of_dtype = framework::SizeOfType(dtype);
+    GetMemSizeAndDtype(in_tensors, in_var_names, &numel, size_of_dtype,
                        context.GetPlace());

     // Alloc the continuous space
@@ -78,7 +77,6 @@ class CoalesceTensorOp : public framework::OpKernel<T> {
     // Init the continuous space
     auto out_tensors = context.MultiOutput<framework::LoDTensor>("Output");
     size_t offset = 0;
-    size_t size_of_dtype = framework::SizeOfType(dtype);
     if (context.Attr<bool>("copy_data")) {
       for (size_t i = 0; i < in_var_names.size(); ++i) {
         size_t len = static_cast<size_t>(in_tensors[i]->numel());
@@ -120,27 +118,15 @@ class CoalesceTensorOp : public framework::OpKernel<T> {
   void GetMemSizeAndDtype(
       const std::vector<const framework::LoDTensor *> &lod_tensors,
       const std::vector<std::string> var_names, size_t *numel,
-      framework::proto::VarType::Type *dtype,
-      const platform::Place &place) const {
+      const size_t &size_of_dtype, const platform::Place &place) const {
     PADDLE_ENFORCE_EQ(lod_tensors.size(), var_names.size());
     *numel = 0;
-    size_t size_of_dtype = 0;
     std::stringstream ss;
     ss << "alloc_space_for_vars: ";
     for (size_t i = 0; i < var_names.size(); ++i) {
       PADDLE_ENFORCE(lod_tensors[i]->IsInitialized(), "%s is not initialized.",
                      var_names[i]);
-      auto p_dtype = lod_tensors[i]->type();
-      if (*dtype == kDefaultDtype) {
-        PADDLE_ENFORCE_NE(p_dtype, kDefaultDtype, "%s's type should not be %s.",
-                          var_names[i], kDefaultDtype);
-        *dtype = p_dtype;
-        size_of_dtype = framework::SizeOfType(p_dtype);
-      }
-      PADDLE_ENFORCE_EQ(p_dtype, *dtype, "Input vars is not equal.");

       auto size = lod_tensors[i]->numel();
       PADDLE_ENFORCE_GT(size, 0);
       ss << "input(" << var_names[i] << ") dim:(" << lod_tensors[i]->dims()
@@ -178,6 +164,7 @@ class AllocContinuousSpaceOpMaker : public framework::OpProtoAndCheckerMaker {
               "(LoDTensor) The output tensor "
               "of coalesce_tensor operator. And the tensors of"
               " Output is sliced from the tensor of FusedOutput.");
+    AddAttr<int>("dtype", "The output data type.");
     AddAttr<bool>("copy_data", "Whether to copy the Input value to Output.")
         .SetDefault(false);
     AddAttr<bool>("set_constant",
python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py

@@ -25,7 +25,7 @@ alignment = 256
 class TestAllocContinuousSpace(OpTest):
     def setUp(self):
         self.op_type = "coalesce_tensor"
-        self.dtype = np.float32
+        self.dtype, self.fluid_dtype = self.init_dtype()
         attrs = self.init_attr()
         self.copy_data = attrs["copy_data"]
         self.constant = attrs["constant"]
@@ -38,7 +38,7 @@ class TestAllocContinuousSpace(OpTest):
         self.outputs = {'Output': self.Outputs, 'FusedOutput': self.FusedOutput}

     def init_dtype(self):
-        self.dtype = np.float32
+        return np.float32, int(core.VarDesc.VarType.FP32)

     def init_input(self):
         inputs = []
@@ -51,7 +51,12 @@ class TestAllocContinuousSpace(OpTest):
         return inputs

     def init_attr(self):
-        return {"copy_data": True, "set_constant": False, "constant": 0.0}
+        return {
+            "copy_data": True,
+            "set_constant": False,
+            "constant": 0.0,
+            "dtype": self.fluid_dtype
+        }

     def init_output(self, input_list, set_constant, constant):
         inputs = []
@@ -82,7 +87,12 @@ class TestAllocContinuousSpace(OpTest):
 class TestAllocContinuousSpace2(TestAllocContinuousSpace):
     def init_attr(self):
-        return {"copy_data": False, "set_constant": True, "constant": 0.5}
+        return {
+            "copy_data": False,
+            "set_constant": True,
+            "constant": 0.5,
+            "dtype": self.fluid_dtype
+        }

     def test_check_output(self):
         if core.is_compiled_with_cuda():