Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
101a2b61
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
101a2b61
编写于
9月 26, 2019
作者:
C
chengduo
提交者:
gongweibao
9月 26, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add dtype for coalesce_tensor_op (#20016)
Add dtype for coalesce_tensor_op
上级
f04f2b23
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
81 addition
and
44 deletion
+81
-44
paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc
paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc
+8
-2
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
...work/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
+42
-17
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h
...ework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h
+11
-2
paddle/fluid/operators/coalesce_tensor_op.cc
paddle/fluid/operators/coalesce_tensor_op.cc
+6
-19
python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py
...n/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py
+14
-4
未找到文件。
paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc
浏览文件 @
101a2b61
...
...
@@ -276,7 +276,7 @@ class CoalesceGradTensorPass : public ir::Pass {
}
auto
dtype
=
GetDtypeOfVar
(
vars_info
,
group_params_grads
->
at
(
i
).
front
().
first
);
GetDtypeOfVar
(
vars_info
,
group_params_grads
->
at
(
i
).
front
().
second
);
VLOG
(
10
)
<<
out
.
str
()
<<
", group size:"
<<
group_params_grads
->
at
(
i
).
size
()
<<
", group memory size:"
<<
static_cast
<
double
>
(
gps_size
)
/
kMB
...
...
@@ -465,28 +465,34 @@ class CoalesceGradTensorPass : public ir::Pass {
std
::
vector
<
std
::
string
>
params_name
;
grads_name
.
reserve
(
params_grads
.
size
());
params_name
.
reserve
(
params_grads
.
size
());
auto
dtype
=
GetDtypeOfVar
(
vars_info
,
params_grads
.
front
().
second
);
for
(
auto
&
p_g
:
params_grads
)
{
params_name
.
emplace_back
(
p_g
.
first
);
grads_name
.
emplace_back
(
p_g
.
second
);
auto
next_dtype
=
GetDtypeOfVar
(
vars_info
,
p_g
.
second
);
PADDLE_ENFORCE_EQ
(
next_dtype
,
dtype
);
}
result
->
Get
<
details
::
ProgramDescs
>
(
details
::
kProgramDescs
).
emplace_back
();
ProgramDesc
&
program_desc
=
result
->
Get
<
details
::
ProgramDescs
>
(
details
::
kProgramDescs
).
back
();
auto
*
global_block
=
program_desc
.
MutableBlock
(
0
);
AppendAllocSpaceForVarsOp
(
params_name
,
grads_name
,
fused_var_name
,
AppendAllocSpaceForVarsOp
(
params_name
,
grads_name
,
fused_var_name
,
dtype
,
global_block
);
}
void
AppendAllocSpaceForVarsOp
(
const
std
::
vector
<
std
::
string
>
&
params_name
,
const
std
::
vector
<
std
::
string
>
&
grads_name
,
const
std
::
string
&
fused_var_name
,
const
proto
::
VarType
::
Type
&
dtype
,
BlockDesc
*
global_block
)
const
{
auto
op_desc
=
global_block
->
AppendOp
();
op_desc
->
SetType
(
"coalesce_tensor"
);
op_desc
->
SetInput
(
"Input"
,
params_name
);
op_desc
->
SetOutput
(
"Output"
,
grads_name
);
op_desc
->
SetOutput
(
"FusedOutput"
,
{
fused_var_name
});
op_desc
->
SetAttr
(
"dtype"
,
static_cast
<
int
>
(
dtype
));
}
};
}
// namespace ir
...
...
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
浏览文件 @
101a2b61
...
...
@@ -162,17 +162,25 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
}
}
// Check dtype
auto
dtype
=
GetDtypeOfVar
(
vars_info
,
aux_var_set
.
at
(
kParam
).
front
());
for
(
auto
vars
:
aux_var_set
)
{
for
(
auto
&
var_name
:
vars
.
second
)
{
PADDLE_ENFORCE_EQ
(
dtype
,
GetDtypeOfVar
(
vars_info
,
var_name
));
}
}
// Step 4: Alloc continuous space for Parameters and AuxiliaryVar(e.g.
// Moment1, Moment2, Beta1Pow, Beta2Pow) of all the optimizer ops
// separately.
if
(
!
grad_fused
)
{
InitFusedGradsAndAllocSpaceForGrads
(
aux_var_set
.
at
(
kParam
),
aux_var_set
.
at
(
kGrad
),
fused_vars_name
.
at
(
kGrad
)
,
&
result
);
InitFusedGradsAndAllocSpaceForGrads
(
aux_var_set
.
at
(
kParam
),
aux_var_set
.
at
(
kGrad
),
fused_vars_name
.
at
(
kGrad
),
dtype
,
&
result
);
}
aux_var_names
.
pop_back
();
InitFusedVarsAndAllocSpaceForVars
(
aux_var_names
,
aux_var_set
,
fused_vars_name
,
&
result
);
dtype
,
&
result
);
// Step 5: Fuse optimizer Ops and Scale Ops
auto
*
fused_opt_node
=
...
...
@@ -252,7 +260,7 @@ void FuseOptimizerOpPass::GradientsFilter(
void
FuseOptimizerOpPass
::
InitFusedGradsAndAllocSpaceForGrads
(
const
std
::
vector
<
std
::
string
>
&
params
,
const
std
::
vector
<
std
::
string
>
&
grads
,
const
std
::
string
&
fused_grad_name
,
ir
::
Graph
*
result
)
const
{
const
proto
::
VarType
::
Type
&
dtype
,
ir
::
Graph
*
result
)
const
{
auto
&
pinned_var_set
=
result
->
GetOrInit
<
details
::
PinnedVars
>
(
details
::
kPinnedVars
);
...
...
@@ -279,8 +287,8 @@ void FuseOptimizerOpPass::InitFusedGradsAndAllocSpaceForGrads(
ProgramDesc
&
program_desc
=
result
->
Get
<
details
::
ProgramDescs
>
(
details
::
kProgramDescs
).
back
();
auto
*
global_block
=
program_desc
.
MutableBlock
(
0
);
AppendAllocContinuousSpace
(
params
,
grads
,
fused_grad_name
,
global_block
,
false
,
false
);
AppendAllocContinuousSpace
(
params
,
grads
,
fused_grad_name
,
dtype
,
global_block
,
false
,
false
);
}
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
Node
*>>
...
...
@@ -302,15 +310,30 @@ bool FuseOptimizerOpPass::IsLoDTensorType(
return
type
==
proto
::
VarType
::
LOD_TENSOR
;
}
const
VarDesc
*
FuseOptimizerOpPass
::
GetVarDescFromVarsInfo
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
Node
*>>
&
vars_info
,
const
std
::
string
&
var_name
)
const
{
auto
grad_iter
=
vars_info
.
find
(
var_name
);
PADDLE_ENFORCE_EQ
(
grad_iter
!=
vars_info
.
end
(),
true
,
"%s is not found."
,
var_name
);
PADDLE_ENFORCE_EQ
(
!
grad_iter
->
second
.
empty
(),
true
,
"%s is not found."
,
var_name
);
PADDLE_ENFORCE_NOT_NULL
(
grad_iter
->
second
.
front
()
->
Var
());
return
grad_iter
->
second
.
front
()
->
Var
();
}
proto
::
VarType
::
Type
FuseOptimizerOpPass
::
GetDtypeOfVar
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
&
vars_info
,
const
std
::
string
&
name
)
const
{
auto
var_desc
=
GetVarDescFromVarsInfo
(
vars_info
,
name
);
return
var_desc
->
GetDataType
();
}
proto
::
VarType
::
Type
FuseOptimizerOpPass
::
GetTypeOfVar
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
Node
*>>
&
var
_nodes
,
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
Node
*>>
&
var
s_info
,
const
std
::
string
&
name
)
const
{
auto
grad_iter
=
var_nodes
.
find
(
name
);
PADDLE_ENFORCE_EQ
(
grad_iter
!=
var_nodes
.
end
(),
true
,
"%s is not found."
,
name
);
PADDLE_ENFORCE_GT
(
grad_iter
->
second
.
size
(),
0
);
PADDLE_ENFORCE_NOT_NULL
(
grad_iter
->
second
.
front
()
->
Var
());
return
grad_iter
->
second
.
front
()
->
Var
()
->
GetType
();
auto
var_desc
=
GetVarDescFromVarsInfo
(
vars_info
,
name
);
return
var_desc
->
GetType
();
}
void
FuseOptimizerOpPass
::
InitFusedVarsAndAllocSpaceForVars
(
...
...
@@ -318,7 +341,7 @@ void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
aux_var_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
ir
::
Graph
*
result
)
const
{
const
proto
::
VarType
::
Type
&
dtype
,
ir
::
Graph
*
result
)
const
{
// Define Ops
result
->
Get
<
details
::
ProgramDescs
>
(
details
::
kProgramDescs
).
emplace_back
();
ProgramDesc
&
program_desc
=
...
...
@@ -327,7 +350,7 @@ void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars(
for
(
auto
&
var_name
:
aux_var_names
)
{
AppendAllocContinuousSpace
(
aux_var_set
.
at
(
var_name
),
aux_var_set
.
at
(
var_name
),
fused_vars_name
.
at
(
var_name
),
global_block
,
true
);
fused_vars_name
.
at
(
var_name
),
dtype
,
global_block
,
true
);
}
}
...
...
@@ -393,7 +416,8 @@ void FuseOptimizerOpPass::GetSpecifiedOpsAndVars(
void
FuseOptimizerOpPass
::
AppendAllocContinuousSpace
(
const
std
::
vector
<
std
::
string
>
&
in_args
,
const
std
::
vector
<
std
::
string
>
&
out_args
,
const
std
::
string
&
fused_out_arg
,
BlockDesc
*
global_block
,
bool
copy_data
,
bool
check_name
)
const
{
const
proto
::
VarType
::
Type
&
dtype
,
BlockDesc
*
global_block
,
bool
copy_data
,
bool
check_name
)
const
{
auto
op_desc
=
global_block
->
AppendOp
();
op_desc
->
SetType
(
"coalesce_tensor"
);
op_desc
->
SetInput
(
"Input"
,
in_args
);
...
...
@@ -401,6 +425,7 @@ void FuseOptimizerOpPass::AppendAllocContinuousSpace(
op_desc
->
SetOutput
(
"FusedOutput"
,
{
fused_out_arg
});
op_desc
->
SetAttr
(
"copy_data"
,
copy_data
);
op_desc
->
SetAttr
(
"check_name"
,
check_name
);
op_desc
->
SetAttr
(
"dtype"
,
static_cast
<
int
>
(
dtype
));
}
void
FuseOptimizerOpPass
::
InsertInputAndOutputForFusedOpNode
(
...
...
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h
浏览文件 @
101a2b61
...
...
@@ -64,28 +64,37 @@ class FuseOptimizerOpPass : public ir::Pass {
void
AppendAllocContinuousSpace
(
const
std
::
vector
<
std
::
string
>
&
in_args
,
const
std
::
vector
<
std
::
string
>
&
out_args
,
const
std
::
string
&
fused_out_arg
,
const
proto
::
VarType
::
Type
&
dtype
,
BlockDesc
*
global_block
,
bool
copy_data
,
bool
check_name
=
true
)
const
;
void
InitFusedGradsAndAllocSpaceForGrads
(
const
std
::
vector
<
std
::
string
>
&
params
,
const
std
::
vector
<
std
::
string
>
&
grads
,
const
std
::
string
&
fused_grad_name
,
ir
::
Graph
*
result
)
const
;
const
proto
::
VarType
::
Type
&
dtype
,
ir
::
Graph
*
result
)
const
;
void
InitFusedVarsAndAllocSpaceForVars
(
const
std
::
vector
<
std
::
string
>
&
aux_var_names
,
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
aux_var_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
ir
::
Graph
*
result
)
const
;
const
proto
::
VarType
::
Type
&
dtype
,
ir
::
Graph
*
result
)
const
;
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
Node
*>>
GetVarInfo
(
const
Graph
&
result
)
const
;
proto
::
VarType
::
Type
GetDtypeOfVar
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
&
vars_info
,
const
std
::
string
&
name
)
const
;
proto
::
VarType
::
Type
GetTypeOfVar
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
Node
*>>
&
var_nodes
,
const
std
::
string
&
name
)
const
;
const
VarDesc
*
GetVarDescFromVarsInfo
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
Node
*>>
&
vars_info
,
const
std
::
string
&
var_name
)
const
;
void
GradientsFilter
(
const
std
::
vector
<
size_t
>
&
new_grad_idx
,
std
::
vector
<
Node
*>
*
opt_nodes
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
...
...
paddle/fluid/operators/coalesce_tensor_op.cc
浏览文件 @
101a2b61
...
...
@@ -23,9 +23,6 @@
namespace
paddle
{
namespace
operators
{
static
framework
::
proto
::
VarType
::
Type
kDefaultDtype
=
framework
::
proto
::
VarType
::
Type
::
VarType_Type_BOOL
;
template
<
typename
DeviceContext
,
typename
T
>
class
CoalesceTensorOp
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -66,8 +63,10 @@ class CoalesceTensorOp : public framework::OpKernel<T> {
// Get numel and dtype
size_t
numel
=
0
;
auto
dtype
=
kDefaultDtype
;
GetMemSizeAndDtype
(
in_tensors
,
in_var_names
,
&
numel
,
&
dtype
,
auto
dtype
=
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
context
.
Attr
<
int
>
(
"dtype"
));
size_t
size_of_dtype
=
framework
::
SizeOfType
(
dtype
);
GetMemSizeAndDtype
(
in_tensors
,
in_var_names
,
&
numel
,
size_of_dtype
,
context
.
GetPlace
());
// Alloc the continuous space
...
...
@@ -78,7 +77,6 @@ class CoalesceTensorOp : public framework::OpKernel<T> {
// Init the continuous space
auto
out_tensors
=
context
.
MultiOutput
<
framework
::
LoDTensor
>
(
"Output"
);
size_t
offset
=
0
;
size_t
size_of_dtype
=
framework
::
SizeOfType
(
dtype
);
if
(
context
.
Attr
<
bool
>
(
"copy_data"
))
{
for
(
size_t
i
=
0
;
i
<
in_var_names
.
size
();
++
i
)
{
size_t
len
=
static_cast
<
size_t
>
(
in_tensors
[
i
]
->
numel
());
...
...
@@ -120,27 +118,15 @@ class CoalesceTensorOp : public framework::OpKernel<T> {
void
GetMemSizeAndDtype
(
const
std
::
vector
<
const
framework
::
LoDTensor
*>
&
lod_tensors
,
const
std
::
vector
<
std
::
string
>
var_names
,
size_t
*
numel
,
framework
::
proto
::
VarType
::
Type
*
dtype
,
const
platform
::
Place
&
place
)
const
{
const
size_t
&
size_of_dtype
,
const
platform
::
Place
&
place
)
const
{
PADDLE_ENFORCE_EQ
(
lod_tensors
.
size
(),
var_names
.
size
());
*
numel
=
0
;
size_t
size_of_dtype
=
0
;
std
::
stringstream
ss
;
ss
<<
"alloc_space_for_vars: "
;
for
(
size_t
i
=
0
;
i
<
var_names
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
lod_tensors
[
i
]
->
IsInitialized
(),
"%s is not initialized."
,
var_names
[
i
]);
auto
p_dtype
=
lod_tensors
[
i
]
->
type
();
if
(
*
dtype
==
kDefaultDtype
)
{
PADDLE_ENFORCE_NE
(
p_dtype
,
kDefaultDtype
,
"%s's type should not be %s."
,
var_names
[
i
],
kDefaultDtype
);
*
dtype
=
p_dtype
;
size_of_dtype
=
framework
::
SizeOfType
(
p_dtype
);
}
PADDLE_ENFORCE_EQ
(
p_dtype
,
*
dtype
,
"Input vars is not equal."
);
auto
size
=
lod_tensors
[
i
]
->
numel
();
PADDLE_ENFORCE_GT
(
size
,
0
);
ss
<<
"input("
<<
var_names
[
i
]
<<
") dim:("
<<
lod_tensors
[
i
]
->
dims
()
...
...
@@ -178,6 +164,7 @@ class AllocContinuousSpaceOpMaker : public framework::OpProtoAndCheckerMaker {
"(LoDTensor) The output tensor "
"of coalesce_tensor operator. And the tensors of"
" Output is sliced from the tensor of FusedOutput."
);
AddAttr
<
int
>
(
"dtype"
,
"The output data type."
);
AddAttr
<
bool
>
(
"copy_data"
,
"Whether to copy the Input value to Output."
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"set_constant"
,
...
...
python/paddle/fluid/tests/unittests/test_coalesce_tensor_op.py
浏览文件 @
101a2b61
...
...
@@ -25,7 +25,7 @@ alignment = 256
class
TestAllocContinuousSpace
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"coalesce_tensor"
self
.
dtype
=
np
.
float32
self
.
dtype
,
self
.
fluid_dtype
=
self
.
init_dtype
()
attrs
=
self
.
init_attr
()
self
.
copy_data
=
attrs
[
"copy_data"
]
self
.
constant
=
attrs
[
"constant"
]
...
...
@@ -38,7 +38,7 @@ class TestAllocContinuousSpace(OpTest):
self
.
outputs
=
{
'Output'
:
self
.
Outputs
,
'FusedOutput'
:
self
.
FusedOutput
}
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float32
return
np
.
float32
,
int
(
core
.
VarDesc
.
VarType
.
FP32
)
def
init_input
(
self
):
inputs
=
[]
...
...
@@ -51,7 +51,12 @@ class TestAllocContinuousSpace(OpTest):
return
inputs
def
init_attr
(
self
):
return
{
"copy_data"
:
True
,
"set_constant"
:
False
,
"constant"
:
0.0
}
return
{
"copy_data"
:
True
,
"set_constant"
:
False
,
"constant"
:
0.0
,
"dtype"
:
self
.
fluid_dtype
}
def
init_output
(
self
,
input_list
,
set_constant
,
constant
):
inputs
=
[]
...
...
@@ -82,7 +87,12 @@ class TestAllocContinuousSpace(OpTest):
class
TestAllocContinuousSpace2
(
TestAllocContinuousSpace
):
def
init_attr
(
self
):
return
{
"copy_data"
:
False
,
"set_constant"
:
True
,
"constant"
:
0.5
}
return
{
"copy_data"
:
False
,
"set_constant"
:
True
,
"constant"
:
0.5
,
"dtype"
:
self
.
fluid_dtype
}
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录