Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
1a0fc5d8
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1a0fc5d8
编写于
12月 21, 2017
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add the simple support of no_grad_set
上级
278ac7be
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
51 addition
and
23 deletion
+51
-23
paddle/pybind/pybind.cc
paddle/pybind/pybind.cc
+2
-1
python/paddle/v2/fluid/backward.py
python/paddle/v2/fluid/backward.py
+49
-22
未找到文件。
paddle/pybind/pybind.cc
浏览文件 @
1a0fc5d8
...
@@ -314,7 +314,8 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -314,7 +314,8 @@ All parameter, weight, gradient are variables in Paddle.
InferenceOptimize
(
*
(
origin
.
Proto
()),
&
pruned_desc
);
InferenceOptimize
(
*
(
origin
.
Proto
()),
&
pruned_desc
);
return
new
ProgramDescBind
(
pruned_desc
);
return
new
ProgramDescBind
(
pruned_desc
);
});
});
m
.
def
(
"get_empty_var_name"
,
[]()
{
return
framework
::
kEmptyVarName
;
});
m
.
def
(
"empty_var_name"
,
[]()
{
return
framework
::
kEmptyVarName
;
});
m
.
def
(
"grad_var_suffix"
,
[]()
{
return
framework
::
kGradVarSuffix
;
});
m
.
def_submodule
(
m
.
def_submodule
(
"var_names"
,
"var_names"
,
"The module will return special predefined variable name in Paddle"
)
"The module will return special predefined variable name in Paddle"
)
...
...
python/paddle/v2/fluid/backward.py
浏览文件 @
1a0fc5d8
...
@@ -32,12 +32,27 @@ def _create_op_desc_(op_type, inputs, outputs, attrs):
...
@@ -32,12 +32,27 @@ def _create_op_desc_(op_type, inputs, outputs, attrs):
return
op_desc
return
op_desc
def
backward_impl
(
target
,
def
_is_all_in_set_
(
cands
,
s
):
block
,
for
c
in
cands
:
target_block
,
if
not
c
in
s
:
no_grad_set
,
return
False
grad_info_map
,
return
True
callback
=
None
):
def
_strip_grad_suffix_
(
name
):
return
name
[:
name
.
find
(
core
.
grad_var_suffix
())]
def
_append_grad_suffix_
(
name
):
return
name
+
core
.
grad_var_suffix
()
def
_backward_impl_
(
target
,
block
,
target_block
,
no_grad_set
,
grad_info_map
,
callback
=
None
):
grad_op_descs
=
[]
grad_op_descs
=
[]
grad_to_var
=
dict
()
grad_to_var
=
dict
()
program
=
block
.
program
program
=
block
.
program
...
@@ -47,8 +62,8 @@ def backward_impl(target,
...
@@ -47,8 +62,8 @@ def backward_impl(target,
sub_block_idx
=
each_op
.
block_attr
(
"sub_block"
)
sub_block_idx
=
each_op
.
block_attr
(
"sub_block"
)
sub_block
=
program
.
block
(
sub_block_idx
)
sub_block
=
program
.
block
(
sub_block_idx
)
grad_sub_block
=
program
.
create_block
(
parent_idx
=
sub_block_idx
)
grad_sub_block
=
program
.
create_block
(
parent_idx
=
sub_block_idx
)
backward_impl
(
target
,
sub_block
,
grad_sub_block
,
no_grad_set
,
_backward_impl_
(
target
,
sub_block
,
grad_sub_block
,
no_grad_set
,
grad_info_map
,
callback
)
grad_info_map
,
callback
)
grad_sub_block_list
.
append
(
grad_sub_block
)
grad_sub_block_list
.
append
(
grad_sub_block
)
grad_op_desc
,
op_grad_to_var
=
core
.
get_grad_op_desc
(
grad_op_desc
,
op_grad_to_var
=
core
.
get_grad_op_desc
(
each_op
.
desc
,
no_grad_set
[
block
.
idx
],
grad_sub_block_list
)
each_op
.
desc
,
no_grad_set
[
block
.
idx
],
grad_sub_block_list
)
...
@@ -61,14 +76,14 @@ def backward_impl(target,
...
@@ -61,14 +76,14 @@ def backward_impl(target,
pending_sum_ops
=
[]
pending_sum_ops
=
[]
var_rename_count
=
collections
.
defaultdict
(
int
)
var_rename_count
=
collections
.
defaultdict
(
int
)
var_inputs
=
collections
.
defaultdict
(
list
)
var_inputs
=
collections
.
defaultdict
(
list
)
for
pos
,
op_desc
in
enumerate
(
grad_op_descs
):
for
idx
,
op_desc
in
enumerate
(
grad_op_descs
):
for
var_name
in
op_desc
.
input_arg_names
():
for
var_name
in
op_desc
.
input_arg_names
():
if
len
(
var_inputs
[
var_name
])
>
1
:
if
len
(
var_inputs
[
var_name
])
>
1
:
pending_sum_ops
.
append
((
_create_op_desc_
(
pending_sum_ops
.
append
((
_create_op_desc_
(
op_type
=
"sum_op"
,
op_type
=
"sum_op"
,
inputs
=
var_inputs
[
var_name
],
inputs
=
var_inputs
[
var_name
],
outputs
=
[
var_name
],
outputs
=
[
var_name
],
attrs
=
{}),
pos
))
attrs
=
{}),
idx
))
var_inputs
[
var_name
]
=
[
var_name
]
var_inputs
[
var_name
]
=
[
var_name
]
for
var_name
in
op_desc
.
output_arg_names
():
for
var_name
in
op_desc
.
output_arg_names
():
if
len
(
var_inputs
[
var_name
])
==
0
:
if
len
(
var_inputs
[
var_name
])
==
0
:
...
@@ -81,7 +96,7 @@ def backward_impl(target,
...
@@ -81,7 +96,7 @@ def backward_impl(target,
var_rename_count
[
var_name
]
=
var_rename_count
[
var_name
]
+
1
var_rename_count
[
var_name
]
=
var_rename_count
[
var_name
]
+
1
# rename original var_name
# rename original var_name
var_inputs
[
var_name
][
0
]
=
new_name
var_inputs
[
var_name
][
0
]
=
new_name
_rename_arg_
(
grad_op_descs
,
var_name
,
new_name
,
0
,
pos
)
_rename_arg_
(
grad_op_descs
,
var_name
,
new_name
,
0
,
idx
)
_rename_arg_
(
pending_sum_ops
,
var_name
,
new_name
)
_rename_arg_
(
pending_sum_ops
,
var_name
,
new_name
)
new_name
=
var_name
+
"@RENAME@"
+
\
new_name
=
var_name
+
"@RENAME@"
+
\
...
@@ -96,18 +111,31 @@ def backward_impl(target,
...
@@ -96,18 +111,31 @@ def backward_impl(target,
inputs
=
{
"X"
:
inputs
},
inputs
=
{
"X"
:
inputs
},
outputs
=
{
"Out"
:
var_name
},
outputs
=
{
"Out"
:
var_name
},
attrs
=
{}),
len
(
grad_op_descs
)))
attrs
=
{}),
len
(
grad_op_descs
)))
# TODO: remove op in no grad set
# 根据append的顺序可以看出pending_sum_ops一定是根据sum_op的插入位置排序的
# 根据append的顺序可以看出pending_sum_ops一定是根据sum_op的插入位置排序的
for
p
in
reversed
(
pending_sum_ops
):
for
p
in
reversed
(
pending_sum_ops
):
grad_op_descs
.
insert
(
p
[
1
],
p
[
0
])
grad_op_descs
.
insert
(
p
[
1
],
p
[
0
])
# Remove ops whose outputs are all in no_grad_set
grad_op_descs
=
filter
(
lambda
op_desc
:
not
_is_all_in_set_
(
op_desc
.
output_arg_names
(),
no_grad_set
[
block
.
idx
]),
grad_op_descs
)
# Insert fill_zeros_like_op
to_insert
=
[]
for
idx
,
op_desc
in
enumerate
(
grad_op_descs
):
for
arg
in
op_desc
.
input_arg_names
():
if
arg
in
no_grad_set
[
block
.
idx
]:
to_insert
.
append
((
arg
,
idx
))
for
ele
in
reversed
(
to_insert
):
arg
=
ele
[
0
]
fill_zeros_like_op
=
_create_op_desc_
(
"fill_zeros_like"
,
{
"X"
:
[
_strip_grad_suffix_
(
arg
)]},
{
"Y"
:
[
arg
]},
{})
grad_op_descs
.
insert
(
ele
[
1
],
fill_zeros_like_op
)
# create new gradient variables in the target block desc
# create new gradient variables in the target block desc
for
op_desc
in
grad_op_descs
:
for
op_desc
in
grad_op_descs
:
for
grad_var_name
in
op_desc
.
output_arg_names
():
for
grad_var_name
in
op_desc
.
output_arg_names
():
grad_var_name
=
grad_var_name
.
encode
(
"ascii"
)
grad_var_name
=
grad_var_name
.
encode
(
"ascii"
)
if
target_block
.
desc
.
has_var
(
if
target_block
.
desc
.
has_var
(
grad_var_name
)
or
grad_var_name
==
core
.
get_empty_var_name
(
grad_var_name
)
or
grad_var_name
==
core
.
empty_var_name
():
):
continue
continue
target_block
.
desc
.
var
(
grad_var_name
)
target_block
.
desc
.
var
(
grad_var_name
)
if
not
grad_to_var
.
has_key
(
grad_var_name
):
if
not
grad_to_var
.
has_key
(
grad_var_name
):
...
@@ -115,8 +143,8 @@ def backward_impl(target,
...
@@ -115,8 +143,8 @@ def backward_impl(target,
grad_info_map
[
grad_to_var
[
grad_var_name
]]
=
(
grad_var_name
,
grad_info_map
[
grad_to_var
[
grad_var_name
]]
=
(
grad_var_name
,
target_block
)
target_block
)
if
target_block
.
idx
==
0
:
if
target_block
.
idx
==
0
:
grad_target_name
=
(
target
.
name
+
"@GRAD"
)
grad_target_name
=
_append_grad_suffix_
(
target
.
name
)
target_block
.
desc
.
var
(
grad_target_name
)
target_block
.
desc
.
var
(
grad_target_name
.
encode
(
"ascii"
)
)
grad_op_descs
.
insert
(
grad_op_descs
.
insert
(
0
,
0
,
_create_op_desc_
(
_create_op_desc_
(
...
@@ -134,7 +162,6 @@ def backward_impl(target,
...
@@ -134,7 +162,6 @@ def backward_impl(target,
op_desc
.
infer_shape
(
target_block
.
desc
)
op_desc
.
infer_shape
(
target_block
.
desc
)
target_block
.
desc
.
append_allocated_op
(
op_desc
)
target_block
.
desc
.
append_allocated_op
(
op_desc
)
pdb
.
set_trace
()
target_block
.
sync_with_cpp
()
target_block
.
sync_with_cpp
()
...
@@ -165,14 +192,14 @@ def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
...
@@ -165,14 +192,14 @@ def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
for
var
in
block
.
vars
.
itervalues
():
for
var
in
block
.
vars
.
itervalues
():
assert
isinstance
(
var
,
framework
.
Variable
)
assert
isinstance
(
var
,
framework
.
Variable
)
if
var
.
stop_gradient
:
if
var
.
stop_gradient
:
block_no_grad_set
.
add
(
var
.
name
)
block_no_grad_set
.
add
(
_append_grad_suffix_
(
var
.
name
)
)
no_grad_set
[
block
.
idx
]
=
block_no_grad_set
no_grad_set
[
block
.
idx
]
=
block_no_grad_set
grad_info_map
=
dict
()
grad_info_map
=
dict
()
root_block
=
loss
.
block
.
program
.
block
(
0
)
root_block
=
loss
.
block
.
program
.
block
(
0
)
pdb
.
set_trace
()
backward_impl
(
loss
,
root_block
,
root_block
,
no_grad_set
,
grad_info_map
)
_backward_impl_
(
loss
,
root_block
,
root_block
,
no_grad_set
,
grad_info_map
)
pdb
.
set_trace
()
if
parameter_list
is
not
None
:
if
parameter_list
is
not
None
:
parameters
=
parameter_list
parameters
=
parameter_list
else
:
else
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录