Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
5b9dbbb9
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5b9dbbb9
编写于
12月 26, 2017
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
code clean
上级
febb7251
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
92 addition
and
89 deletion
+92
-89
python/paddle/v2/fluid/backward.py
python/paddle/v2/fluid/backward.py
+92
-89
未找到文件。
python/paddle/v2/fluid/backward.py
浏览文件 @
5b9dbbb9
from
paddle.v2.fluid
import
framework
as
framework
from
paddle.v2.fluid
import
framework
as
framework
from
.
import
core
from
.
import
core
import
collections
import
collections
import
pdb
__all__
=
[
'append_backward'
]
__all__
=
[
'append_backward'
]
...
@@ -45,7 +44,7 @@ def _infer_var_data_type_(var_name, block):
...
@@ -45,7 +44,7 @@ def _infer_var_data_type_(var_name, block):
grad_var
.
set_dtype
(
core
.
DataType
.
FP32
)
grad_var
.
set_dtype
(
core
.
DataType
.
FP32
)
def
_
is_
all_in_set_
(
cands
,
s
):
def
_all_in_set_
(
cands
,
s
):
for
c
in
cands
:
for
c
in
cands
:
if
not
c
in
s
:
if
not
c
in
s
:
return
False
return
False
...
@@ -61,112 +60,114 @@ def _append_grad_suffix_(name):
...
@@ -61,112 +60,114 @@ def _append_grad_suffix_(name):
return
name
+
core
.
grad_var_suffix
()
return
name
+
core
.
grad_var_suffix
()
def
_append_backward_ops_
(
target
,
def
_addup_repetitive_outputs_
(
op_descs
):
block
,
# In backward part, an variable my be the output of more than one ops.
target_block
,
# In this case, the variable should be the accumulation of all the outputs.
no_grad_set
,
# We adopt adding `sum_op`s to implement the accumulate.
callback
=
None
):
grad_op_descs
=
[]
grad_to_var
=
dict
()
program
=
block
.
program
for
each_op
in
reversed
(
block
.
ops
):
grad_sub_block_list
=
[]
if
each_op
.
has_attr
(
"sub_block"
):
sub_block_idx
=
each_op
.
block_attr
(
"sub_block"
)
sub_block
=
program
.
block
(
sub_block_idx
)
grad_sub_block
=
program
.
create_block
(
parent_idx
=
sub_block_idx
)
sub_grad_to_var
=
_append_backward_ops_
(
target
,
sub_block
,
grad_sub_block
,
no_grad_set
,
callback
)
grad_to_var
=
dict
(
grad_to_var
,
**
sub_grad_to_var
)
grad_sub_block_list
.
append
(
grad_sub_block
.
desc
)
grad_op_desc
,
op_grad_to_var
=
core
.
get_grad_op_desc
(
each_op
.
desc
,
no_grad_set
[
block
.
idx
],
grad_sub_block_list
)
grad_op_descs
.
append
(
grad_op_desc
)
grad_to_var
=
dict
(
grad_to_var
,
**
op_grad_to_var
)
# grad_op_descs = [[op1_g1, op1_g2], [op2_g], ...]
# flatten grad_op_descs
grad_op_descs
=
[
op
for
sublist
in
grad_op_descs
for
op
in
sublist
]
# ?????
pending_sum_ops
=
[]
pending_sum_ops
=
[]
var_rename_count
=
collections
.
defaultdict
(
int
)
var_rename_count
=
collections
.
defaultdict
(
int
)
var_input
s
=
collections
.
defaultdict
(
list
)
renamed_var
s
=
collections
.
defaultdict
(
list
)
for
idx
,
op_desc
in
enumerate
(
grad_
op_descs
):
for
idx
,
op_desc
in
enumerate
(
op_descs
):
for
var_name
in
op_desc
.
input_arg_names
():
for
var_name
in
op_desc
.
input_arg_names
():
if
len
(
var_inputs
[
var_name
])
>
1
:
if
len
(
renamed_vars
[
var_name
])
>
1
:
pending_sum_ops
.
append
((
_create_op_desc_
(
pending_sum_ops
.
append
(
op_type
=
"sum"
,
(
_create_op_desc_
(
"sum"
,
{
"X"
:
renamed_vars
[
var_name
]},
inputs
=
{
"X"
:
var_inputs
[
var_name
]},
{
"Out"
:
[
var_name
]},
{}),
idx
))
outputs
=
{
"Out"
:
[
var_name
]},
renamed_vars
[
var_name
]
=
[
var_name
]
attrs
=
{}),
idx
))
var_inputs
[
var_name
]
=
[
var_name
]
for
var_name
in
op_desc
.
output_arg_names
():
for
var_name
in
op_desc
.
output_arg_names
():
if
var_name
in
op_desc
.
input_arg_names
():
if
var_name
==
core
.
empty_var_name
(
# in place operator
)
or
var_name
in
op_desc
.
input_arg_names
():
# empty variable or inplace op
continue
continue
if
var_name
==
core
.
empty_var_name
()
or
len
(
var_inputs
[
if
len
(
renamed_vars
[
var_name
])
==
0
:
var_name
])
==
0
:
# it's the first time we get the variable
# it's the first time we get the variable
var_input
s
[
var_name
]
=
[
var_name
]
renamed_var
s
[
var_name
]
=
[
var_name
]
else
:
else
:
if
len
(
var_input
s
[
var_name
])
==
1
:
if
len
(
renamed_var
s
[
var_name
])
==
1
:
new_name
=
var_name
+
"@RENAME@"
+
\
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
=
var_rename_count
[
var_name
]
+
1
var_rename_count
[
var_name
]
+=
1
# rename original var_name
# rename original var_name
var_input
s
[
var_name
][
0
]
=
new_name
renamed_var
s
[
var_name
][
0
]
=
new_name
_rename_arg_
(
grad_
op_descs
,
var_name
,
new_name
,
0
,
idx
)
_rename_arg_
(
op_descs
,
var_name
,
new_name
,
0
,
idx
)
_rename_arg_
(
pending_sum_ops
,
var_name
,
new_name
)
_rename_arg_
(
pending_sum_ops
,
var_name
,
new_name
)
new_name
=
var_name
+
"@RENAME@"
+
\
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
=
var_rename_count
[
var_name
]
+
1
var_rename_count
[
var_name
]
+=
1
op_desc
.
rename_output
(
var_name
,
new_name
)
op_desc
.
rename_output
(
var_name
,
new_name
)
var_input
s
[
var_name
].
append
(
new_name
)
renamed_var
s
[
var_name
].
append
(
new_name
)
for
var_name
,
inputs
in
var_input
s
.
iteritems
():
for
var_name
,
inputs
in
renamed_var
s
.
iteritems
():
if
len
(
inputs
)
>
1
:
if
len
(
inputs
)
>
1
:
pending_sum_ops
.
append
((
_create_op_desc_
(
pending_sum_ops
.
append
((
_create_op_desc_
(
op_type
=
"sum"
,
"sum"
,
{
"X"
:
inputs
},
{
"Out"
:
[
var_name
]},
{}),
len
(
op_descs
)))
inputs
=
{
"X"
:
inputs
},
outputs
=
{
"Out"
:
[
var_name
]},
attrs
=
{}),
len
(
grad_op_descs
)))
# sum_op descs are sorted according to their insert position
# sum_op descs are sorted according to their insert position
for
p
in
reversed
(
pending_sum_ops
):
for
p
in
reversed
(
pending_sum_ops
):
grad_op_descs
.
insert
(
p
[
1
],
p
[
0
])
op_descs
.
insert
(
p
[
1
],
p
[
0
])
# Remove ops whose outputs are all in no_grad_set
grad_op_descs
=
filter
(
return
op_descs
lambda
op_desc
:
not
_is_all_in_set_
(
op_desc
.
output_arg_names
(),
no_grad_set
[
block
.
idx
]),
grad_op_descs
)
def
_remove_no_grad_branch_
(
op_descs
,
no_grad_set
):
# Remove ops whose outputs are all in no_grad_dict
op_descs
=
filter
(
lambda
op_desc
:
not
_all_in_set_
(
op_desc
.
output_arg_names
(),
no_grad_set
),
op_descs
)
# Insert fill_zeros_like_op
# Insert fill_zeros_like_op
to_insert
=
[]
to_insert
=
[]
for
idx
,
op_desc
in
enumerate
(
grad_
op_descs
):
for
idx
,
op_desc
in
enumerate
(
op_descs
):
for
arg
in
op_desc
.
input_arg_names
():
for
arg
in
op_desc
.
input_arg_names
():
if
core
.
grad_var_suffix
()
in
arg
and
arg
in
no_grad_set
[
block
.
idx
]:
if
core
.
grad_var_suffix
()
in
arg
and
arg
in
no_grad_set
:
to_insert
.
append
((
arg
,
idx
))
to_insert
.
append
((
_create_op_desc_
(
"fill_zeros_like"
,
{
for
ele
in
reversed
(
to_insert
):
"X"
:
[
_strip_grad_suffix_
(
arg
)]
arg
=
ele
[
0
]
},
{
"Y"
:
[
arg
]},
{}),
idx
))
fill_zeros_like_op
=
_create_op_desc_
(
"fill_zeros_like"
,
{
"X"
:
[
_strip_grad_suffix_
(
arg
)]},
{
"Y"
:
[
arg
]},
map
(
lambda
p
:
op_descs
.
insert
(
p
[
1
],
p
[
0
]),
reversed
(
to_insert
))
{})
grad_op_descs
.
insert
(
ele
[
1
],
fill_zeros_like_op
)
return
op_descs
def
_append_backward_ops_
(
target
,
block
,
target_block
,
no_grad_dict
,
grad_to_var
,
callback
=
None
):
grad_op_descs
=
[]
program
=
block
.
program
for
op
in
reversed
(
block
.
ops
):
grad_sub_block_list
=
[]
# If the op has its own sub-block, deal with the sub-block first
if
op
.
has_attr
(
"sub_block"
):
sub_block
=
program
.
block
(
op
.
block_attr
(
"sub_block"
))
grad_sub_block
=
program
.
create_block
(
parent_idx
=
sub_block
.
idx
)
_append_backward_ops_
(
target
,
sub_block
,
grad_sub_block
,
no_grad_dict
,
grad_to_var
,
callback
)
grad_sub_block_list
.
append
(
grad_sub_block
.
desc
)
grad_op_desc
,
op_grad_to_var
=
core
.
get_grad_op_desc
(
op
.
desc
,
no_grad_dict
[
block
.
idx
],
grad_sub_block_list
)
grad_op_descs
.
extend
(
grad_op_desc
)
grad_to_var
.
update
(
op_grad_to_var
)
grad_op_descs
=
_addup_repetitive_outputs_
(
grad_op_descs
)
grad_op_descs
=
_remove_no_grad_branch_
(
grad_op_descs
,
no_grad_dict
[
block
.
idx
])
if
target_block
.
idx
==
0
:
if
target_block
.
idx
==
0
:
grad_target_name
=
_append_grad_suffix_
(
target
.
name
)
# target_block.desc.var(grad_target_name.encode("ascii"))
grad_op_descs
.
insert
(
grad_op_descs
.
insert
(
0
,
0
,
_create_op_desc_
(
_create_op_desc_
(
"fill_constant"
,
{},
{
op_type
=
"fill_constant"
,
"Out"
:
[
_append_grad_suffix_
(
target
.
name
)]
inputs
=
{},
},
{
"shape"
:
[
1
],
outputs
=
{
"Out"
:
[
grad_target_name
]},
attrs
=
{
"shape"
:
[
1
],
"value"
:
1.0
,
"value"
:
1.0
,
"dtype"
:
target
.
dtype
}))
"dtype"
:
target
.
dtype
}))
# append op_desc in grad_op_descs to target_block
for
op_desc
in
grad_op_descs
:
for
op_desc
in
grad_op_descs
:
new_op_desc
=
target_block
.
desc
.
append_op
()
new_op_desc
=
target_block
.
desc
.
append_op
()
new_op_desc
.
copy_from
(
op_desc
)
new_op_desc
.
copy_from
(
op_desc
)
return
grad_to_var
def
_append_backward_vars_
(
block
,
start_op_idx
,
grad_to_var
,
grad_info_map
):
def
_append_backward_vars_
(
block
,
start_op_idx
,
grad_to_var
,
grad_info_map
):
for
op_idx
in
range
(
start_op_idx
,
block
.
desc
.
op_size
()):
for
op_idx
in
range
(
start_op_idx
,
block
.
desc
.
op_size
()):
...
@@ -194,15 +195,15 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
...
@@ -194,15 +195,15 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
_infer_var_data_type_
(
arg
,
block
)
_infer_var_data_type_
(
arg
,
block
)
def
append_backward
(
loss
,
parameter_list
=
None
,
no_grad_
se
t
=
None
):
def
append_backward
(
loss
,
parameter_list
=
None
,
no_grad_
dic
t
=
None
):
"""
"""
Create and add gradient Operators in BlockDesc to compute
Create and add gradient Operators in BlockDesc to compute
gradients of `loss` for parameters in parameter_list
gradients of `loss` for parameters in parameter_list
:param loss: an variable generated by cost function.
:param loss: an variable generated by cost function.
:type loss: Variable
:type loss: Variable
:param no_grad_
se
t: variable that should not create gradient
:param no_grad_
dic
t: variable that should not create gradient
:type no_grad_
se
t: set
:type no_grad_
dic
t: set
:param parameter_list: parameters that need to compute gradient and
:param parameter_list: parameters that need to compute gradient and
update to optimize the lost.
update to optimize the lost.
:type: list
:type: list
...
@@ -212,8 +213,8 @@ def append_backward(loss, parameter_list=None, no_grad_set=None):
...
@@ -212,8 +213,8 @@ def append_backward(loss, parameter_list=None, no_grad_set=None):
assert
isinstance
(
loss
,
framework
.
Variable
)
assert
isinstance
(
loss
,
framework
.
Variable
)
program
=
loss
.
block
.
program
program
=
loss
.
block
.
program
if
no_grad_
se
t
is
None
:
if
no_grad_
dic
t
is
None
:
no_grad_
se
t
=
dict
()
no_grad_
dic
t
=
dict
()
assert
isinstance
(
program
,
framework
.
Program
)
assert
isinstance
(
program
,
framework
.
Program
)
for
block
in
program
.
blocks
:
for
block
in
program
.
blocks
:
assert
isinstance
(
block
,
framework
.
Block
)
assert
isinstance
(
block
,
framework
.
Block
)
...
@@ -222,19 +223,21 @@ def append_backward(loss, parameter_list=None, no_grad_set=None):
...
@@ -222,19 +223,21 @@ def append_backward(loss, parameter_list=None, no_grad_set=None):
assert
isinstance
(
var
,
framework
.
Variable
)
assert
isinstance
(
var
,
framework
.
Variable
)
if
var
.
stop_gradient
:
if
var
.
stop_gradient
:
block_no_grad_set
.
add
(
_append_grad_suffix_
(
var
.
name
))
block_no_grad_set
.
add
(
_append_grad_suffix_
(
var
.
name
))
no_grad_set
[
block
.
idx
]
=
block_no_grad_set
no_grad_dict
[
block
.
idx
]
=
block_no_grad_set
else
:
elif
isinstance
(
no_grad_dict
,
set
):
# FIX ME
no_grad_dict
=
{
0
:
no_grad_dict
}
no_grad_set
=
{
0
:
no_grad_set
}
grad_info_map
=
dict
()
grad_info_map
=
dict
()
root_block
=
program
.
block
(
0
)
root_block
=
program
.
block
(
0
)
fwd_op_num
=
root_block
.
desc
.
op_size
()
fwd_op_num
=
root_block
.
desc
.
op_size
()
current_block_idx
=
program
.
current_block_idx
current_block_idx
=
program
.
current_block_idx
grad_to_var
=
_append_backward_ops_
(
loss
,
root_block
,
root_block
,
grad_to_var
=
dict
()
no_grad_set
)
_append_backward_ops_
(
loss
,
root_block
,
root_block
,
no_grad_dict
,
grad_to_var
)
_append_backward_vars_
(
root_block
,
fwd_op_num
,
grad_to_var
,
grad_info_map
)
_append_backward_vars_
(
root_block
,
fwd_op_num
,
grad_to_var
,
grad_info_map
)
program
.
current_block_idx
=
current_block_idx
program
.
current_block_idx
=
current_block_idx
program
.
sync_with_cpp
()
program
.
sync_with_cpp
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录