Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
df927768
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
df927768
编写于
1月 10, 2018
作者:
F
fengjiayi
提交者:
GitHub
1月 10, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7269 from emailweixu/calc_gradient
Calculating gradients for partial graph
上级
5f985000
6e5eae13
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
302 addition
and
45 deletion
+302
-45
paddle/framework/grad_op_desc_maker.h
paddle/framework/grad_op_desc_maker.h
+5
-1
paddle/framework/op_desc.h
paddle/framework/op_desc.h
+1
-1
paddle/operators/norm_op.cc
paddle/operators/norm_op.cc
+1
-1
paddle/operators/norm_op.h
paddle/operators/norm_op.h
+1
-1
python/paddle/v2/fluid/backward.py
python/paddle/v2/fluid/backward.py
+227
-38
python/paddle/v2/fluid/layers/ops.py
python/paddle/v2/fluid/layers/ops.py
+11
-1
python/paddle/v2/fluid/layers/tensor.py
python/paddle/v2/fluid/layers/tensor.py
+31
-2
python/paddle/v2/fluid/tests/test_calc_gradient.py
python/paddle/v2/fluid/tests/test_calc_gradient.py
+25
-0
未找到文件。
paddle/framework/grad_op_desc_maker.h
浏览文件 @
df927768
...
...
@@ -87,7 +87,11 @@ class GradOpDescMakerBase {
auto
onames
=
this
->
Output
(
name
);
ret_val
.
reserve
(
onames
.
size
());
std
::
transform
(
onames
.
begin
(),
onames
.
end
(),
std
::
back_inserter
(
ret_val
),
GradVarName
);
[
this
](
const
std
::
string
&
fwd_var_name
)
->
std
::
string
{
auto
g_name
=
GradVarName
(
fwd_var_name
);
(
*
this
->
grad_to_var_
)[
g_name
]
=
fwd_var_name
;
return
g_name
;
});
return
ret_val
;
}
...
...
paddle/framework/op_desc.h
浏览文件 @
df927768
...
...
@@ -129,7 +129,7 @@ class OpDesc {
}
proto
::
OpDesc
desc_
;
// input arg name =>
out
put variable names
// input arg name =>
in
put variable names
VariableNameMap
inputs_
;
// output arg name => output variable names
VariableNameMap
outputs_
;
...
...
paddle/operators/norm_op.cc
浏览文件 @
df927768
...
...
@@ -39,7 +39,7 @@ class NormOpMaker : public framework::OpProtoAndCheckerMaker {
"M = C * H * W"
);
AddComment
(
R"DOC(
"Input shape: $(N, C, H, W)$
Sc
la
e shape: $(C, 1)$
Sc
al
e shape: $(C, 1)$
Output shape: $(N, C, H, W)$
Where
forward
...
...
paddle/operators/norm_op.h
浏览文件 @
df927768
python/paddle/v2/fluid/backward.py
浏览文件 @
df927768
from
paddle.v2.fluid
import
framework
as
framework
from
.
import
core
import
collections
import
copy
__all__
=
[
'append_backward'
]
__all__
=
[
'append_backward'
,
'calc_gradient'
]
def
_rename_arg_
(
op_descs
,
old_name
,
new_name
,
begin_idx
=
None
,
end_idx
=
None
):
...
...
@@ -65,6 +66,18 @@ def _all_in_set_(cands, s):
return
True
def
_some_in_set_
(
cands
,
s
):
"""
Test if some elements of 'cands' are in set 's'
"""
if
len
(
cands
)
==
0
:
return
False
for
c
in
cands
:
if
c
in
s
:
return
True
return
False
def
_strip_grad_suffix_
(
name
):
"""
Strip the grad suffix from the given varibale name
...
...
@@ -169,8 +182,8 @@ def _remove_no_grad_branch_(op_descs, no_grad_set):
return
op_descs
def
_append_backward_ops_
(
target
,
block
,
def
_append_backward_ops_
(
block
,
ops
,
target_block
,
no_grad_dict
,
grad_to_var
,
...
...
@@ -179,8 +192,8 @@ def _append_backward_ops_(target,
Create all grad ops, and insert them into given block
Args:
target(Variable): the target variable of forward pass
block(Block): the block where forward ops are
ops(Op): the forward operators whose backward ops need to be added
target_block(Block): the block which is going to hold new generated grad ops
no_grad_dict(dict):
key(int) block index
...
...
@@ -202,14 +215,14 @@ def _append_backward_ops_(target,
# grad_op_descs holds created grad_op, and will be appended to target_block
grad_op_descs
=
[]
program
=
block
.
program
for
op
in
reversed
(
block
.
ops
):
for
op
in
reversed
(
ops
):
grad_sub_block_list
=
[]
# If the op has its own sub-block, deal with the sub-block first
if
op
.
has_attr
(
"sub_block"
):
sub_block
=
program
.
block
(
op
.
block_attr
(
"sub_block"
))
grad_sub_block
=
program
.
create_block
(
parent_idx
=
sub_block
.
idx
)
_append_backward_ops_
(
target
,
sub_block
,
grad_sub_block
,
no_grad_dict
,
grad_to_var
,
callback
)
_append_backward_ops_
(
sub_block
,
sub_block
.
ops
,
grad_sub_block
,
no_grad_dict
,
grad_to_var
)
grad_sub_block_list
.
append
(
grad_sub_block
.
desc
)
# Getting op's corresponding grad_op
...
...
@@ -224,14 +237,6 @@ def _append_backward_ops_(target,
grad_op_descs
=
_remove_no_grad_branch_
(
grad_op_descs
,
no_grad_dict
[
block
.
idx
])
if
target_block
.
idx
==
0
:
grad_op_descs
.
insert
(
0
,
_create_op_desc_
(
"fill_constant"
,
{},
{
"Out"
:
[
_append_grad_suffix_
(
target
.
name
)]
},
{
"shape"
:
[
1
],
"value"
:
1.0
,
"dtype"
:
target
.
dtype
}))
# append op_desc in grad_op_descs to target_block
for
op_desc
in
grad_op_descs
:
new_op_desc
=
target_block
.
desc
.
append_op
()
...
...
@@ -252,7 +257,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
In most cases, this dict is generated by _append_backward_ops_()
grad_info_map(dict)(output argument):
key(str): forward variable name
val(tuple): a tuple of (str,
int), str is the corresponding grad name, int is the block index
val(tuple): a tuple of (str,
Block), str is the corresponding grad name, Block is the block containing grad variable
"""
for
op_idx
in
range
(
start_op_idx
,
block
.
desc
.
op_size
()):
op_desc
=
block
.
desc
.
op
(
op_idx
)
...
...
@@ -279,41 +284,63 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
_infer_var_data_type_
(
arg
,
block
)
def
_rename_grad_
(
block
,
start_op_idx
,
grad_to_var
,
target_grad_map
):
var_map
=
copy
.
copy
(
target_grad_map
)
for
op_idx
in
range
(
start_op_idx
,
block
.
desc
.
op_size
()):
op_desc
=
block
.
desc
.
op
(
op_idx
)
for
name
in
op_desc
.
input_arg_names
():
if
name
in
var_map
:
op_desc
.
rename_input
(
name
,
var_map
[
name
])
for
name
in
op_desc
.
output_arg_names
():
if
block
.
desc
.
find_var
(
name
.
encode
(
"ascii"
)):
new_name
=
"%s_%s"
%
(
name
,
core
.
unique_integer
(
name
))
op_desc
.
rename_output
(
name
,
new_name
)
var_map
[
name
]
=
new_name
for
g
,
ng
in
var_map
.
iteritems
():
if
g
in
grad_to_var
:
grad_to_var
[
ng
]
=
grad_to_var
[
g
]
grad_to_var
.
pop
(
g
)
def
_get_stop_gradients_
(
program
):
no_grad_dict
=
dict
()
assert
isinstance
(
program
,
framework
.
Program
)
for
block
in
program
.
blocks
:
assert
isinstance
(
block
,
framework
.
Block
)
block_no_grad_set
=
set
()
for
var
in
block
.
vars
.
itervalues
():
assert
isinstance
(
var
,
framework
.
Variable
)
if
var
.
stop_gradient
:
block_no_grad_set
.
add
(
_append_grad_suffix_
(
var
.
name
))
no_grad_dict
[
block
.
idx
]
=
block_no_grad_set
return
no_grad_dict
def
append_backward
(
loss
,
parameter_list
=
None
,
no_grad_set
=
None
,
callback
=
None
):
"""
Append backward part to main_program
Args:
loss(Variable): The variable generated by cost function.
parameter_list(list
): Parameters that need to be updated by optimizer.
If None, it means all parameters need to be updated.
parameter_list(list
[string]): Parameters that need to be updated by
optimizer.
If None, it means all parameters need to be updated.
no_grad_set(set): Variables that have no gradients in Block 0.
If None, the set will be generated inside the function and
contains all variables with `step_gradient=True` from all blocks
.
All variables with `step_gradient=True` from all blocks will be
automatically added
.
Return:
(list[
Variable]): list of (parameters, gradients
) pair.
(list[
(Variable,Variable)]): list of (parameter, gradient
) pair.
"""
assert
isinstance
(
loss
,
framework
.
Variable
)
program
=
loss
.
block
.
program
no_grad_dict
=
dict
()
if
no_grad_set
is
None
:
assert
isinstance
(
program
,
framework
.
Program
)
for
block
in
program
.
blocks
:
assert
isinstance
(
block
,
framework
.
Block
)
block_no_grad_set
=
set
()
for
var
in
block
.
vars
.
itervalues
():
assert
isinstance
(
var
,
framework
.
Variable
)
if
var
.
stop_gradient
:
block_no_grad_set
.
add
(
_append_grad_suffix_
(
var
.
name
))
no_grad_dict
[
block
.
idx
]
=
block_no_grad_set
elif
isinstance
(
no_grad_set
,
set
):
no_grad_dict
=
{
0
:
set
([
_append_grad_suffix_
(
name
)
for
name
in
no_grad_set
])
}
else
:
raise
ValueError
(
"'no_grad_set' should be a set or None."
)
no_grad_set
=
set
()
no_grad_set
=
copy
.
copy
(
no_grad_set
)
no_grad_dict
=
_get_stop_gradients_
(
program
)
no_grad_dict
[
0
].
update
(
map
(
_append_grad_suffix_
,
no_grad_set
))
grad_info_map
=
dict
()
root_block
=
program
.
block
(
0
)
...
...
@@ -322,8 +349,25 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, callback=None):
current_block_idx
=
program
.
current_block_idx
grad_to_var
=
dict
()
_append_backward_ops_
(
loss
,
root_block
,
root_block
,
no_grad_dict
,
op_desc
=
_create_op_desc_
(
"fill_constant"
,
{},
{
"Out"
:
[
_append_grad_suffix_
(
loss
.
name
)]
},
{
"shape"
:
[
1
],
"value"
:
1.0
,
"dtype"
:
loss
.
dtype
})
root_block
.
desc
.
append_op
().
copy_from
(
op_desc
)
block_no_grad_set
=
set
(
map
(
_strip_grad_suffix_
,
no_grad_dict
[
0
]))
op_path
=
_find_op_path_
(
root_block
,
[
loss
],
[],
block_no_grad_set
)
no_grad_dict
[
0
].
update
(
map
(
_append_grad_suffix_
,
block_no_grad_set
))
_append_backward_ops_
(
root_block
,
op_path
,
root_block
,
no_grad_dict
,
grad_to_var
,
callback
)
# Because calc_gradient may be called multiple times,
# we need rename the internal gradient variables so that they have
# different names.
_rename_grad_
(
root_block
,
fwd_op_num
,
grad_to_var
,
{})
_append_backward_vars_
(
root_block
,
fwd_op_num
,
grad_to_var
,
grad_info_map
)
program
.
current_block_idx
=
current_block_idx
...
...
@@ -334,6 +378,7 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, callback=None):
else
:
params
=
program
.
global_block
().
all_parameters
()
parameters
=
[
param
.
name
for
param
in
params
]
params_and_grads
=
[]
for
param
in
parameters
:
if
param
not
in
grad_info_map
:
...
...
@@ -351,3 +396,147 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, callback=None):
else
:
params_and_grads
.
append
((
param_var
,
None
))
return
params_and_grads
def
_as_list
(
x
):
if
x
is
None
:
return
[]
return
list
(
x
)
if
isinstance
(
x
,
collections
.
Sequence
)
else
[
x
]
def
_find_op_path_
(
block
,
outputs
,
inputs
,
no_grad_set
):
"""
no_grad_set will also be changed
"""
input_names
=
set
([
inp
.
name
for
inp
in
inputs
])
output_names
=
set
([
out
.
name
for
out
in
outputs
])
relevant_op_flags
=
[
True
]
*
len
(
block
.
ops
)
# All the inputs of the block are used if inputs is empty,
if
inputs
:
for
i
,
op
in
enumerate
(
block
.
ops
):
if
_some_in_set_
(
op
.
desc
.
input_arg_names
(),
input_names
):
for
name
in
op
.
desc
.
output_arg_names
():
if
name
not
in
no_grad_set
:
input_names
.
add
(
name
)
else
:
relevant_op_flags
[
i
]
=
False
for
i
,
op
in
reversed
(
list
(
enumerate
(
block
.
ops
))):
if
_some_in_set_
(
op
.
desc
.
output_arg_names
(),
output_names
):
for
name
in
op
.
desc
.
input_arg_names
():
if
name
not
in
no_grad_set
:
output_names
.
add
(
name
)
else
:
relevant_op_flags
[
i
]
=
False
op_path
=
[
block
.
ops
[
i
]
for
i
in
range
(
len
(
block
.
ops
))
if
relevant_op_flags
[
i
]
]
if
inputs
:
for
op
in
op_path
:
for
name
in
op
.
desc
.
input_arg_names
():
if
name
not
in
input_names
:
no_grad_set
.
add
(
name
)
return
op_path
def
calc_gradient
(
targets
,
inputs
,
target_gradients
=
None
,
no_grad_set
=
None
):
"""
Backpropagate the graidents of targets to inputs.
Args:
targets(Variable|list[Variable]): The target variables
inputs(Variable|list[Variable]): The input variables
no_grad_set(set[string]): The names of variables that have no gradients
in Block 0. All variables with `stop_gradient=True` from all blocks
will be automatically added.
Return:
(list[Variable]): list of gradients for inputs
If an input does not affect targets, the corresponding gradient variable
will be None
"""
targets
=
_as_list
(
targets
)
inputs
=
_as_list
(
inputs
)
target_gradients
=
_as_list
(
target_gradients
)
block
=
targets
[
0
].
block
prog
=
block
.
program
block_idx
=
block
.
idx
if
not
target_gradients
:
target_gradients
=
[
None
]
*
len
(
targets
)
if
len
(
targets
)
!=
len
(
target_gradients
):
raise
ValueError
(
"Should have the same number of target_gradients as targets"
)
if
no_grad_set
is
None
:
no_grad_set
=
set
()
no_grad_set
=
copy
.
copy
(
no_grad_set
)
no_grad_dict
=
_get_stop_gradients_
(
prog
)
no_grad_dict
[
0
].
update
(
map
(
_append_grad_suffix_
,
no_grad_set
))
fwd_op_num
=
block
.
desc
.
op_size
()
target_grad_map
=
{}
for
i
,
grad
in
enumerate
(
target_gradients
):
target
=
targets
[
i
]
if
grad
is
None
:
grad_name
=
_append_grad_suffix_
(
target
.
name
)
op_desc
=
_create_op_desc_
(
"fill_constant_batch_size_like"
,
{
"Input"
:
[
target
.
name
]},
{
"Out"
:
[
grad_name
]},
{
"shape"
:
target
.
shape
,
"value"
:
1.0
,
"dtype"
:
target
.
dtype
,
'input_dim_idx'
:
0
,
'output_dim_idx'
:
0
})
block
.
desc
.
append_op
().
copy_from
(
op_desc
)
else
:
if
target
.
block
.
idx
!=
block_idx
or
target
.
block
.
program
!=
prog
:
raise
ValueError
(
"all targets must be in the same block"
)
if
target
.
shape
!=
grad
.
shape
:
raise
ValueError
(
"The shapes of target and grad are different: %s %s"
%
(
target
.
name
,
grad
.
name
))
target_grad_map
[
_append_grad_suffix_
(
target
.
name
)]
=
grad
.
name
for
input
in
inputs
:
if
input
.
block
.
program
!=
prog
:
raise
"input must be in the same program as targets"
block_no_grad_set
=
set
(
map
(
_strip_grad_suffix_
,
no_grad_dict
[
0
]))
op_path
=
_find_op_path_
(
block
,
targets
,
inputs
,
block_no_grad_set
)
no_grad_dict
[
0
].
update
(
map
(
_append_grad_suffix_
,
block_no_grad_set
))
grad_to_var
=
dict
()
grad_info_map
=
dict
()
_append_backward_ops_
(
block
,
op_path
,
block
,
no_grad_dict
,
grad_to_var
)
# Because calc_gradient may be called multiple times,
# we need rename the internal gradient variables so that they have
# different names.
_rename_grad_
(
block
,
fwd_op_num
,
grad_to_var
,
target_grad_map
)
_append_backward_vars_
(
block
,
fwd_op_num
,
grad_to_var
,
grad_info_map
)
prog
.
sync_with_cpp
()
grad_vars
=
[]
for
input_var
in
inputs
:
if
input_var
.
name
not
in
grad_info_map
:
grad_vars
.
append
(
None
)
else
:
grad_info
=
grad_info_map
[
input_var
.
name
]
grad_block
=
grad_info
[
1
]
grad_var
=
grad_block
.
var
(
grad_info
[
0
])
grad_vars
.
append
(
grad_var
)
if
len
(
grad_vars
)
==
1
:
return
grad_vars
[
0
]
else
:
return
grad_vars
python/paddle/v2/fluid/layers/ops.py
浏览文件 @
df927768
from
..registry
import
register_layer
__activations__
=
[
'abs'
,
'tanh'
,
'sigmoid'
,
'relu'
,
'sqrt'
,
'ceil'
,
'floor'
,
'log'
,
'round'
'abs'
,
'ceil'
,
'exp'
,
'floor'
,
'log'
,
'relu'
,
'round'
,
'sigmoid'
,
'sqrt'
,
'square'
,
'tanh'
,
]
__all__
=
[
...
...
python/paddle/v2/fluid/layers/tensor.py
浏览文件 @
df927768
from
..layer_helper
import
LayerHelper
from
..param_attr
import
ParamAttr
__all__
=
[
'create_tensor'
,
'cast'
,
'concat'
,
'sums'
,
'assign'
,
'create_tensor'
,
'c
reate_parameter'
,
'c
ast'
,
'concat'
,
'sums'
,
'assign'
,
'fill_constant_batch_size_like'
,
'fill_constant'
,
'ones'
,
'zeros'
]
...
...
@@ -11,6 +12,33 @@ def create_tensor(dtype, name=None):
return
helper
.
create_variable
(
name
=
helper
.
name
,
dtype
=
dtype
)
def
create_parameter
(
shape
,
dtype
,
attr
=
None
,
is_bias
=
False
,
default_initializer
=
None
):
"""
Create a parameter
Args:
shape(list[int]): shape of the parameter
dtype(string): element type of the parameter
attr(ParamAttr): attributes of the parameter
is_bias(bool): This can affect which default initializer is chosen
when default_initializer is None. If is_bias,
initializer.Constant(0.0) will be used. Otherwise,
Xavier() will be used.
default_initializer(Initializer): initializer for the parameter
Returns:
Parameter: the created parameter
"""
helper
=
LayerHelper
(
"create_parameter"
)
if
attr
is
None
:
attr
=
ParamAttr
()
return
helper
.
create_parameter
(
attr
,
shape
,
dtype
,
is_bias
,
default_initializer
)
def
cast
(
x
,
dtype
):
"""
This function takes in the input with input_dtype
...
...
@@ -180,7 +208,8 @@ def fill_constant_batch_size_like(input,
Examples:
.. code-block:: python
data = fluid.layers.fill_constant(shape=[1], value=0, dtype='int64')
data = fluid.layers.fill_constant_batch_size_like(
input=like, shape=[1], value=0, dtype='int64')
"""
helper
=
LayerHelper
(
"fill_constant_batch_size_like"
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
dtype
)
...
...
python/paddle/v2/fluid/tests/test_calc_gradient.py
0 → 100644
浏览文件 @
df927768
import
unittest
import
paddle.v2.fluid
as
fluid
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.optimizer
as
optimizer
from
paddle.v2.fluid.backward
import
calc_gradient
class
TestCalcGradient
(
unittest
.
TestCase
):
def
test_calc_gradient
(
self
):
x
=
layers
.
create_parameter
(
dtype
=
"float32"
,
shape
=
[
5
,
10
])
y
=
layers
.
create_parameter
(
dtype
=
"float32"
,
shape
=
[
10
,
8
])
mul_out
=
layers
.
mul
(
x
=
x
,
y
=
y
)
mean_out
=
layers
.
mean
(
x
=
mul_out
)
a
=
calc_gradient
(
mean_out
,
mul_out
)
b
=
calc_gradient
(
mean_out
,
x
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
{},
fetch_list
=
[
a
,
b
])
if
__name__
==
"__main__"
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录