BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)

Unverified commit 24ec6ed0
Authored by YuanRisheng on Apr 29, 2022; committed via GitHub on Apr 29, 2022
Parent: 2bee99df

Add some double/triple grad kernel yaml file (#42361)

* add double yaml
* add inline func
Changes: 19 changed files with 224 additions and 67 deletions (+224, -67)
paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py (+6, -11)
paddle/phi/api/lib/kernel_dispatch.h (+10, -2)
paddle/phi/kernels/activation_grad_kernel.h (+3, -3)
paddle/phi/kernels/batch_norm_grad_kernel.h (+6, -6)
paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc (+6, -6)
paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc (+1, -1)
paddle/phi/kernels/elementwise_subtract_grad_kernel.h (+1, -1)
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu (+6, -6)
paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu (+1, -1)
paddle/phi/kernels/impl/activation_grad_impl.h (+3, -3)
paddle/phi/ops/compat/activation_sig.cc (+2, -2)
paddle/phi/ops/compat/batch_norm_sig.cc (+7, -7)
paddle/phi/ops/compat/elementwise_sig.cc (+1, -1)
python/paddle/fluid/tests/unittests/gradient_checker.py (+46, -15)
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py (+20, -0)
python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py (+17, -0)
python/paddle/nn/functional/activation.py (+4, -1)
python/paddle/utils/code_gen/api.yaml (+6, -0)
python/paddle/utils/code_gen/backward.yaml (+78, -1)
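For orientation: the "double grad" and "triple grad" kernels touched by this commit compute second- and third-order derivatives, i.e. the result of differentiating a backward graph again. The snippet below is not part of the change; it is a minimal sketch against the public paddle.grad API, using tanh (whose double/triple grad entries are added here), to show what those higher-order gradients mean.

    import paddle

    x = paddle.to_tensor([0.1, 0.2, 0.3], stop_gradient=False)
    y = paddle.tanh(x)

    # First-order gradient, kept differentiable so it can be differentiated again.
    (dx,) = paddle.grad([y], [x], create_graph=True)
    # Second-order ("double") gradient.
    (ddx,) = paddle.grad([dx], [x], create_graph=True)
    # Third-order ("triple") gradient.
    (dddx,) = paddle.grad([ddx], [x])
    print(ddx.numpy(), dddx.numpy())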
paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
@@ -22,17 +22,12 @@ import os
 ### Global Variables ###
 ########################
-ops_to_fill_zero_for_empty_grads = set([
-    "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
-    "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
-    "add_triple_grad", "multiply_double_grad", "multiply_triple_grad",
-    "conv2d_grad_grad"
-])
+ops_to_fill_zero_for_empty_grads = set([
+    "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
+    "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
+    "add_triple_grad", "multiply_double_grad", "multiply_triple_grad",
+    "conv2d_grad_grad", "batch_norm_double_grad", "tanh_double_grad",
+    "tanh_triple_grad", "subtract_double_grad", "divide_double_grad",
+    "log_double_grad", "elu_double_grad"
+])

 # For API dispatch used at python-level
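ops_to_fill_zero_for_empty_grads names grad APIs whose higher-order kernels may be handed empty gradient inputs, which the eager code generator pads with zeros. Purely as an illustration of that idea (the helper below is hypothetical, not the generator's actual code):

    import paddle

    # Hypothetical helper: if an incoming grad for one of the listed ops is
    # missing, substitute a zero tensor shaped like the corresponding forward
    # value before invoking the grad kernel.
    def fill_empty_grad(grad, like):
        return paddle.zeros_like(like) if grad is None else grad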
paddle/phi/api/lib/kernel_dispatch.h
@@ -96,8 +96,7 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
   // TODO(chenweihang): deal with multiple diff input Tensors
   // TODO(chenweihang): add global device guard method to set backend
-  void operator()(const Tensor& x) {
-    const phi::TensorBase& tensor = *x.impl();
+  inline void AssignKernelKeySet(const phi::TensorBase& tensor) {
     key_set.backend_set =
         key_set.backend_set | detail::GetTensorBackendSet(tensor);
     // TODO(chenweihang): select multi layout and dtype

@@ -110,6 +109,8 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
     }
   }

+  void operator()(const Tensor& x) { AssignKernelKeySet(*x.impl()); }
+
   void operator()(const std::vector<Tensor>& x) {
     const phi::TensorBase& tensor = *x.at(0).impl();
     key_set.backend_set =

@@ -119,6 +120,13 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
     key_set.dtype = tensor.dtype();
   }

+  void operator()(const paddle::optional<const Tensor&> x) {
+    if (x.get_ptr() != nullptr) {
+      const phi::TensorBase& tensor = *(x.get_ptr()->impl());
+      AssignKernelKeySet(tensor);
+    }
+  }
+
   // skip other type args, these args don't used in kernel selection
   template <typename T>
   void operator()(const T& x) {
paddle/phi/kernels/activation_grad_kernel.h
@@ -82,18 +82,18 @@ void ReluDoubleGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void TanhDoubleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
+                          const DenseTensor& ddx,
                           DenseTensor* dout_new,
                           DenseTensor* ddout);

 template <typename T, typename Context>
 void TanhTripleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
-                          const DenseTensor& d_ddout,
+                          const DenseTensor& ddx,
                           const DenseTensor& d_dout_new,
+                          const DenseTensor& d_ddout,
                           DenseTensor* d_out_new,
                           DenseTensor* d_dout,
                           DenseTensor* d_ddx);
paddle/phi/kernels/batch_norm_grad_kernel.h
@@ -66,16 +66,16 @@ void BatchNormGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void BatchNormDoubleGradKernel(const Context& dev_ctx,
-                               const DenseTensor& x_grad_grad,
-                               const DenseTensor& scale_grad_grad,
-                               const DenseTensor& bias_grad_grad,
-                               const DenseTensor& y_grad,
                                const DenseTensor& x,
                                const DenseTensor& scale,
-                               const DenseTensor& saved_mean,
-                               const DenseTensor& saved_variance,
                                paddle::optional<const DenseTensor&> mean,
                                paddle::optional<const DenseTensor&> variance,
+                               const DenseTensor& saved_mean,
+                               const DenseTensor& saved_variance,
+                               const DenseTensor& y_grad,
+                               const DenseTensor& x_grad_grad,
+                               const DenseTensor& scale_grad_grad,
+                               const DenseTensor& bias_grad_grad,
                                float momentum,
                                float epsilon,
                                const std::string& data_layout,
paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc
@@ -341,16 +341,16 @@ void BatchNormGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void BatchNormDoubleGradKernel(const Context& ctx,
-                               const DenseTensor& x_grad_grad,
-                               const DenseTensor& scale_grad_grad,
-                               const DenseTensor& bias_grad_grad,
-                               const DenseTensor& y_grad,
                                const DenseTensor& x,
                                const DenseTensor& scale,
-                               const DenseTensor& saved_mean,
-                               const DenseTensor& saved_variance,
                                paddle::optional<const DenseTensor&> mean,
                                paddle::optional<const DenseTensor&> variance,
+                               const DenseTensor& saved_mean,
+                               const DenseTensor& saved_variance,
+                               const DenseTensor& y_grad,
+                               const DenseTensor& x_grad_grad,
+                               const DenseTensor& scale_grad_grad,
+                               const DenseTensor& bias_grad_grad,
                                float momentum,
                                float epsilon,
                                const std::string& data_layout_str,
paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc
@@ -38,9 +38,9 @@ void SubtractGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void SubtractDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& y,
+                              const DenseTensor& dout,
                               paddle::optional<const DenseTensor&> ddx,
                               paddle::optional<const DenseTensor&> ddy,
-                              const DenseTensor& dout,
                               int axis,
                               DenseTensor* ddout) {
   phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
paddle/phi/kernels/elementwise_subtract_grad_kernel.h
@@ -30,9 +30,9 @@ void SubtractGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void SubtractDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& y,
+                              const DenseTensor& dout,
                               paddle::optional<const DenseTensor&> ddx,
                               paddle::optional<const DenseTensor&> ddy,
-                              const DenseTensor& dout,
                               int axis,
                               DenseTensor* ddout);
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -908,16 +908,16 @@ void BatchNormGradKernel(const Context &dev_ctx,
 template <typename T, typename Context>
 void BatchNormDoubleGradKernel(const Context &ctx,
-                               const DenseTensor &x_grad_grad,
-                               const DenseTensor &scale_grad_grad,
-                               const DenseTensor &bias_grad_grad,
-                               const DenseTensor &y_grad,
                                const DenseTensor &x,
                                const DenseTensor &scale,
-                               const DenseTensor &saved_mean,
-                               const DenseTensor &saved_variance,
                                paddle::optional<const DenseTensor &> mean,
                                paddle::optional<const DenseTensor &> variance,
+                               const DenseTensor &saved_mean,
+                               const DenseTensor &saved_variance,
+                               const DenseTensor &y_grad,
+                               const DenseTensor &x_grad_grad,
+                               const DenseTensor &scale_grad_grad,
+                               const DenseTensor &bias_grad_grad,
                                float momentum,
                                float epsilon,
                                const std::string &data_layout_str,
paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu
@@ -46,9 +46,9 @@ void SubtractGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void SubtractDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& y,
+                              const DenseTensor& dout,
                               paddle::optional<const DenseTensor&> ddx,
                               paddle::optional<const DenseTensor&> ddy,
-                              const DenseTensor& dout,
                               int axis,
                               DenseTensor* ddout) {
   phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
paddle/phi/kernels/impl/activation_grad_impl.h
@@ -152,8 +152,8 @@ void LeakyReluDoubleGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void TanhDoubleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
+                          const DenseTensor& ddx,
                           DenseTensor* dout_new,
                           DenseTensor* ddout) {
   if (dout_new) {

@@ -171,10 +171,10 @@ void TanhDoubleGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void TanhTripleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
-                          const DenseTensor& d_ddout,
+                          const DenseTensor& ddx,
                           const DenseTensor& d_dout_new,
+                          const DenseTensor& d_ddout,
                           DenseTensor* d_out_new,
                           DenseTensor* d_dout,
                           DenseTensor* d_ddx) {
paddle/phi/ops/compat/activation_sig.cc
@@ -121,13 +121,13 @@ KernelSignature ReluDoubleGradOpArgumentMapping(
 KernelSignature TanhDoubleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "tanh_double_grad", {"Out", "DDX", "DOut"}, {}, {"DOutNew", "DDOut"});
+      "tanh_double_grad", {"Out", "DOut", "DDX"}, {}, {"DOutNew", "DDOut"});
 }

 KernelSignature TanhTripleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature("tanh_triple_grad",
-                         {"Out", "DDX", "DOut", "D_DDOut", "D_DOut_New"},
+                         {"Out", "DOut", "DDX", "D_DOut_New", "D_DDOut"},
                          {},
                          {"D_OutNew", "D_DOut", "D_DDx"});
 }
paddle/phi/ops/compat/batch_norm_sig.cc
@@ -82,16 +82,16 @@ KernelSignature BatchNormGradOpArgumentMapping(
 KernelSignature BatchNormGradGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature("batch_norm_grad_grad",
-                         {"DDX",
-                          "DDScale",
-                          "DDBias",
-                          "DY",
-                          "X",
+                         {"X",
                           "Scale",
+                          "Mean",
+                          "Variance",
                           "SavedMean",
                           "SavedVariance",
-                          "Mean",
-                          "Variance"},
+                          "DY",
+                          "DDX",
+                          "DDScale",
+                          "DDBias"},
                          {"momentum",
                           "epsilon",
                           "data_layout",
paddle/phi/ops/compat/elementwise_sig.cc
@@ -133,7 +133,7 @@ KernelSignature ElementwiseSubGradOpArgumentMapping(
 KernelSignature ElementwiseSubDoubleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "subtract_double_grad", {"Y", "DDX", "DDY", "DOut"}, {"axis"}, {"DDOut"});
+      "subtract_double_grad", {"Y", "DOut", "DDX", "DDY"}, {"axis"}, {"DDOut"});
 }

 KernelSignature ElementwiseDivGradOpArgumentMapping(
python/paddle/fluid/tests/unittests/gradient_checker.py
@@ -560,7 +560,10 @@ def get_static_double_grad(x,
     # so, they are also the input of second-order backward.
     x += y_grads
     x_init += dy_init
-    y = dx
+
+    # filter None in dx for DX/DY may be None in kernel
+    filted_dx = [dxi for dxi in dx if dxi is not None]
+    y = filted_dx

     # check input arguments
     x = _as_list(x)

@@ -619,6 +622,7 @@ def get_static_double_grad(x,
 def get_eager_double_grad(func,
                           x_init=None,
                           dy_init=None,
+                          place=None,
                           return_mid_result=False):
     """
     Get Double Grad result of dygraph.

@@ -627,6 +631,7 @@ def get_eager_double_grad(func,
         func: A wrapped dygraph function that its logic is equal to static program
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         dy_init (numpy.array|list[numpy.array]|None): the init value for gradient of output.
+        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
         return_mid_result (bool): A flag that controls the return content.
     Returns:
         If 'return_mid_result' set True.

@@ -635,6 +640,10 @@ def get_eager_double_grad(func,
         If 'return_mid_result' set False.
         A list of numpy array that stores second derivative result calulated by dygraph.
     """
+    if isinstance(place, fluid.CPUPlace):
+        paddle.set_device("cpu")
+    if isinstance(place, fluid.CUDAPlace):
+        paddle.set_device("gpu")
     inputs = []
     dys = []
     for x in x_init:

@@ -648,7 +657,12 @@ def get_eager_double_grad(func,
     # calculate first derivative
     outputs = func(inputs)
     d_inputs = paddle.grad(
-        outputs=outputs, inputs=inputs, grad_outputs=dys, create_graph=True)
+        outputs=outputs,
+        inputs=inputs,
+        grad_outputs=dys,
+        create_graph=True,
+        allow_unused=True)
+    d_inputs = [d_input for d_input in d_inputs if d_input is not None]

     # calcluate second derivative
     inputs = inputs + dys

@@ -663,15 +677,20 @@ def get_eager_double_grad(func,
         ddy = paddle.ones(shape=d_input.shape, dtype=d_input.dtype)
         ddy.stop_gradient = False
         ddys.append(ddy)
+
     dd_inputs = paddle.grad(
         outputs=d_inputs,
         inputs=inputs,
         grad_outputs=ddys,
-        create_graph=create_graph)
+        create_graph=create_graph,
+        allow_unused=True)

     if return_mid_result:
         return dd_inputs, inputs + ddys
     else:
-        return [dd_input.numpy() for dd_input in dd_inputs]
+        return [
+            dd_input.numpy() for dd_input in dd_inputs
+            if dd_input is not None
+        ]


 def double_grad_check_for_dygraph(func,

@@ -693,7 +712,6 @@ def double_grad_check_for_dygraph(func,
         y (Variable|list[Variable]): output variables to the program.
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         place (fluid.CPUPlace or fluid.CUDAPlace): the device.
-        eps (float): perturbation for finite differences.
         atol (float): absolute tolerance.
         rtol (float): relative tolerance.
         raise_exception (bool): whether to raise an exception if

@@ -722,19 +740,25 @@ def double_grad_check_for_dygraph(func,
     paddle.disable_static()
     with _test_eager_guard():
-        eager_double_grad = get_eager_double_grad(func, x_init, y_grads_init)
+        eager_double_grad = get_eager_double_grad(func, x_init, y_grads_init,
+                                                  place)
     paddle.enable_static()

     static_double_grad = get_static_double_grad(x, y, x_init, y_grads_init,
                                                 place)

+    if len(static_double_grad) != len(eager_double_grad):
+        msg = "The output grad tensor's number of static graph is different with dygraph, " \
+            "please check the python api unit test used."
+        raise RuntimeError(msg)
+
     for i in six.moves.xrange(len(static_double_grad)):
         if not np.allclose(static_double_grad[i], eager_double_grad[i], rtol,
                            atol):
-            msg = 'Check eager double result fail. Mismatch between static_graph double grad %s ' \
-                'and eager double grad %s on %s,\n' \
+            msg = 'Check eager double result fail. Mismatch between static_graph double grad ' \
+                'and eager double grad on %s, the output double grad tensor\'s index is : %d \n' \
                 'static:%s\n eager:%s\n' \
-                % (static_double_grad[i].name, eager_double_grad[i].name, str(place), static_double_grad[i], eager_double_grad[i])
+                % (str(place), i, static_double_grad[i], eager_double_grad[i])
             return fail_test(msg)

@@ -794,6 +818,7 @@ def get_static_triple_grad(x,
 def get_eager_triple_grad(func,
                           x_init=None,
                           dy_init=None,
+                          place=None,
                           return_mid_result=False):
     """
     Get triple Grad result of dygraph.

@@ -802,12 +827,13 @@ def get_eager_triple_grad(func,
         func: A wrapped dygraph function that its logic is equal to static program
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         dy_init (numpy.array|list[numpy.array]|None): the init value for gradient of output.
+        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
         return_mid_result (list[Tensor], list[Tensor]): If set True, the
     Returns:
         A list of numpy array that stores second derivative result calulated by dygraph
     """
     dd_y, dd_x = get_eager_double_grad(
-        func, x_init, dy_init, return_mid_result=True)
+        func, x_init, dy_init, place, return_mid_result=True)

     # calcluate third derivative
     dddys = []

@@ -839,7 +865,6 @@ def triple_grad_check_for_dygraph(func,
         y (Variable|list[Variable]): output variables to the program.
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         place (fluid.CPUPlace or fluid.CUDAPlace): the device.
-        eps (float): perturbation for finite differences.
         atol (float): absolute tolerance.
         rtol (float): relative tolerance.
         raise_exception (bool): whether to raise an exception if

@@ -868,17 +893,23 @@ def triple_grad_check_for_dygraph(func,
     paddle.disable_static()
     with _test_eager_guard():
-        eager_triple_grad = get_eager_triple_grad(func, x_init, y_grads_init)
+        eager_triple_grad = get_eager_triple_grad(func, x_init, y_grads_init,
+                                                  place)
     paddle.enable_static()

     static_triple_grad = get_static_triple_grad(x, y, x_init, y_grads_init,
                                                 place)

+    if len(static_triple_grad) != len(eager_triple_grad):
+        msg = "The output grad tensor's number of static graph is different with dygraph, " \
+            "please check the python api unit test used."
+        raise RuntimeError(msg)
+
     for i in six.moves.xrange(len(static_triple_grad)):
         if not np.allclose(static_triple_grad[i], eager_triple_grad[i], rtol,
                            atol):
-            msg = 'Check eager double result fail. Mismatch between static_graph double grad %s ' \
-                'and eager double grad %s on %s,\n' \
+            msg = 'Check eager double result fail. Mismatch between static_graph double grad ' \
+                'and eager double grad on %s, the output double grad tensor\'s index is : %d \n' \
                 'static:%s\n eager:%s\n' \
-                % (static_triple_grad[i].name, eager_triple_grad[i].name, str(place), static_triple_grad[i], eager_triple_grad[i])
+                % (str(place), i, static_triple_grad[i], eager_triple_grad[i])
             return fail_test(msg)
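The net effect of the gradient_checker.py changes is that the eager double-grad helper now pins the device, tolerates unused inputs (allow_unused=True), and drops None gradients before comparing against the static graph. Below is a condensed, standalone sketch of that pattern written only against the public paddle.grad API; the helper name and example inputs are ours, not the file's.

    import paddle

    def eager_double_grad_sketch(func, xs, dys):
        # First derivative, kept differentiable; unused inputs come back as None.
        d_inputs = paddle.grad(outputs=func(xs), inputs=xs, grad_outputs=dys,
                               create_graph=True, allow_unused=True)
        d_inputs = [d for d in d_inputs if d is not None]

        # Second derivative w.r.t. the original inputs plus the first-order
        # grad_outputs, mirroring get_eager_double_grad above.
        new_inputs = xs + dys
        ddys = [paddle.ones(shape=d.shape, dtype=d.dtype) for d in d_inputs]
        dd_inputs = paddle.grad(outputs=d_inputs, inputs=new_inputs,
                                grad_outputs=ddys, allow_unused=True)
        return [dd.numpy() for dd in dd_inputs if dd is not None]

    # xs and dys must be differentiable (stop_gradient=False) eager tensors.
    x = paddle.to_tensor([0.5, 1.0], stop_gradient=False)
    dy = paddle.ones_like(x)
    dy.stop_gradient = False
    print(eager_double_grad_sketch(lambda ts: paddle.tanh(ts[0]), [x], [dy]))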
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
@@ -52,6 +52,9 @@ class TestSigmoidTripleGradCheck(unittest.TestCase):
 class TestSigmoidDoubleGradCheck(unittest.TestCase):
+    def sigmoid_wrapper(self, x):
+        return fluid.layers.sigmoid(x[0])
+
     @prog_scope()
     def func(self, place):
         shape = [2, 3, 7, 9]

@@ -64,6 +67,8 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
         x_arr[np.abs(x_arr) < 0.005] = 0.002
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.sigmoid_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()

@@ -75,6 +80,9 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
 class TestTanhTripleGradCheck(unittest.TestCase):
+    def tanh_wrapper(self, x):
+        return paddle.tanh(x[0])
+
     @prog_scope()
     def func(self, place):
         shape = [2, 3, 7, 9]

@@ -87,6 +95,8 @@ class TestTanhTripleGradCheck(unittest.TestCase):
         x_arr[np.abs(x_arr) < 0.005] = 0.002
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.triple_grad_check_for_dygraph(
+            self.tanh_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()

@@ -98,6 +108,9 @@ class TestTanhTripleGradCheck(unittest.TestCase):
 class TestTanhDoubleGradCheck(unittest.TestCase):
+    def tanh_wrapper(self, x):
+        return paddle.tanh(x[0])
+
     @prog_scope()
     def func(self, place):
         shape = [2, 3, 7, 9]

@@ -110,6 +123,8 @@ class TestTanhDoubleGradCheck(unittest.TestCase):
         x_arr[np.abs(x_arr) < 0.005] = 0.002
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.tanh_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()

@@ -173,6 +188,9 @@ class TestLeakyReluDoubleGradCheck(unittest.TestCase):
 class TestELUDoubleGradCheck(unittest.TestCase):
+    def elu_wrapper(self, x):
+        return paddle.nn.functional.elu(x[0], alpha=0.2)
+
     @prog_scope()
     def func(self, place):
         shape = [2, 4, 4, 4]

@@ -189,6 +207,8 @@ class TestELUDoubleGradCheck(unittest.TestCase):
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.elu_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()
python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py
@@ -139,6 +139,9 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
 class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
+    def subtract_wrapper(self, x):
+        return paddle.subtract(x[0], x[1])
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.

@@ -156,6 +159,11 @@ class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.subtract_wrapper, [x, y],
+            out,
+            x_init=[x_arr, y_arr],
+            place=place)

     def test_grad(self):
         paddle.enable_static()

@@ -195,6 +203,9 @@ class TestElementwiseSubBroadcastDoubleGradCheck(unittest.TestCase):
 class TestElementwiseDivDoubleGradCheck(unittest.TestCase):
+    def divide_wrapper(self, x):
+        return paddle.divide(x[0], x[1])
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.

@@ -213,6 +224,12 @@ class TestElementwiseDivDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps, atol=1e-3)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.divide_wrapper, [x, y],
+            out,
+            x_init=[x_arr, y_arr],
+            place=place,
+            atol=1e-3)

     def test_grad(self):
         paddle.enable_static()
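Note the wrapper convention these dygraph checks rely on: the wrapped callable receives the whole list of input tensors and indexes into it, so one harness covers unary and binary ops alike. A minimal illustration outside the test classes (plain eager tensors; the names are ours):

    import paddle

    def tanh_wrapper(xs):
        # xs is the list of inputs handed to the dygraph gradient checker.
        return paddle.tanh(xs[0])

    def subtract_wrapper(xs):
        return paddle.subtract(xs[0], xs[1])

    a = paddle.rand([2, 3])
    b = paddle.rand([2, 3])
    print(tanh_wrapper([a]).shape, subtract_wrapper([a, b]).shape)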
python/paddle/nn/functional/activation.py
@@ -112,7 +112,10 @@ def elu(x, alpha=1.0, name=None):
             #  [ 1.         15.6      ]]
     """

-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_elu(x, alpha)
+
+    if _in_legacy_dygraph():
         return _C_ops.elu(x, 'alpha', alpha)

     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu')
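With this change, elu dispatches to the generated final-state kernel in the new eager mode and keeps the legacy dygraph and static-graph paths, so user-level calls are unchanged. A usage example consistent with the docstring excerpt above:

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([[-1.0, 6.0], [1.0, 15.6]])
    out = F.elu(x, alpha=0.2)
    # elu(x) = x for x > 0, alpha * (exp(x) - 1) otherwise.
    print(out.numpy())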
python/paddle/utils/code_gen/api.yaml
@@ -466,6 +466,7 @@
     func : DeformableConvInferMeta
   kernel :
     func : deformable_conv
+    data_type : x
   optional : mask
   backward : deformable_conv_grad

@@ -546,6 +547,7 @@
     func : DropoutInferMeta
   kernel :
     func : dropout
+    data_type : x
   optional : seed_tensor
   backward : dropout_grad

@@ -1065,6 +1067,7 @@
     func : LayerNormInferMeta
   kernel :
     func : layer_norm
+    data_type : x
   backward : layer_norm_grad
   optional : scale, bias

@@ -1608,6 +1611,7 @@
     func : PsroiPoolInferMeta
   kernel :
     func : psroi_pool
+    data_type : x
   optional : boxes_num
   backward : psroi_pool_grad

@@ -1713,6 +1717,7 @@
     func : RoiAlignInferMeta
   kernel :
     func : roi_align
+    data_type : x
   optional : boxes_num
   backward : roi_align_grad

@@ -1723,6 +1728,7 @@
     func : RoiPoolInferMeta
   kernel :
     func : roi_pool
+    data_type : x
   optional : boxes_num
   intermediate : arg_max
   backward : roi_pool_grad
python/paddle/utils/code_gen/backward.yaml
@@ -152,6 +152,18 @@
   kernel :
     func : atanh_grad

+- backward_api : batch_norm_double_grad
+  forward : batch_norm_grad (Tensor x, Tensor scale, Tensor bias, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor grad_out, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(grad_x), Tensor(grad_scale), Tensor(grad_bias)
+  args : (Tensor x, Tensor scale, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor grad_out, Tensor grad_x_grad, Tensor grad_scale_grad, Tensor grad_bias_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+  output : Tensor(x_grad), Tensor(scale_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [x, scale, x]
+  kernel :
+    func : batch_norm_grad_grad
+    data_type : x
+  optional : out_mean, out_variance
+
 - backward_api : batch_norm_grad
   forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
   args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)

@@ -163,6 +175,7 @@
     func : batch_norm_grad
     data_type : out_grad
   optional : mean_out, variance_out, reserve_space
+  backward : batch_norm_double_grad

 - backward_api : bce_loss_grad
   forward : bce_loss (Tensor input, Tensor label) -> Tensor(out)

@@ -362,6 +375,7 @@
     func : DeformableConvGradInferMeta
   kernel :
     func : deformable_conv_grad
+    data_type : x
   optional : mask

 - backward_api : depthwise_conv2d_transpose_grad

@@ -414,6 +428,18 @@
   kernel :
     func : dist_grad

+- backward_api : divide_double_grad
+  forward : divide_grad (Tensor x, Tensor y, Tensor out, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
+  args : (Tensor y, Tensor out, Tensor grad_x, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
+  output : Tensor(y_grad), Tensor(out_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [y, grad_x, grad_x]
+  kernel :
+    func : divide_double_grad
+    data_type : out
+  optional : grad_x_grad, grad_y_grad
+
 - backward_api : divide_grad
   forward : divide (Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out, Tensor out_grad, int axis = -1)

@@ -423,6 +449,7 @@
     param : [x, y]
   kernel :
     func : divide_grad
+  backward : divide_double_grad

 - backward_api : dropout_grad
   forward : dropout (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask)

@@ -455,6 +482,16 @@
   kernel :
     func : elementwise_pow_grad

+- backward_api : elu_double_grad
+  forward : elu_grad (Tensor x, Tensor out, Tensor grad_out, float alpha)-> Tensor(grad_x)
+  args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
+  output : Tensor(x_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralBinaryGradInferMeta
+    param : [x, x]
+  kernel :
+    func : elu_double_grad
+
 - backward_api : elu_grad
   forward : elu (Tensor x, float alpha) -> Tensor(out)
   args : (Tensor x, Tensor out, Tensor out_grad, float alpha)

@@ -464,6 +501,7 @@
     param : [x]
   kernel :
     func : elu_grad
+  backward : elu_double_grad

 - backward_api : erf_grad
   forward : erf (Tensor x) -> Tensor(out)

@@ -633,6 +671,7 @@
     param : [x]
   kernel :
     func : graph_send_recv_grad
+    data_type : out_grad
   optional : out, dst_count

 - backward_api : gumbel_softmax_grad

@@ -1287,6 +1326,7 @@
     param : [x]
   kernel :
     func : psroi_pool_grad
+    data_type : x
   optional : boxes_num

 # output is optional

@@ -1381,6 +1421,7 @@
     param : [x]
   kernel :
     func : roi_align_grad
+    data_type : boxes
   optional : boxes_num

 - backward_api : roi_pool_grad

@@ -1392,6 +1433,7 @@
     param : [x]
   kernel :
     func : roi_pool_grad
+    data_type : x
   optional : boxes_num

 - backward_api : roll_grad

@@ -1498,7 +1540,7 @@
     func : UnchangedInferMeta
     param : [x]
   kernel :
-    func : sigmoid_cross_entropy_with_logits_grad
+    func : sigmoid_cross_entropy_with_logits_grad

 - backward_api : sigmoid_double_grad
   forward : sigmoid_grad (Tensor out, Tensor fwd_grad_out) -> Tensor(grad_x)

@@ -1654,6 +1696,18 @@
     func : strided_slice_grad
   no_need_buffer : x

+- backward_api : subtract_double_grad
+  forward : subtract_grad (Tensor x, Tensor y, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
+  args : (Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
+  output : Tensor(grad_out_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [grad_out]
+  kernel :
+    func : subtract_double_grad
+  optional : grad_x_grad, grad_y_grad
+  no_need_buffer : y, grad_out
+
 - backward_api : subtract_grad
   forward : subtract (Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)

@@ -1664,6 +1718,7 @@
   kernel :
     func : subtract_grad
   no_need_buffer : x, y
+  backward : subtract_double_grad

 - backward_api : sum_double_grad
   forward : sum_grad (Tensor x, Tensor grad_out, int64_t[] dims, bool keep_dim, bool reduce_all=false) -> Tensor(grad_x)

@@ -1720,6 +1775,17 @@
   kernel :
     func : tan_grad

+- backward_api : tanh_double_grad
+  forward : tanh_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
+  args : (Tensor out, Tensor grad_out, Tensor grad_x_grad)
+  output : Tensor(out_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralBinaryGradInferMeta
+    param : [out, out]
+  kernel :
+    func : tanh_double_grad
+  backward : tanh_triple_grad
+
 - backward_api : tanh_grad
   forward : tanh (Tensor x) -> Tensor(out)
   args : (Tensor out, Tensor out_grad)

@@ -1729,6 +1795,7 @@
     param : [out]
   kernel :
     func : tanh_grad
+  backward : tanh_double_grad

 - backward_api : tanh_shrink_grad
   forward : tanh_shrink (Tensor x) -> Tensor(out)

@@ -1740,6 +1807,16 @@
   kernel :
     func : tanh_shrink_grad

+- backward_api : tanh_triple_grad
+  forward : tanh_double_grad (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward) -> Tensor(grad_out_new), Tensor(grad_out_grad)
+  args : (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward, Tensor grad_out_new_grad, Tensor grad_out_grad_grad)
+  output : Tensor(out_grad), Tensor(grad_out_forward_grad), Tensor(grad_x_grad_forward_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [out, out, grad_x_grad_forward]
+  kernel :
+    func : tanh_triple_grad
+
 - backward_api : thresholded_relu_grad
   forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
   args : (Tensor x, Tensor out_grad, float threshold)
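With these backward.yaml entries in place, second-order derivatives of the listed ops should resolve to the new phi double-grad kernels under the final-state (eager) API. A hedged end-to-end sketch from Python, using divide, whose divide_double_grad entry is added above (inputs are ours, for illustration only):

    import paddle

    x = paddle.to_tensor([1.0, 2.0], stop_gradient=False)
    y = paddle.to_tensor([3.0, 4.0], stop_gradient=False)
    out = paddle.divide(x, y)

    # First-order grads kept differentiable so that the second paddle.grad
    # call can route through divide_double_grad.
    dx, dy = paddle.grad([out], [x, y], create_graph=True)
    ddx, ddy = paddle.grad([dx, dy], [x, y], allow_unused=True)
    print(ddx, ddy)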