PaddlePaddle / Paddle

Commit 24ec6ed0 (unverified)
Authored Apr 29, 2022 by YuanRisheng; committed via GitHub on Apr 29, 2022
Add some double/triple grad kernel yaml file (#42361)

* add double yaml
* add inline func

Parent: 2bee99df

Showing 19 changed files with 224 additions and 67 deletions (+224 / -67)
paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py   +6   -11
paddle/phi/api/lib/kernel_dispatch.h                                            +10  -2
paddle/phi/kernels/activation_grad_kernel.h                                     +3   -3
paddle/phi/kernels/batch_norm_grad_kernel.h                                     +6   -6
paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc                                +6   -6
paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc                      +1   -1
paddle/phi/kernels/elementwise_subtract_grad_kernel.h                           +1   -1
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu                                +6   -6
paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu                      +1   -1
paddle/phi/kernels/impl/activation_grad_impl.h                                  +3   -3
paddle/phi/ops/compat/activation_sig.cc                                         +2   -2
paddle/phi/ops/compat/batch_norm_sig.cc                                         +7   -7
paddle/phi/ops/compat/elementwise_sig.cc                                        +1   -1
python/paddle/fluid/tests/unittests/gradient_checker.py                         +46  -15
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py                  +20  -0
python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py                 +17  -0
python/paddle/nn/functional/activation.py                                       +4   -1
python/paddle/utils/code_gen/api.yaml                                           +6   -0
python/paddle/utils/code_gen/backward.yaml                                      +78  -1
paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py

@@ -22,17 +22,12 @@ import os
 ### Global Variables ###
 ########################
-ops_to_fill_zero_for_empty_grads = set([
-    "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
-    "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
-    "add_triple_grad", "multiply_double_grad", "multiply_triple_grad",
-    "conv2d_grad_grad"
-])
+ops_to_fill_zero_for_empty_grads = set([
+    "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
+    "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
+    "add_triple_grad", "multiply_double_grad", "multiply_triple_grad",
+    "conv2d_grad_grad", "batch_norm_double_grad", "tanh_double_grad",
+    "tanh_triple_grad", "subtract_double_grad", "divide_double_grad",
+    "log_double_grad", "elu_double_grad"
+])

 # For API dispatch used at python-level
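The set above is consulted by the eager final-state code generator when it emits grad-node code: grad kernels listed there cannot cope with missing (empty) gradient inputs, so the generated wrapper is expected to pad them with zeros. A minimal Python sketch of the kind of membership check involved (the helper name is hypothetical, not part of the generator):

# Hypothetical sketch: how generated code might consult the list above before
# dispatching to a double/triple grad kernel.
ops_to_fill_zero_for_empty_grads = {
    "tanh_double_grad", "tanh_triple_grad", "subtract_double_grad",
    "divide_double_grad", "batch_norm_double_grad", "elu_double_grad",
}

def needs_zero_fill(op_name: str) -> bool:
    # Ops in the set require every grad input to be a real tensor, so a
    # missing grad would be replaced by a zero tensor of the right shape.
    return op_name in ops_to_fill_zero_for_empty_grads

assert needs_zero_fill("subtract_double_grad")
assert not needs_zero_fill("relu_grad")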
paddle/phi/api/lib/kernel_dispatch.h

@@ -96,8 +96,7 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
   // TODO(chenweihang): deal with multiple diff input Tensors
   // TODO(chenweihang): add global device guard method to set backend
-  void operator()(const Tensor& x) {
-    const phi::TensorBase& tensor = *x.impl();
+  inline void AssignKernelKeySet(const phi::TensorBase& tensor) {
     key_set.backend_set =
         key_set.backend_set | detail::GetTensorBackendSet(tensor);
     // TODO(chenweihang): select multi layout and dtype
@@ -110,6 +109,8 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
     }
   }

+  void operator()(const Tensor& x) { AssignKernelKeySet(*x.impl()); }
+
   void operator()(const std::vector<Tensor>& x) {
     const phi::TensorBase& tensor = *x.at(0).impl();
     key_set.backend_set =
@@ -119,6 +120,13 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
     key_set.dtype = tensor.dtype();
   }

+  void operator()(const paddle::optional<const Tensor&> x) {
+    if (x.get_ptr() != nullptr) {
+      const phi::TensorBase& tensor = *(x.get_ptr()->impl());
+      AssignKernelKeySet(tensor);
+    }
+  }
+
   // skip other type args, these args don't used in kernel selection
   template <typename T>
   void operator()(const T& x) {
paddle/phi/kernels/activation_grad_kernel.h

@@ -82,18 +82,18 @@ void ReluDoubleGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void TanhDoubleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
+                          const DenseTensor& ddx,
                           DenseTensor* dout_new,
                           DenseTensor* ddout);

 template <typename T, typename Context>
 void TanhTripleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
-                          const DenseTensor& d_ddout,
+                          const DenseTensor& ddx,
                           const DenseTensor& d_dout_new,
+                          const DenseTensor& d_ddout,
                           DenseTensor* d_out_new,
                           DenseTensor* d_dout,
                           DenseTensor* d_ddx);
paddle/phi/kernels/batch_norm_grad_kernel.h

@@ -66,16 +66,16 @@ void BatchNormGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void BatchNormDoubleGradKernel(const Context& dev_ctx,
-                               const DenseTensor& x_grad_grad,
-                               const DenseTensor& scale_grad_grad,
-                               const DenseTensor& bias_grad_grad,
-                               const DenseTensor& y_grad,
                                const DenseTensor& x,
                                const DenseTensor& scale,
-                               const DenseTensor& saved_mean,
-                               const DenseTensor& saved_variance,
                                paddle::optional<const DenseTensor&> mean,
                                paddle::optional<const DenseTensor&> variance,
+                               const DenseTensor& saved_mean,
+                               const DenseTensor& saved_variance,
+                               const DenseTensor& y_grad,
+                               const DenseTensor& x_grad_grad,
+                               const DenseTensor& scale_grad_grad,
+                               const DenseTensor& bias_grad_grad,
                                float momentum,
                                float epsilon,
                                const std::string& data_layout,
paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc

@@ -341,16 +341,16 @@ void BatchNormGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void BatchNormDoubleGradKernel(const Context& ctx,
-                               const DenseTensor& x_grad_grad,
-                               const DenseTensor& scale_grad_grad,
-                               const DenseTensor& bias_grad_grad,
-                               const DenseTensor& y_grad,
                                const DenseTensor& x,
                                const DenseTensor& scale,
-                               const DenseTensor& saved_mean,
-                               const DenseTensor& saved_variance,
                                paddle::optional<const DenseTensor&> mean,
                                paddle::optional<const DenseTensor&> variance,
+                               const DenseTensor& saved_mean,
+                               const DenseTensor& saved_variance,
+                               const DenseTensor& y_grad,
+                               const DenseTensor& x_grad_grad,
+                               const DenseTensor& scale_grad_grad,
+                               const DenseTensor& bias_grad_grad,
                                float momentum,
                                float epsilon,
                                const std::string& data_layout_str,
paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc

@@ -38,9 +38,9 @@ void SubtractGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void SubtractDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& y,
-                              const DenseTensor& dout,
                               paddle::optional<const DenseTensor&> ddx,
                               paddle::optional<const DenseTensor&> ddy,
+                              const DenseTensor& dout,
                               int axis,
                               DenseTensor* ddout) {
   phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
paddle/phi/kernels/elementwise_subtract_grad_kernel.h

@@ -30,9 +30,9 @@ void SubtractGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void SubtractDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& y,
-                              const DenseTensor& dout,
                               paddle::optional<const DenseTensor&> ddx,
                               paddle::optional<const DenseTensor&> ddy,
+                              const DenseTensor& dout,
                               int axis,
                               DenseTensor* ddout);
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu

@@ -908,16 +908,16 @@ void BatchNormGradKernel(const Context &dev_ctx,
 template <typename T, typename Context>
 void BatchNormDoubleGradKernel(const Context& ctx,
-                               const DenseTensor& x_grad_grad,
-                               const DenseTensor& scale_grad_grad,
-                               const DenseTensor& bias_grad_grad,
-                               const DenseTensor& y_grad,
                                const DenseTensor& x,
                                const DenseTensor& scale,
-                               const DenseTensor& saved_mean,
-                               const DenseTensor& saved_variance,
                                paddle::optional<const DenseTensor&> mean,
                                paddle::optional<const DenseTensor&> variance,
+                               const DenseTensor& saved_mean,
+                               const DenseTensor& saved_variance,
+                               const DenseTensor& y_grad,
+                               const DenseTensor& x_grad_grad,
+                               const DenseTensor& scale_grad_grad,
+                               const DenseTensor& bias_grad_grad,
                                float momentum,
                                float epsilon,
                                const std::string& data_layout_str,
paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu

@@ -46,9 +46,9 @@ void SubtractGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void SubtractDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& y,
-                              const DenseTensor& dout,
                               paddle::optional<const DenseTensor&> ddx,
                               paddle::optional<const DenseTensor&> ddy,
+                              const DenseTensor& dout,
                               int axis,
                               DenseTensor* ddout) {
   phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
paddle/phi/kernels/impl/activation_grad_impl.h

@@ -152,8 +152,8 @@ void LeakyReluDoubleGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void TanhDoubleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
+                          const DenseTensor& ddx,
                           DenseTensor* dout_new,
                           DenseTensor* ddout) {
   if (dout_new) {
@@ -171,10 +171,10 @@ void TanhDoubleGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void TanhTripleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
-                          const DenseTensor& d_ddout,
+                          const DenseTensor& ddx,
                           const DenseTensor& d_dout_new,
+                          const DenseTensor& d_ddout,
                           DenseTensor* d_out_new,
                           DenseTensor* d_dout,
                           DenseTensor* d_ddx) {
paddle/phi/ops/compat/activation_sig.cc

@@ -121,13 +121,13 @@ KernelSignature ReluDoubleGradOpArgumentMapping(
 KernelSignature TanhDoubleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "tanh_double_grad", {"Out", "DDX", "DOut"}, {}, {"DOutNew", "DDOut"});
+      "tanh_double_grad", {"Out", "DOut", "DDX"}, {}, {"DOutNew", "DDOut"});
 }

 KernelSignature TanhTripleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature("tanh_triple_grad",
-                         {"Out", "DDX", "DOut", "D_DDOut", "D_DOut_New"},
+                         {"Out", "DOut", "DDX", "D_DOut_New", "D_DDOut"},
                          {},
                          {"D_OutNew", "D_DOut", "D_DDx"});
 }
paddle/phi/ops/compat/batch_norm_sig.cc

@@ -82,16 +82,16 @@ KernelSignature BatchNormGradOpArgumentMapping(
 KernelSignature BatchNormGradGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
       "batch_norm_grad_grad",
-      {"DDX", "DDScale", "DDBias", "DY", "X", "Scale", "SavedMean",
-       "SavedVariance", "Mean", "Variance"},
+      {"X", "Scale", "Mean", "Variance", "SavedMean", "SavedVariance", "DY",
+       "DDX", "DDScale", "DDBias"},
       {"momentum", "epsilon", "data_layout",
paddle/phi/ops/compat/elementwise_sig.cc

@@ -133,7 +133,7 @@ KernelSignature ElementwiseSubGradOpArgumentMapping(
 KernelSignature ElementwiseSubDoubleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "subtract_double_grad", {"Y", "DDX", "DDY", "DOut"}, {"axis"}, {"DDOut"});
+      "subtract_double_grad", {"Y", "DOut", "DDX", "DDY"}, {"axis"}, {"DDOut"});
 }

 KernelSignature ElementwiseDivGradOpArgumentMapping(
python/paddle/fluid/tests/unittests/gradient_checker.py

@@ -560,7 +560,10 @@ def get_static_double_grad(x,
     # so, they are also the input of second-order backward.
     x += y_grads
     x_init += dy_init
-    y = dx
+
+    # filter None in dx for DX/DY may be None in kernel
+    filted_dx = [dxi for dxi in dx if dxi is not None]
+    y = filted_dx

     # check input arguments
     x = _as_list(x)
@@ -619,6 +622,7 @@ def get_static_double_grad(x,
 def get_eager_double_grad(func,
                           x_init=None,
                           dy_init=None,
+                          place=None,
                           return_mid_result=False):
     """
     Get Double Grad result of dygraph.
@@ -627,6 +631,7 @@ def get_eager_double_grad(func,
         func: A wrapped dygraph function that its logic is equal to static program
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         dy_init (numpy.array|list[numpy.array]|None): the init value for gradient of output.
+        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
         return_mid_result (bool): A flag that controls the return content.
     Returns:
         If 'return_mid_result' set True.
@@ -635,6 +640,10 @@ def get_eager_double_grad(func,
         If 'return_mid_result' set False.
         A list of numpy array that stores second derivative result calulated by dygraph.
     """
+    if isinstance(place, fluid.CPUPlace):
+        paddle.set_device("cpu")
+    if isinstance(place, fluid.CUDAPlace):
+        paddle.set_device("gpu")
     inputs = []
     dys = []
     for x in x_init:
@@ -648,7 +657,12 @@ def get_eager_double_grad(func,
     # calculate first derivative
     outputs = func(inputs)
     d_inputs = paddle.grad(
-        outputs=outputs, inputs=inputs, grad_outputs=dys, create_graph=True)
+        outputs=outputs,
+        inputs=inputs,
+        grad_outputs=dys,
+        create_graph=True,
+        allow_unused=True)
+    d_inputs = [d_input for d_input in d_inputs if d_input is not None]

     # calcluate second derivative
     inputs = inputs + dys
@@ -663,15 +677,20 @@ def get_eager_double_grad(func,
         ddy = paddle.ones(shape=d_input.shape, dtype=d_input.dtype)
         ddy.stop_gradient = False
         ddys.append(ddy)
-    dd_inputs = paddle.grad(
-        outputs=d_inputs, inputs=inputs, grad_outputs=ddys,
-        create_graph=create_graph)
+    dd_inputs = paddle.grad(
+        outputs=d_inputs,
+        inputs=inputs,
+        grad_outputs=ddys,
+        create_graph=create_graph,
+        allow_unused=True)

     if return_mid_result:
         return dd_inputs, inputs + ddys
     else:
-        return [dd_input.numpy() for dd_input in dd_inputs]
+        return [
+            dd_input.numpy() for dd_input in dd_inputs if dd_input is not None
+        ]


 def double_grad_check_for_dygraph(func,
@@ -693,7 +712,6 @@ def double_grad_check_for_dygraph(func,
         y (Variable|list[Variable]): output variables to the program.
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         place (fluid.CPUPlace or fluid.CUDAPlace): the device.
-        eps (float): perturbation for finite differences.
         atol (float): absolute tolerance.
         rtol (float): relative tolerance.
         raise_exception (bool): whether to raise an exception if
@@ -722,19 +740,25 @@ def double_grad_check_for_dygraph(func,
     paddle.disable_static()
     with _test_eager_guard():
-        eager_double_grad = get_eager_double_grad(func, x_init, y_grads_init)
+        eager_double_grad = get_eager_double_grad(func, x_init, y_grads_init,
+                                                  place)
     paddle.enable_static()

     static_double_grad = get_static_double_grad(x, y, x_init, y_grads_init,
                                                 place)

+    if len(static_double_grad) != len(eager_double_grad):
+        msg = "The output grad tensor's number of static graph is different with dygraph, " \
+            "please check the python api unit test used."
+        raise RuntimeError(msg)
+
     for i in six.moves.xrange(len(static_double_grad)):
         if not np.allclose(static_double_grad[i], eager_double_grad[i], rtol,
                            atol):
-            msg = 'Check eager double result fail. Mismatch between static_graph double grad %s ' \
-                'and eager double grad %s on %s,\n' \
+            msg = 'Check eager double result fail. Mismatch between static_graph double grad ' \
+                'and eager double grad on %s, the output double grad tensor\'s index is : %d \n' \
                 'static:%s\n eager:%s\n' \
-                % (static_double_grad[i].name, eager_double_grad[i].name, str(place), static_double_grad[i], eager_double_grad[i])
+                % (str(place), i, static_double_grad[i], eager_double_grad[i])
             return fail_test(msg)
@@ -794,6 +818,7 @@ def get_static_triple_grad(x,
 def get_eager_triple_grad(func,
                           x_init=None,
                           dy_init=None,
+                          place=None,
                           return_mid_result=False):
     """
     Get triple Grad result of dygraph.
@@ -802,12 +827,13 @@ def get_eager_triple_grad(func,
         func: A wrapped dygraph function that its logic is equal to static program
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         dy_init (numpy.array|list[numpy.array]|None): the init value for gradient of output.
+        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
         return_mid_result (list[Tensor], list[Tensor]): If set True, the
     Returns:
         A list of numpy array that stores second derivative result calulated by dygraph
     """
     dd_y, dd_x = get_eager_double_grad(
-        func, x_init, dy_init, return_mid_result=True)
+        func, x_init, dy_init, place, return_mid_result=True)

     # calcluate third derivative
     dddys = []
@@ -839,7 +865,6 @@ def triple_grad_check_for_dygraph(func,
         y (Variable|list[Variable]): output variables to the program.
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         place (fluid.CPUPlace or fluid.CUDAPlace): the device.
-        eps (float): perturbation for finite differences.
         atol (float): absolute tolerance.
         rtol (float): relative tolerance.
         raise_exception (bool): whether to raise an exception if
@@ -868,17 +893,23 @@ def triple_grad_check_for_dygraph(func,
     paddle.disable_static()
     with _test_eager_guard():
-        eager_triple_grad = get_eager_triple_grad(func, x_init, y_grads_init)
+        eager_triple_grad = get_eager_triple_grad(func, x_init, y_grads_init,
+                                                  place)
     paddle.enable_static()

     static_triple_grad = get_static_triple_grad(x, y, x_init, y_grads_init,
                                                 place)

+    if len(static_triple_grad) != len(eager_triple_grad):
+        msg = "The output grad tensor's number of static graph is different with dygraph, " \
+            "please check the python api unit test used."
+        raise RuntimeError(msg)
+
     for i in six.moves.xrange(len(static_triple_grad)):
         if not np.allclose(static_triple_grad[i], eager_triple_grad[i], rtol,
                            atol):
-            msg = 'Check eager double result fail. Mismatch between static_graph double grad %s ' \
-                'and eager double grad %s on %s,\n' \
+            msg = 'Check eager double result fail. Mismatch between static_graph double grad ' \
+                'and eager double grad on %s, the output double grad tensor\'s index is : %d \n' \
                 'static:%s\n eager:%s\n' \
-                % (static_triple_grad[i].name, eager_triple_grad[i].name, str(place), static_triple_grad[i], eager_triple_grad[i])
+                % (str(place), i, static_triple_grad[i], eager_triple_grad[i])
             return fail_test(msg)
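The allow_unused=True plus None-filtering pattern introduced above can be reproduced directly with paddle.grad; a minimal sketch, not taken from the test suite (the tensor values are illustrative):

import paddle

x = paddle.randn([3, 4])
x.stop_gradient = False
y = paddle.tanh(x)

# First derivative; keep the graph so a second paddle.grad call is possible.
d_inputs = paddle.grad(
    outputs=[y],
    inputs=[x],
    grad_outputs=[paddle.ones_like(y)],
    create_graph=True,
    allow_unused=True)
d_inputs = [d for d in d_inputs if d is not None]

# Second derivative; inputs that do not influence the first-order grads come
# back as None, which is exactly what get_eager_double_grad now filters out.
dd_inputs = paddle.grad(
    outputs=d_inputs,
    inputs=[x],
    grad_outputs=[paddle.ones_like(d) for d in d_inputs],
    create_graph=False,
    allow_unused=True)
dd_inputs = [dd for dd in dd_inputs if dd is not None]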
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py

@@ -52,6 +52,9 @@ class TestSigmoidTripleGradCheck(unittest.TestCase):
 class TestSigmoidDoubleGradCheck(unittest.TestCase):
+    def sigmoid_wrapper(self, x):
+        return fluid.layers.sigmoid(x[0])
+
     @prog_scope()
     def func(self, place):
         shape = [2, 3, 7, 9]
@@ -64,6 +67,8 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
         x_arr[np.abs(x_arr) < 0.005] = 0.002
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.sigmoid_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()
@@ -75,6 +80,9 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
 class TestTanhTripleGradCheck(unittest.TestCase):
+    def tanh_wrapper(self, x):
+        return paddle.tanh(x[0])
+
     @prog_scope()
     def func(self, place):
         shape = [2, 3, 7, 9]
@@ -87,6 +95,8 @@ class TestTanhTripleGradCheck(unittest.TestCase):
         x_arr[np.abs(x_arr) < 0.005] = 0.002
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.triple_grad_check_for_dygraph(
+            self.tanh_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()
@@ -98,6 +108,9 @@ class TestTanhTripleGradCheck(unittest.TestCase):
 class TestTanhDoubleGradCheck(unittest.TestCase):
+    def tanh_wrapper(self, x):
+        return paddle.tanh(x[0])
+
     @prog_scope()
     def func(self, place):
         shape = [2, 3, 7, 9]
@@ -110,6 +123,8 @@ class TestTanhDoubleGradCheck(unittest.TestCase):
         x_arr[np.abs(x_arr) < 0.005] = 0.002
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.tanh_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()
@@ -173,6 +188,9 @@ class TestLeakyReluDoubleGradCheck(unittest.TestCase):
 class TestELUDoubleGradCheck(unittest.TestCase):
+    def elu_wrapper(self, x):
+        return paddle.nn.functional.elu(x[0], alpha=0.2)
+
     @prog_scope()
     def func(self, place):
         shape = [2, 4, 4, 4]
@@ -189,6 +207,8 @@ class TestELUDoubleGradCheck(unittest.TestCase):
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.elu_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()
python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py

@@ -139,6 +139,9 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
 class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
+    def subtract_wrapper(self, x):
+        return paddle.subtract(x[0], x[1])
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.
@@ -156,6 +159,11 @@ class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.subtract_wrapper, [x, y],
+            out,
+            x_init=[x_arr, y_arr],
+            place=place)

     def test_grad(self):
         paddle.enable_static()
@@ -195,6 +203,9 @@ class TestElementwiseSubBroadcastDoubleGradCheck(unittest.TestCase):
 class TestElementwiseDivDoubleGradCheck(unittest.TestCase):
+    def divide_wrapper(self, x):
+        return paddle.divide(x[0], x[1])
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.
@@ -213,6 +224,12 @@ class TestElementwiseDivDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps, atol=1e-3)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.divide_wrapper, [x, y],
+            out,
+            x_init=[x_arr, y_arr],
+            place=place,
+            atol=1e-3)

     def test_grad(self):
         paddle.enable_static()
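The subtract wrapper above exercises a case where second-order gradients are identically zero, since subtract is linear in both inputs; that is why the checker needs allow_unused=True and None filtering. A rough eager-mode sketch of the same situation, under the assumption that gradients of unused inputs come back as None:

import paddle

x = paddle.randn([2, 3])
y = paddle.randn([2, 3])
x.stop_gradient = False
y.stop_gradient = False
out = paddle.subtract(x, y)

# Mirror gradient_checker: the incoming grad of `out` is itself trainable.
dout = paddle.ones_like(out)
dout.stop_gradient = False

dx, dy = paddle.grad([out], [x, y], grad_outputs=[dout],
                     create_graph=True, allow_unused=True)

# subtract is linear, so dx and dy depend only on dout, not on x or y; asking
# for second-order grads w.r.t. x and y is therefore expected to yield None,
# which is what the allow_unused/None-filtering changes above accommodate.
dd = paddle.grad([dx, dy], [x, y, dout], create_graph=False, allow_unused=True)
print([g is None for g in dd])  # expected: [True, True, False]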
python/paddle/nn/functional/activation.py

@@ -112,7 +112,10 @@ def elu(x, alpha=1.0, name=None):
             #  [ 1.       15.6      ]]
     """

-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_elu(x, alpha)
+
+    if _in_legacy_dygraph():
         return _C_ops.elu(x, 'alpha', alpha)

     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu')
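For reference, a plain eager-mode call to paddle.nn.functional.elu is what now takes the in_dygraph_mode() branch above; a small usage sketch:

import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([[-1.0, 6.0], [1.0, 15.6]], stop_gradient=False)
y = F.elu(x, alpha=0.2)   # eager mode dispatches through the new branch

# First-order gradient; with the elu_double_grad config added in backward.yaml,
# gradients of this gradient can in turn be taken for double-grad tests.
y.sum().backward()
print(x.grad)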
python/paddle/utils/code_gen/api.yaml

@@ -466,6 +466,7 @@
     func : DeformableConvInferMeta
   kernel :
     func : deformable_conv
+    data_type : x
   optional : mask
   backward : deformable_conv_grad
@@ -546,6 +547,7 @@
     func : DropoutInferMeta
   kernel :
     func : dropout
+    data_type : x
   optional : seed_tensor
   backward : dropout_grad
@@ -1065,6 +1067,7 @@
     func : LayerNormInferMeta
   kernel :
     func : layer_norm
+    data_type : x
   backward : layer_norm_grad
   optional : scale, bias
@@ -1608,6 +1611,7 @@
     func : PsroiPoolInferMeta
   kernel :
     func : psroi_pool
+    data_type : x
   optional : boxes_num
   backward : psroi_pool_grad
@@ -1713,6 +1717,7 @@
     func : RoiAlignInferMeta
   kernel :
     func : roi_align
+    data_type : x
   optional : boxes_num
   backward : roi_align_grad
@@ -1723,6 +1728,7 @@
     func : RoiPoolInferMeta
   kernel :
     func : roi_pool
+    data_type : x
   optional : boxes_num
   intermediate : arg_max
   backward : roi_pool_grad
python/paddle/utils/code_gen/backward.yaml

@@ -152,6 +152,18 @@
   kernel :
     func : atanh_grad

+- backward_api : batch_norm_double_grad
+  forward : batch_norm_grad (Tensor x, Tensor scale, Tensor bias, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor grad_out, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(grad_x), Tensor(grad_scale), Tensor(grad_bias)
+  args : (Tensor x, Tensor scale, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor grad_out, Tensor grad_x_grad, Tensor grad_scale_grad, Tensor grad_bias_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+  output : Tensor(x_grad), Tensor(scale_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [x, scale, x]
+  kernel :
+    func : batch_norm_grad_grad
+    data_type : x
+  optional : out_mean, out_variance
+
 - backward_api : batch_norm_grad
   forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
   args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
@@ -163,6 +175,7 @@
     func : batch_norm_grad
     data_type : out_grad
   optional : mean_out, variance_out, reserve_space
+  backward : batch_norm_double_grad

 - backward_api : bce_loss_grad
   forward : bce_loss (Tensor input, Tensor label) -> Tensor(out)
@@ -362,6 +375,7 @@
     func : DeformableConvGradInferMeta
   kernel :
     func : deformable_conv_grad
+    data_type : x
   optional : mask

 - backward_api : depthwise_conv2d_transpose_grad
@@ -414,6 +428,18 @@
   kernel :
     func : dist_grad

+- backward_api : divide_double_grad
+  forward : divide_grad (Tensor x, Tensor y, Tensor out, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
+  args : (Tensor y, Tensor out, Tensor grad_x, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
+  output : Tensor(y_grad), Tensor(out_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [y, grad_x, grad_x]
+  kernel :
+    func : divide_double_grad
+    data_type : out
+  optional : grad_x_grad, grad_y_grad
+
 - backward_api : divide_grad
   forward : divide (Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out, Tensor out_grad, int axis = -1)
@@ -423,6 +449,7 @@
     param : [x, y]
   kernel :
     func : divide_grad
+  backward : divide_double_grad

 - backward_api : dropout_grad
   forward : dropout (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask)
@@ -455,6 +482,16 @@
   kernel :
     func : elementwise_pow_grad

+- backward_api : elu_double_grad
+  forward : elu_grad (Tensor x, Tensor out, Tensor grad_out, float alpha)-> Tensor(grad_x)
+  args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
+  output : Tensor(x_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralBinaryGradInferMeta
+    param : [x, x]
+  kernel :
+    func : elu_double_grad
+
 - backward_api : elu_grad
   forward : elu (Tensor x, float alpha) -> Tensor(out)
   args : (Tensor x, Tensor out, Tensor out_grad, float alpha)
@@ -464,6 +501,7 @@
     param : [x]
   kernel :
     func : elu_grad
+  backward : elu_double_grad

 - backward_api : erf_grad
   forward : erf (Tensor x) -> Tensor(out)
@@ -633,6 +671,7 @@
     param : [x]
   kernel :
     func : graph_send_recv_grad
+    data_type : out_grad
   optional : out, dst_count

 - backward_api : gumbel_softmax_grad
@@ -1287,6 +1326,7 @@
     param : [x]
   kernel :
     func : psroi_pool_grad
+    data_type : x
   optional : boxes_num

 # output is optional
@@ -1381,6 +1421,7 @@
     param : [x]
   kernel :
     func : roi_align_grad
+    data_type : boxes
   optional : boxes_num

 - backward_api : roi_pool_grad
@@ -1392,6 +1433,7 @@
     param : [x]
   kernel :
     func : roi_pool_grad
+    data_type : x
   optional : boxes_num

 - backward_api : roll_grad
@@ -1498,7 +1540,7 @@
     func : UnchangedInferMeta
     param : [x]
   kernel :
-    func : sigmoid_cross_entropy_with_logits_grad
+    func : sigmoid_cross_entropy_with_logits_grad

 - backward_api : sigmoid_double_grad
   forward : sigmoid_grad (Tensor out, Tensor fwd_grad_out) -> Tensor(grad_x)
@@ -1654,6 +1696,18 @@
     func : strided_slice_grad
   no_need_buffer : x

+- backward_api : subtract_double_grad
+  forward : subtract_grad (Tensor x, Tensor y, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
+  args : (Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
+  output : Tensor(grad_out_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [grad_out]
+  kernel :
+    func : subtract_double_grad
+  optional : grad_x_grad, grad_y_grad
+  no_need_buffer : y, grad_out
+
 - backward_api : subtract_grad
   forward : subtract (Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
@@ -1664,6 +1718,7 @@
   kernel :
     func : subtract_grad
   no_need_buffer : x, y
+  backward : subtract_double_grad

 - backward_api : sum_double_grad
   forward : sum_grad (Tensor x, Tensor grad_out, int64_t[] dims, bool keep_dim, bool reduce_all=false) -> Tensor(grad_x)
@@ -1720,6 +1775,17 @@
   kernel :
     func : tan_grad

+- backward_api : tanh_double_grad
+  forward : tanh_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
+  args : (Tensor out, Tensor grad_out, Tensor grad_x_grad)
+  output : Tensor(out_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralBinaryGradInferMeta
+    param : [out, out]
+  kernel :
+    func : tanh_double_grad
+  backward : tanh_triple_grad
+
 - backward_api : tanh_grad
   forward : tanh (Tensor x) -> Tensor(out)
   args : (Tensor out, Tensor out_grad)
@@ -1729,6 +1795,7 @@
     param : [out]
   kernel :
     func : tanh_grad
+  backward : tanh_double_grad

 - backward_api : tanh_shrink_grad
   forward : tanh_shrink (Tensor x) -> Tensor(out)
@@ -1740,6 +1807,16 @@
   kernel :
     func : tanh_shrink_grad

+- backward_api : tanh_triple_grad
+  forward : tanh_double_grad (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward) -> Tensor(grad_out_new), Tensor(grad_out_grad)
+  args : (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward, Tensor grad_out_new_grad, Tensor grad_out_grad_grad)
+  output : Tensor(out_grad), Tensor(grad_out_forward_grad), Tensor(grad_x_grad_forward_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [out, out, grad_x_grad_forward]
+  kernel :
+    func : tanh_triple_grad
+
 - backward_api : thresholded_relu_grad
   forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
   args : (Tensor x, Tensor out_grad, float threshold)
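Taken together, the tanh_double_grad and tanh_triple_grad entries declare a backward chain (tanh_grad -> tanh_double_grad -> tanh_triple_grad) that lets eager mode differentiate tanh up to third order by chaining paddle.grad; a minimal sketch:

import paddle

x = paddle.randn([2, 3])
x.stop_gradient = False
y = paddle.tanh(x)

# Each call differentiates the previous result again; under the yaml entries
# above, the final-state dispatch is expected to go through tanh_grad,
# tanh_double_grad, and tanh_triple_grad in turn.
(d1,) = paddle.grad([y], [x], create_graph=True)
(d2,) = paddle.grad([d1], [x], create_graph=True)
(d3,) = paddle.grad([d2], [x], create_graph=False)

print(d3.shape)  # same shape as x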