PaddlePaddle / Paddle · commit 24ec6ed0
Commit 24ec6ed0 — authored Apr 29, 2022 by YuanRisheng; committed by GitHub on Apr 29, 2022.
Add some double/triple grad kernel yaml file (#42361)
* add double yaml
* add inline func
Parent: 2bee99df
Showing 19 changed files with 224 additions and 67 deletions (+224 −67).
Changed files:
- paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py  +6 −11
- paddle/phi/api/lib/kernel_dispatch.h  +10 −2
- paddle/phi/kernels/activation_grad_kernel.h  +3 −3
- paddle/phi/kernels/batch_norm_grad_kernel.h  +6 −6
- paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc  +6 −6
- paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc  +1 −1
- paddle/phi/kernels/elementwise_subtract_grad_kernel.h  +1 −1
- paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu  +6 −6
- paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu  +1 −1
- paddle/phi/kernels/impl/activation_grad_impl.h  +3 −3
- paddle/phi/ops/compat/activation_sig.cc  +2 −2
- paddle/phi/ops/compat/batch_norm_sig.cc  +7 −7
- paddle/phi/ops/compat/elementwise_sig.cc  +1 −1
- python/paddle/fluid/tests/unittests/gradient_checker.py  +46 −15
- python/paddle/fluid/tests/unittests/test_activation_nn_grad.py  +20 −0
- python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py  +17 −0
- python/paddle/nn/functional/activation.py  +4 −1
- python/paddle/utils/code_gen/api.yaml  +6 −0
- python/paddle/utils/code_gen/backward.yaml  +78 −1
paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
@@ -22,17 +22,12 @@ import os
 ### Global Variables ###
 ########################
 ops_to_fill_zero_for_empty_grads = set([
-    "split_grad",
-    "rnn_grad",
-    "matmul_double_grad",
-    "matmul_triple_grad",
-    "sigmoid_double_grad",
-    "sigmoid_triple_grad",
-    "add_double_grad",
-    "add_triple_grad",
-    "multiply_double_grad",
-    "multiply_triple_grad",
-    "conv2d_grad_grad",
+    "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
+    "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
+    "add_triple_grad", "multiply_double_grad", "multiply_triple_grad",
+    "conv2d_grad_grad", "batch_norm_double_grad", "tanh_double_grad",
+    "tanh_triple_grad", "subtract_double_grad", "divide_double_grad",
+    "log_double_grad", "elu_double_grad"
 ])

 # For API dispatch used at python-level
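The ops listed in ops_to_fill_zero_for_empty_grads are higher-order backward ops whose incoming gradients may legitimately be empty (for example, the second derivative of a linear op). Below is a minimal Python sketch of the idea only; the real zero-filling happens in the generated eager C++ code, and fill_empty_grads here is a hypothetical helper, not a Paddle API.

    import paddle

    def fill_empty_grads(grads, forward_inputs):
        # Hypothetical illustration: replace a missing incoming gradient with a
        # zero tensor of the matching shape, so a double/triple grad kernel
        # never has to special-case an absent input.
        return [
            g if g is not None else paddle.zeros_like(ref)
            for g, ref in zip(grads, forward_inputs)
        ]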
paddle/phi/api/lib/kernel_dispatch.h
@@ -96,8 +96,7 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
   // TODO(chenweihang): deal with multiple diff input Tensors
   // TODO(chenweihang): add global device guard method to set backend
-  void operator()(const Tensor& x) {
-    const phi::TensorBase& tensor = *x.impl();
+  inline void AssignKernelKeySet(const phi::TensorBase& tensor) {
     key_set.backend_set =
         key_set.backend_set | detail::GetTensorBackendSet(tensor);
     // TODO(chenweihang): select multi layout and dtype
@@ -110,6 +109,8 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
     }
   }

+  void operator()(const Tensor& x) { AssignKernelKeySet(*x.impl()); }
+
   void operator()(const std::vector<Tensor>& x) {
     const phi::TensorBase& tensor = *x.at(0).impl();
     key_set.backend_set =
@@ -119,6 +120,13 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
     key_set.dtype = tensor.dtype();
   }

+  void operator()(const paddle::optional<const Tensor&> x) {
+    if (x.get_ptr() != nullptr) {
+      const phi::TensorBase& tensor = *(x.get_ptr()->impl());
+      AssignKernelKeySet(tensor);
+    }
+  }
+
   // skip other type args, these args don't used in kernel selection
   template <typename T>
   void operator()(const T& x) {
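The refactor factors the backend/layout/dtype bookkeeping into AssignKernelKeySet and adds an operator() overload for paddle::optional<const Tensor&>, so optional inputs (such as BatchNorm's mean/variance used elsewhere in this commit) can also contribute to kernel selection. A rough, purely illustrative Python analogue of that visitor pattern follows; it is not Paddle's real dispatch code.

    class KernelKeyParser:
        """Toy analogue of the C++ KernelKeyParser; for illustration only."""

        def __init__(self):
            self.backends = set()
            self.dtype = None

        def assign_kernel_key_set(self, tensor):
            # mirrors AssignKernelKeySet: fold one tensor into the kernel key
            self.backends.add(str(tensor.place))
            self.dtype = tensor.dtype

        def visit(self, arg):
            # mirrors the operator() overloads: dense tensor, tensor list,
            # optional tensor (None means "not provided"); other args skipped
            if arg is None:
                return
            if isinstance(arg, (list, tuple)) and arg:
                self.assign_kernel_key_set(arg[0])
            elif hasattr(arg, "dtype") and hasattr(arg, "place"):
                self.assign_kernel_key_set(arg)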
paddle/phi/kernels/activation_grad_kernel.h
@@ -82,18 +82,18 @@ void ReluDoubleGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void TanhDoubleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
+                          const DenseTensor& ddx,
                           DenseTensor* dout_new,
                           DenseTensor* ddout);

 template <typename T, typename Context>
 void TanhTripleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
-                          const DenseTensor& d_ddout,
+                          const DenseTensor& ddx,
                           const DenseTensor& d_dout_new,
+                          const DenseTensor& d_ddout,
                           DenseTensor* d_out_new,
                           DenseTensor* d_dout,
                           DenseTensor* d_ddx);
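The swapped parameter order matches the argument order of the tanh_double_grad / tanh_triple_grad entries added to backward.yaml later in this commit (out, grad_out, grad_x_grad, ...). From Python none of this is visible; higher-order tanh gradients are reached by calling paddle.grad with create_graph=True. A quick sanity check, assuming eager mode on a recent Paddle build:

    import paddle

    x = paddle.to_tensor([0.1, 0.2, 0.3], stop_gradient=False)
    y = paddle.tanh(x)

    # first derivative, kept differentiable so it can be differentiated again
    (dy_dx,) = paddle.grad(outputs=y, inputs=x, create_graph=True)
    # second derivative of tanh: -2 * tanh(x) * (1 - tanh(x) ** 2)
    (d2y_dx2,) = paddle.grad(outputs=dy_dx.sum(), inputs=x)
    print(d2y_dx2.numpy())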
paddle/phi/kernels/batch_norm_grad_kernel.h
@@ -66,16 +66,16 @@ void BatchNormGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void BatchNormDoubleGradKernel(const Context& dev_ctx,
-                               const DenseTensor& x_grad_grad,
-                               const DenseTensor& scale_grad_grad,
-                               const DenseTensor& bias_grad_grad,
-                               const DenseTensor& y_grad,
                                const DenseTensor& x,
                                const DenseTensor& scale,
-                               const DenseTensor& saved_mean,
-                               const DenseTensor& saved_variance,
                                paddle::optional<const DenseTensor&> mean,
                                paddle::optional<const DenseTensor&> variance,
+                               const DenseTensor& saved_mean,
+                               const DenseTensor& saved_variance,
+                               const DenseTensor& y_grad,
+                               const DenseTensor& x_grad_grad,
+                               const DenseTensor& scale_grad_grad,
+                               const DenseTensor& bias_grad_grad,
                                float momentum,
                                float epsilon,
                                const std::string& data_layout,
paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc
@@ -341,16 +341,16 @@ void BatchNormGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void BatchNormDoubleGradKernel(const Context& ctx,
-                               const DenseTensor& x_grad_grad,
-                               const DenseTensor& scale_grad_grad,
-                               const DenseTensor& bias_grad_grad,
-                               const DenseTensor& y_grad,
                                const DenseTensor& x,
                                const DenseTensor& scale,
-                               const DenseTensor& saved_mean,
-                               const DenseTensor& saved_variance,
                                paddle::optional<const DenseTensor&> mean,
                                paddle::optional<const DenseTensor&> variance,
+                               const DenseTensor& saved_mean,
+                               const DenseTensor& saved_variance,
+                               const DenseTensor& y_grad,
+                               const DenseTensor& x_grad_grad,
+                               const DenseTensor& scale_grad_grad,
+                               const DenseTensor& bias_grad_grad,
                                float momentum,
                                float epsilon,
                                const std::string& data_layout_str,
paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc
@@ -38,9 +38,9 @@ void SubtractGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void SubtractDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& y,
+                              const DenseTensor& dout,
                               paddle::optional<const DenseTensor&> ddx,
                               paddle::optional<const DenseTensor&> ddy,
-                              const DenseTensor& dout,
                               int axis,
                               DenseTensor* ddout) {
   phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
paddle/phi/kernels/elementwise_subtract_grad_kernel.h
@@ -30,9 +30,9 @@ void SubtractGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void SubtractDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& y,
+                              const DenseTensor& dout,
                               paddle::optional<const DenseTensor&> ddx,
                               paddle::optional<const DenseTensor&> ddy,
-                              const DenseTensor& dout,
                               int axis,
                               DenseTensor* ddout);
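ddx and ddy are paddle::optional because the double grad of a linear op like subtract carries no real second-order signal, so callers may pass nothing for them. The same situation shows up in dygraph, which is why this commit adds allow_unused=True and None-filtering to gradient_checker.py. A small illustrative run (exact behaviour may vary slightly across Paddle builds):

    import paddle

    x = paddle.rand([3, 4])
    y = paddle.rand([3, 4])
    x.stop_gradient = False
    y.stop_gradient = False

    out = paddle.subtract(x, y)
    dx, dy = paddle.grad(outputs=out, inputs=[x, y], create_graph=True)

    # subtract is linear, so dx and dy do not depend on x or y at all;
    # allow_unused=True turns the would-be error into None results, which is
    # exactly what the updated gradient_checker filters out.
    ddx, ddy = paddle.grad(
        outputs=dx.sum() + dy.sum(), inputs=[x, y], allow_unused=True)
    print(ddx, ddy)  # both None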
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -908,16 +908,16 @@ void BatchNormGradKernel(const Context &dev_ctx,
 template <typename T, typename Context>
 void BatchNormDoubleGradKernel(const Context& ctx,
-                               const DenseTensor& x_grad_grad,
-                               const DenseTensor& scale_grad_grad,
-                               const DenseTensor& bias_grad_grad,
-                               const DenseTensor& y_grad,
                                const DenseTensor& x,
                                const DenseTensor& scale,
-                               const DenseTensor& saved_mean,
-                               const DenseTensor& saved_variance,
                                paddle::optional<const DenseTensor&> mean,
                                paddle::optional<const DenseTensor&> variance,
+                               const DenseTensor& saved_mean,
+                               const DenseTensor& saved_variance,
+                               const DenseTensor& y_grad,
+                               const DenseTensor& x_grad_grad,
+                               const DenseTensor& scale_grad_grad,
+                               const DenseTensor& bias_grad_grad,
                                float momentum,
                                float epsilon,
                                const std::string& data_layout_str,
paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu
@@ -46,9 +46,9 @@ void SubtractGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void SubtractDoubleGradKernel(const Context& dev_ctx,
                               const DenseTensor& y,
+                              const DenseTensor& dout,
                               paddle::optional<const DenseTensor&> ddx,
                               paddle::optional<const DenseTensor&> ddy,
-                              const DenseTensor& dout,
                               int axis,
                               DenseTensor* ddout) {
   phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
paddle/phi/kernels/impl/activation_grad_impl.h
@@ -152,8 +152,8 @@ void LeakyReluDoubleGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void TanhDoubleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
+                          const DenseTensor& ddx,
                           DenseTensor* dout_new,
                           DenseTensor* ddout) {
   if (dout_new) {
@@ -171,10 +171,10 @@ void TanhDoubleGradKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 void TanhTripleGradKernel(const Context& dev_ctx,
                           const DenseTensor& out,
-                          const DenseTensor& ddx,
                           const DenseTensor& dout,
-                          const DenseTensor& d_ddout,
+                          const DenseTensor& ddx,
                           const DenseTensor& d_dout_new,
+                          const DenseTensor& d_ddout,
                           DenseTensor* d_out_new,
                           DenseTensor* d_dout,
                           DenseTensor* d_ddx) {
paddle/phi/ops/compat/activation_sig.cc
@@ -121,13 +121,13 @@ KernelSignature ReluDoubleGradOpArgumentMapping(
 KernelSignature TanhDoubleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "tanh_double_grad", {"Out", "DDX", "DOut"}, {}, {"DOutNew", "DDOut"});
+      "tanh_double_grad", {"Out", "DOut", "DDX"}, {}, {"DOutNew", "DDOut"});
 }

 KernelSignature TanhTripleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature("tanh_triple_grad",
-                         {"Out", "DDX", "DOut", "D_DDOut", "D_DOut_New"},
+                         {"Out", "DOut", "DDX", "D_DOut_New", "D_DDOut"},
                          {},
                          {"D_OutNew", "D_DOut", "D_DDx"});
 }
paddle/phi/ops/compat/batch_norm_sig.cc
@@ -82,16 +82,16 @@ KernelSignature BatchNormGradOpArgumentMapping(
 KernelSignature BatchNormGradGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature("batch_norm_grad_grad",
-                         {"DDX", "DDScale", "DDBias", "DY", "X", "Scale",
-                          "SavedMean", "SavedVariance", "Mean", "Variance"},
+                         {"X", "Scale", "Mean", "Variance", "SavedMean",
+                          "SavedVariance", "DY", "DDX", "DDScale", "DDBias"},
                          {"momentum",
                           "epsilon",
                           "data_layout",
paddle/phi/ops/compat/elementwise_sig.cc
@@ -133,7 +133,7 @@ KernelSignature ElementwiseSubGradOpArgumentMapping(
 KernelSignature ElementwiseSubDoubleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
-      "subtract_double_grad", {"Y", "DDX", "DDY", "DOut"}, {"axis"}, {"DDOut"});
+      "subtract_double_grad", {"Y", "DOut", "DDX", "DDY"}, {"axis"}, {"DDOut"});
 }

 KernelSignature ElementwiseDivGradOpArgumentMapping(
python/paddle/fluid/tests/unittests/gradient_checker.py
@@ -560,7 +560,10 @@ def get_static_double_grad(x,
     # so, they are also the input of second-order backward.
     x += y_grads
     x_init += dy_init
-    y = dx
+
+    # filter None in dx for DX/DY may be None in kernel
+    filted_dx = [dxi for dxi in dx if dxi is not None]
+    y = filted_dx

     # check input arguments
     x = _as_list(x)
@@ -619,6 +622,7 @@ def get_static_double_grad(x,
 def get_eager_double_grad(func,
                           x_init=None,
                           dy_init=None,
+                          place=None,
                           return_mid_result=False):
     """
     Get Double Grad result of dygraph.
@@ -627,6 +631,7 @@ def get_eager_double_grad(func,
         func: A wrapped dygraph function that its logic is equal to static program
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         dy_init (numpy.array|list[numpy.array]|None): the init value for gradient of output.
+        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
         return_mid_result (bool): A flag that controls the return content.
     Returns:
         If 'return_mid_result' set True.
@@ -635,6 +640,10 @@ def get_eager_double_grad(func,
         If 'return_mid_result' set False.
         A list of numpy array that stores second derivative result calulated by dygraph.
     """
+    if isinstance(place, fluid.CPUPlace):
+        paddle.set_device("cpu")
+    if isinstance(place, fluid.CUDAPlace):
+        paddle.set_device("gpu")
     inputs = []
     dys = []
     for x in x_init:
@@ -648,7 +657,12 @@ def get_eager_double_grad(func,
     # calculate first derivative
     outputs = func(inputs)
     d_inputs = paddle.grad(
-        outputs=outputs, inputs=inputs, grad_outputs=dys, create_graph=True)
+        outputs=outputs,
+        inputs=inputs,
+        grad_outputs=dys,
+        create_graph=True,
+        allow_unused=True)
+    d_inputs = [d_input for d_input in d_inputs if d_input is not None]

     # calcluate second derivative
     inputs = inputs + dys
@@ -663,15 +677,20 @@ def get_eager_double_grad(func,
             ddy = paddle.ones(shape=d_input.shape, dtype=d_input.dtype)
             ddy.stop_gradient = False
             ddys.append(ddy)
     dd_inputs = paddle.grad(
         outputs=d_inputs,
         inputs=inputs,
         grad_outputs=ddys,
-        create_graph=create_graph)
+        create_graph=create_graph,
+        allow_unused=True)

     if return_mid_result:
         return dd_inputs, inputs + ddys
     else:
-        return [dd_input.numpy() for dd_input in dd_inputs]
+        return [
+            dd_input.numpy() for dd_input in dd_inputs
+            if dd_input is not None
+        ]


 def double_grad_check_for_dygraph(func,
@@ -693,7 +712,6 @@ def double_grad_check_for_dygraph(func,
         y (Variable|list[Variable]): output variables to the program.
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         place (fluid.CPUPlace or fluid.CUDAPlace): the device.
-        eps (float): perturbation for finite differences.
         atol (float): absolute tolerance.
         rtol (float): relative tolerance.
         raise_exception (bool): whether to raise an exception if
@@ -722,19 +740,25 @@ def double_grad_check_for_dygraph(func,
     paddle.disable_static()
     with _test_eager_guard():
-        eager_double_grad = get_eager_double_grad(func, x_init, y_grads_init)
+        eager_double_grad = get_eager_double_grad(func, x_init, y_grads_init,
+                                                  place)
     paddle.enable_static()

     static_double_grad = get_static_double_grad(x, y, x_init, y_grads_init,
                                                 place)

+    if len(static_double_grad) != len(eager_double_grad):
+        msg = "The output grad tensor's number of static graph is different with dygraph, " \
+            "please check the python api unit test used."
+        raise RuntimeError(msg)
+
     for i in six.moves.xrange(len(static_double_grad)):
         if not np.allclose(static_double_grad[i], eager_double_grad[i], rtol,
                            atol):
-            msg = 'Check eager double result fail. Mismatch between static_graph double grad %s ' \
-                'and eager double grad %s on %s,\n' \
-                'static:%s\n eager:%s\n' \
-                % (static_double_grad[i].name, eager_double_grad[i].name, str(place), static_double_grad[i], eager_double_grad[i])
+            msg = 'Check eager double result fail. Mismatch between static_graph double grad ' \
+                'and eager double grad on %s, the output double grad tensor\'s index is : %d \n' \
+                'static:%s\n eager:%s\n' \
+                % (str(place), i, static_double_grad[i], eager_double_grad[i])
             return fail_test(msg)
@@ -794,6 +818,7 @@ def get_static_triple_grad(x,
 def get_eager_triple_grad(func,
                           x_init=None,
                           dy_init=None,
+                          place=None,
                           return_mid_result=False):
     """
     Get triple Grad result of dygraph.
@@ -802,12 +827,13 @@ def get_eager_triple_grad(func,
         func: A wrapped dygraph function that its logic is equal to static program
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         dy_init (numpy.array|list[numpy.array]|None): the init value for gradient of output.
+        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
         return_mid_result (list[Tensor], list[Tensor]): If set True, the
     Returns:
         A list of numpy array that stores second derivative result calulated by dygraph
     """
     dd_y, dd_x = get_eager_double_grad(
-        func, x_init, dy_init, return_mid_result=True)
+        func, x_init, dy_init, place, return_mid_result=True)

     # calcluate third derivative
     dddys = []
@@ -839,7 +865,6 @@ def triple_grad_check_for_dygraph(func,
         y (Variable|list[Variable]): output variables to the program.
         x_init (numpy.array|list[numpy.array]|None): the init value for input x.
         place (fluid.CPUPlace or fluid.CUDAPlace): the device.
-        eps (float): perturbation for finite differences.
         atol (float): absolute tolerance.
         rtol (float): relative tolerance.
         raise_exception (bool): whether to raise an exception if
@@ -868,17 +893,23 @@ def triple_grad_check_for_dygraph(func,
     paddle.disable_static()
     with _test_eager_guard():
-        eager_triple_grad = get_eager_triple_grad(func, x_init, y_grads_init)
+        eager_triple_grad = get_eager_triple_grad(func, x_init, y_grads_init,
+                                                  place)
     paddle.enable_static()

     static_triple_grad = get_static_triple_grad(x, y, x_init, y_grads_init,
                                                 place)

+    if len(static_triple_grad) != len(eager_triple_grad):
+        msg = "The output grad tensor's number of static graph is different with dygraph, " \
+            "please check the python api unit test used."
+        raise RuntimeError(msg)
+
     for i in six.moves.xrange(len(static_triple_grad)):
         if not np.allclose(static_triple_grad[i], eager_triple_grad[i], rtol,
                            atol):
-            msg = 'Check eager double result fail. Mismatch between static_graph double grad %s ' \
-                'and eager double grad %s on %s,\n' \
-                'static:%s\n eager:%s\n' \
-                % (static_triple_grad[i].name, eager_triple_grad[i].name, str(place), static_triple_grad[i], eager_triple_grad[i])
+            msg = 'Check eager double result fail. Mismatch between static_graph double grad ' \
+                'and eager double grad on %s, the output double grad tensor\'s index is : %d \n' \
+                'static:%s\n eager:%s\n' \
+                % (str(place), i, static_triple_grad[i], eager_triple_grad[i])
             return fail_test(msg)
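With these changes, a dygraph double-grad check pins the device through the new place argument, drops None gradients on both the static and eager paths, and fails early if the two paths return a different number of grad tensors. A sketch of the typical call pattern, assuming it runs inside Paddle's unittests directory where gradient_checker is importable (the wrapper has the same shape the tests below use):

    import numpy as np
    import paddle
    import paddle.fluid as fluid
    import gradient_checker  # on sys.path inside the unittests directory

    def tanh_wrapper(x):
        # the checker passes the inputs as a list; x[0] is the actual tensor
        return paddle.tanh(x[0])

    paddle.enable_static()
    place = fluid.CPUPlace()
    with fluid.program_guard(fluid.Program()):
        x = fluid.layers.data('x', [2, 3, 7, 9], False, dtype='float64')
        x.persistable = True
        y = paddle.tanh(x)
        x_arr = np.random.uniform(-1, 1, [2, 3, 7, 9]).astype('float64')

        # finite-difference check of the static double grad ...
        gradient_checker.double_grad_check(
            [x], y, x_init=x_arr, place=place, eps=0.005)
        # ... and the new comparison against eager mode, device passed explicitly
        gradient_checker.double_grad_check_for_dygraph(
            tanh_wrapper, [x], y, x_init=x_arr, place=place)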
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
@@ -52,6 +52,9 @@ class TestSigmoidTripleGradCheck(unittest.TestCase):
 class TestSigmoidDoubleGradCheck(unittest.TestCase):
+    def sigmoid_wrapper(self, x):
+        return fluid.layers.sigmoid(x[0])
+
     @prog_scope()
     def func(self, place):
         shape = [2, 3, 7, 9]
@@ -64,6 +67,8 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
         x_arr[np.abs(x_arr) < 0.005] = 0.002
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.sigmoid_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()
@@ -75,6 +80,9 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
 class TestTanhTripleGradCheck(unittest.TestCase):
+    def tanh_wrapper(self, x):
+        return paddle.tanh(x[0])
+
     @prog_scope()
     def func(self, place):
         shape = [2, 3, 7, 9]
@@ -87,6 +95,8 @@ class TestTanhTripleGradCheck(unittest.TestCase):
         x_arr[np.abs(x_arr) < 0.005] = 0.002
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.triple_grad_check_for_dygraph(
+            self.tanh_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()
@@ -98,6 +108,9 @@ class TestTanhTripleGradCheck(unittest.TestCase):
 class TestTanhDoubleGradCheck(unittest.TestCase):
+    def tanh_wrapper(self, x):
+        return paddle.tanh(x[0])
+
     @prog_scope()
     def func(self, place):
         shape = [2, 3, 7, 9]
@@ -110,6 +123,8 @@ class TestTanhDoubleGradCheck(unittest.TestCase):
         x_arr[np.abs(x_arr) < 0.005] = 0.002
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.tanh_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()
@@ -173,6 +188,9 @@ class TestLeakyReluDoubleGradCheck(unittest.TestCase):
 class TestELUDoubleGradCheck(unittest.TestCase):
+    def elu_wrapper(self, x):
+        return paddle.nn.functional.elu(x[0], alpha=0.2)
+
     @prog_scope()
     def func(self, place):
         shape = [2, 4, 4, 4]
@@ -189,6 +207,8 @@ class TestELUDoubleGradCheck(unittest.TestCase):
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.elu_wrapper, [x], y, x_init=x_arr, place=place)

     def test_grad(self):
         paddle.enable_static()
python/paddle/fluid/tests/unittests/test_elementwise_nn_grad.py
@@ -139,6 +139,9 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
 class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
+    def subtract_wrapper(self, x):
+        return paddle.subtract(x[0], x[1])
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.
@@ -156,6 +159,11 @@ class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.subtract_wrapper, [x, y],
+            out,
+            x_init=[x_arr, y_arr],
+            place=place)

     def test_grad(self):
         paddle.enable_static()
@@ -195,6 +203,9 @@ class TestElementwiseSubBroadcastDoubleGradCheck(unittest.TestCase):
 class TestElementwiseDivDoubleGradCheck(unittest.TestCase):
+    def divide_wrapper(self, x):
+        return paddle.divide(x[0], x[1])
+
     @prog_scope()
     def func(self, place):
         # the shape of input variable should be clearly specified, not inlcude -1.
@@ -213,6 +224,12 @@ class TestElementwiseDivDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps, atol=1e-3)
+        gradient_checker.double_grad_check_for_dygraph(
+            self.divide_wrapper, [x, y],
+            out,
+            x_init=[x_arr, y_arr],
+            place=place,
+            atol=1e-3)

     def test_grad(self):
         paddle.enable_static()
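The divide test keeps a looser atol because, unlike subtract, divide has a genuinely non-zero second derivative. For intuition, the same quantity computed directly in dygraph (values assume the default float32 dtype):

    import paddle

    x = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)
    y = paddle.to_tensor([2.0, 4.0, 8.0], stop_gradient=False)

    out = paddle.divide(x, y)                  # out = x / y
    dx, dy = paddle.grad(outputs=out, inputs=[x, y], create_graph=True)

    # d(out)/dy = -x / y**2, so differentiating dy once more w.r.t. y
    # should give 2 * x / y**3
    (d2_dy2,) = paddle.grad(outputs=dy.sum(), inputs=[y])
    print(d2_dy2.numpy())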
python/paddle/nn/functional/activation.py
@@ -112,7 +112,10 @@ def elu(x, alpha=1.0, name=None):
            # [ 1.  15.6 ]]
     """

-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_elu(x, alpha)
+
+    if _in_legacy_dygraph():
         return _C_ops.elu(x, 'alpha', alpha)

     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu')
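After this change, elu first tries the new final-state (yaml-generated) operator in eager mode, falls back to the legacy dygraph op, and otherwise keeps the static-graph path. The public API is unchanged; a minimal second-order use, with alpha=0.2 matching the new ELU test above (illustrative only, assuming a build where ELU's second-order kernel is registered):

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([-1.5, 0.0, 2.0], stop_gradient=False)
    y = F.elu(x, alpha=0.2)    # elu(x) = x if x > 0 else alpha * (exp(x) - 1)

    (dy_dx,) = paddle.grad(outputs=y, inputs=x, create_graph=True)
    # second derivative is alpha * exp(x) on the negative side and 0 elsewhere
    (d2y_dx2,) = paddle.grad(outputs=dy_dx.sum(), inputs=x)
    print(y.numpy(), d2y_dx2.numpy())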
python/paddle/utils/code_gen/api.yaml
@@ -466,6 +466,7 @@
     func : DeformableConvInferMeta
   kernel :
     func : deformable_conv
+    data_type : x
   optional : mask
   backward : deformable_conv_grad
@@ -546,6 +547,7 @@
     func : DropoutInferMeta
   kernel :
     func : dropout
+    data_type : x
   optional : seed_tensor
   backward : dropout_grad
@@ -1065,6 +1067,7 @@
     func : LayerNormInferMeta
   kernel :
     func : layer_norm
+    data_type : x
   backward : layer_norm_grad
   optional : scale, bias
@@ -1608,6 +1611,7 @@
     func : PsroiPoolInferMeta
   kernel :
     func : psroi_pool
+    data_type : x
   optional : boxes_num
   backward : psroi_pool_grad
@@ -1713,6 +1717,7 @@
     func : RoiAlignInferMeta
   kernel :
     func : roi_align
+    data_type : x
   optional : boxes_num
   backward : roi_align_grad
@@ -1723,6 +1728,7 @@
     func : RoiPoolInferMeta
   kernel :
     func : roi_pool
+    data_type : x
   optional : boxes_num
   intermediate : arg_max
   backward : roi_pool_grad
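Each new data_type : x line tells the API code generator to take the kernel's compute dtype from the x input rather than from the first tensor argument, which matters for ops such as dropout or roi_align whose auxiliary tensor inputs (seed tensor, boxes, boxes_num) may have a different dtype. A hypothetical sketch of that selection rule, not Paddle's actual generator code:

    def pick_kernel_dtype(tensor_inputs, data_type_hint=None):
        # tensor_inputs: ordered mapping of input name -> tensor
        # data_type_hint: value of the yaml "data_type" field, e.g. "x"
        if data_type_hint is not None:
            return tensor_inputs[data_type_hint].dtype
        # default behaviour: fall back to the first tensor input
        return next(iter(tensor_inputs.values())).dtype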
python/paddle/utils/code_gen/backward.yaml
@@ -152,6 +152,18 @@
   kernel :
     func : atanh_grad

+- backward_api : batch_norm_double_grad
+  forward : batch_norm_grad (Tensor x, Tensor scale, Tensor bias, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor grad_out, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(grad_x), Tensor(grad_scale), Tensor(grad_bias)
+  args : (Tensor x, Tensor scale, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor grad_out, Tensor grad_x_grad, Tensor grad_scale_grad, Tensor grad_bias_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
+  output : Tensor(x_grad), Tensor(scale_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [x, scale, x]
+  kernel :
+    func : batch_norm_grad_grad
+    data_type : x
+  optional : out_mean, out_variance
+
 - backward_api : batch_norm_grad
   forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
   args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
@@ -163,6 +175,7 @@
     func : batch_norm_grad
     data_type : out_grad
   optional : mean_out, variance_out, reserve_space
+  backward : batch_norm_double_grad

 - backward_api : bce_loss_grad
   forward : bce_loss (Tensor input, Tensor label) -> Tensor(out)
@@ -362,6 +375,7 @@
     func : DeformableConvGradInferMeta
   kernel :
     func : deformable_conv_grad
+    data_type : x
   optional : mask

 - backward_api : depthwise_conv2d_transpose_grad
@@ -414,6 +428,18 @@
   kernel :
     func : dist_grad

+- backward_api : divide_double_grad
+  forward : divide_grad (Tensor x, Tensor y, Tensor out, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
+  args : (Tensor y, Tensor out, Tensor grad_x, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
+  output : Tensor(y_grad), Tensor(out_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [y, grad_x, grad_x]
+  kernel :
+    func : divide_double_grad
+    data_type : out
+  optional : grad_x_grad, grad_y_grad
+
 - backward_api : divide_grad
   forward : divide (Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out, Tensor out_grad, int axis = -1)
@@ -423,6 +449,7 @@
   param : [x, y]
   kernel :
     func : divide_grad
+  backward : divide_double_grad

 - backward_api : dropout_grad
   forward : dropout (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask)
@@ -455,6 +482,16 @@
   kernel :
     func : elementwise_pow_grad

+- backward_api : elu_double_grad
+  forward : elu_grad (Tensor x, Tensor out, Tensor grad_out, float alpha)-> Tensor(grad_x)
+  args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
+  output : Tensor(x_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralBinaryGradInferMeta
+    param : [x, x]
+  kernel :
+    func : elu_double_grad
+
 - backward_api : elu_grad
   forward : elu (Tensor x, float alpha) -> Tensor(out)
   args : (Tensor x, Tensor out, Tensor out_grad, float alpha)
@@ -464,6 +501,7 @@
   param : [x]
   kernel :
     func : elu_grad
+  backward : elu_double_grad

 - backward_api : erf_grad
   forward : erf (Tensor x) -> Tensor(out)
@@ -633,6 +671,7 @@
   param : [x]
   kernel :
     func : graph_send_recv_grad
+    data_type : out_grad
   optional : out, dst_count

 - backward_api : gumbel_softmax_grad
@@ -1287,6 +1326,7 @@
   param : [x]
   kernel :
     func : psroi_pool_grad
+    data_type : x
   optional : boxes_num

 # output is optional
@@ -1381,6 +1421,7 @@
   param : [x]
   kernel :
     func : roi_align_grad
+    data_type : boxes
   optional : boxes_num

 - backward_api : roi_pool_grad
@@ -1392,6 +1433,7 @@
   param : [x]
   kernel :
     func : roi_pool_grad
+    data_type : x
   optional : boxes_num

 - backward_api : roll_grad
@@ -1498,7 +1540,7 @@
     func : UnchangedInferMeta
     param : [x]
   kernel :
     func : sigmoid_cross_entropy_with_logits_grad

 - backward_api : sigmoid_double_grad
   forward : sigmoid_grad (Tensor out, Tensor fwd_grad_out) -> Tensor(grad_x)
@@ -1654,6 +1696,18 @@
     func : strided_slice_grad
   no_need_buffer : x

+- backward_api : subtract_double_grad
+  forward : subtract_grad (Tensor x, Tensor y, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
+  args : (Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
+  output : Tensor(grad_out_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [grad_out]
+  kernel :
+    func : subtract_double_grad
+  optional : grad_x_grad, grad_y_grad
+  no_need_buffer : y, grad_out
+
 - backward_api : subtract_grad
   forward : subtract (Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
@@ -1664,6 +1718,7 @@
   kernel :
     func : subtract_grad
   no_need_buffer : x, y
+  backward : subtract_double_grad

 - backward_api : sum_double_grad
   forward : sum_grad (Tensor x, Tensor grad_out, int64_t[] dims, bool keep_dim, bool reduce_all=false) -> Tensor(grad_x)
@@ -1720,6 +1775,17 @@
   kernel :
     func : tan_grad

+- backward_api : tanh_double_grad
+  forward : tanh_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
+  args : (Tensor out, Tensor grad_out, Tensor grad_x_grad)
+  output : Tensor(out_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralBinaryGradInferMeta
+    param : [out, out]
+  kernel :
+    func : tanh_double_grad
+  backward : tanh_triple_grad
+
 - backward_api : tanh_grad
   forward : tanh (Tensor x) -> Tensor(out)
   args : (Tensor out, Tensor out_grad)
@@ -1729,6 +1795,7 @@
   param : [out]
   kernel :
     func : tanh_grad
+  backward : tanh_double_grad

 - backward_api : tanh_shrink_grad
   forward : tanh_shrink (Tensor x) -> Tensor(out)
@@ -1740,6 +1807,16 @@
   kernel :
     func : tanh_shrink_grad

+- backward_api : tanh_triple_grad
+  forward : tanh_double_grad (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward) -> Tensor(grad_out_new), Tensor(grad_out_grad)
+  args : (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward, Tensor grad_out_new_grad, Tensor grad_out_grad_grad)
+  output : Tensor(out_grad), Tensor(grad_out_forward_grad), Tensor(grad_x_grad_forward_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [out, out, grad_x_grad_forward]
+  kernel :
+    func : tanh_triple_grad
+
 - backward_api : thresholded_relu_grad
   forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
   args : (Tensor x, Tensor out_grad, float threshold)
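Each new entry also registers itself as the backward of the corresponding first-order grad op (backward : tanh_double_grad, backward : divide_double_grad, and so on), so stacking paddle.grad calls stays inside the generated final-state pipeline. End to end, the tanh chain added here supports a third derivative; a quick check assuming eager mode, writing t = tanh(x):

    import paddle

    x = paddle.to_tensor([0.5], stop_gradient=False)
    t = paddle.tanh(x)

    (g1,) = paddle.grad(outputs=t, inputs=x, create_graph=True)   # 1 - t^2
    (g2,) = paddle.grad(outputs=g1, inputs=x, create_graph=True)  # -2*t*(1 - t^2)
    (g3,) = paddle.grad(outputs=g2, inputs=x)                     # -2 + 8*t^2 - 6*t^4
    print(g1.numpy(), g2.numpy(), g3.numpy())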