Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
54e4360a
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
54e4360a
编写于
4月 14, 2023
作者:
Z
zhangyuqin1998
提交者:
GitHub
4月 14, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
delete unused param from swish_grad and relu6_grad (#52805)
上级
b1bb7484
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
42 addition
and
79 deletion
+42
-79
paddle/phi/api/yaml/legacy_backward.yaml
paddle/phi/api/yaml/legacy_backward.yaml
+2
-2
paddle/phi/api/yaml/sparse_backward.yaml
paddle/phi/api/yaml/sparse_backward.yaml
+1
-1
paddle/phi/kernels/activation_grad_kernel.h
paddle/phi/kernels/activation_grad_kernel.h
+2
-2
paddle/phi/kernels/cpu/activation_grad_kernel.cc
paddle/phi/kernels/cpu/activation_grad_kernel.cc
+2
-4
paddle/phi/kernels/funcs/activation_functor.h
paddle/phi/kernels/funcs/activation_functor.h
+8
-16
paddle/phi/kernels/gpu/activation_grad_kernel.cu
paddle/phi/kernels/gpu/activation_grad_kernel.cu
+2
-6
paddle/phi/kernels/onednn/activation_grad_kernel.cc
paddle/phi/kernels/onednn/activation_grad_kernel.cc
+11
-4
paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h
paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h
+1
-1
paddle/phi/kernels/sparse/unary_grad_kernel.h
paddle/phi/kernels/sparse/unary_grad_kernel.h
+1
-1
paddle/phi/kernels/xpu/activation_grad_kernel.cc
paddle/phi/kernels/xpu/activation_grad_kernel.cc
+4
-15
paddle/phi/ops/compat/activation_sig.cc
paddle/phi/ops/compat/activation_sig.cc
+8
-2
test/mkldnn/test_activation_mkldnn_op.py
test/mkldnn/test_activation_mkldnn_op.py
+0
-25
未找到文件。
paddle/phi/api/yaml/legacy_backward.yaml
浏览文件 @
54e4360a
...
@@ -801,7 +801,7 @@
...
@@ -801,7 +801,7 @@
-
backward_op
:
relu6_grad
-
backward_op
:
relu6_grad
forward
:
relu6 (Tensor x) -> Tensor(out)
forward
:
relu6 (Tensor x) -> Tensor(out)
args
:
(Tensor out, Tensor out_grad
, float threshold = 6
)
args
:
(Tensor out, Tensor out_grad)
output
:
Tensor(x_grad)
output
:
Tensor(x_grad)
infer_meta
:
infer_meta
:
func
:
UnchangedInferMeta
func
:
UnchangedInferMeta
...
@@ -1010,7 +1010,7 @@
...
@@ -1010,7 +1010,7 @@
-
backward_op
:
swish_grad
-
backward_op
:
swish_grad
forward
:
swish (Tensor x) -> Tensor(out)
forward
:
swish (Tensor x) -> Tensor(out)
args
:
(Tensor x, Tensor out_grad
, float bete=1.0
)
args
:
(Tensor x, Tensor out_grad)
output
:
Tensor(x_grad)
output
:
Tensor(x_grad)
infer_meta
:
infer_meta
:
func
:
GeneralUnaryGradInferMeta
func
:
GeneralUnaryGradInferMeta
...
...
paddle/phi/api/yaml/sparse_backward.yaml
浏览文件 @
54e4360a
...
@@ -252,7 +252,7 @@
...
@@ -252,7 +252,7 @@
-
backward_op
:
relu6_grad
-
backward_op
:
relu6_grad
forward
:
relu6(Tensor x) -> Tensor(out)
forward
:
relu6(Tensor x) -> Tensor(out)
args
:
(Tensor out, Tensor out_grad
, float threshold = 6
)
args
:
(Tensor out, Tensor out_grad)
output
:
Tensor(x_grad)
output
:
Tensor(x_grad)
infer_meta
:
infer_meta
:
func
:
UnchangedInferMeta
func
:
UnchangedInferMeta
...
...
paddle/phi/kernels/activation_grad_kernel.h
浏览文件 @
54e4360a
...
@@ -285,6 +285,7 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log);
...
@@ -285,6 +285,7 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX
(
Log10
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX
(
Log10
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX
(
Log1p
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX
(
Log1p
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX
(
Swish
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Exp
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Exp
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Expm1
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Expm1
);
...
@@ -294,6 +295,7 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu);
...
@@ -294,6 +295,7 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Tanh
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Tanh
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sigmoid
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sigmoid
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Relu6
);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP
(
Round
);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP
(
Round
);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP
(
Floor
);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP
(
Floor
);
...
@@ -303,11 +305,9 @@ DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, alpha);
...
@@ -303,11 +305,9 @@ DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, alpha);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
ThresholdedRelu
,
threshold
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
ThresholdedRelu
,
threshold
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
SoftShrink
,
lambda
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
SoftShrink
,
lambda
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
HardShrink
,
threshold
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
HardShrink
,
threshold
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Swish
,
beta
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Logit
,
eps
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Logit
,
eps
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
threshold
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
threshold
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Celu
,
alpha
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Celu
,
alpha
);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT
(
Relu6
,
threshold
);
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
HardTanh
,
t_min
,
t_max
);
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
HardTanh
,
t_min
,
t_max
);
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
STanh
,
scale_a
,
scale_b
);
DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
STanh
,
scale_a
,
scale_b
);
...
...
paddle/phi/kernels/cpu/activation_grad_kernel.cc
浏览文件 @
54e4360a
...
@@ -136,12 +136,14 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, Expm1GradFunctor);
...
@@ -136,12 +136,14 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, Expm1GradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Reciprocal
,
ReciprocalGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Reciprocal
,
ReciprocalGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
SqrtGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
SqrtGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Rsqrt
,
RsqrtGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Rsqrt
,
RsqrtGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Relu6
,
Relu6GradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Softsign
,
SoftsignGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Softsign
,
SoftsignGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
LogSigmoid
,
LogSigmoidGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
LogSigmoid
,
LogSigmoidGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
LogGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
LogGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
,
Log2GradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
,
Log2GradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log10
,
Log10GradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log10
,
Log10GradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log1p
,
Log1pGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log1p
,
Log1pGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Swish
,
SwishGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Relu
,
ReluGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Relu
,
ReluGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Tanh
,
TanhGradFunctor
);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Tanh
,
TanhGradFunctor
);
...
@@ -157,16 +159,12 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
...
@@ -157,16 +159,12 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
ThresholdedRelu
,
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
ThresholdedRelu
,
ThresholdedReluGradFunctor
,
ThresholdedReluGradFunctor
,
threshold
);
threshold
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT
(
Relu6
,
Relu6GradFunctor
,
threshold
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
SoftShrink
,
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
SoftShrink
,
SoftShrinkGradFunctor
,
SoftShrinkGradFunctor
,
lambda
);
lambda
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
HardShrink
,
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
HardShrink
,
HardShrinkGradFunctor
,
HardShrinkGradFunctor
,
threshold
);
threshold
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Swish
,
SwishGradFunctor
,
beta
);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
MishGradFunctor
,
MishGradFunctor
,
...
...
paddle/phi/kernels/funcs/activation_functor.h
浏览文件 @
54e4360a
...
@@ -1505,16 +1505,14 @@ struct Relu6Functor : public BaseActivationFunctor<T> {
...
@@ -1505,16 +1505,14 @@ struct Relu6Functor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
Relu6GradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
Relu6GradFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{}};
}
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
template
<
typename
Device
,
typename
X
,
typename
X
,
typename
Out
,
typename
Out
,
typename
dOut
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
float
threshold
=
6
;
dx
.
device
(
d
)
=
dx
.
device
(
d
)
=
dout
*
((
out
>
static_cast
<
T
>
(
0
))
*
(
out
<
static_cast
<
T
>
(
threshold
)))
dout
*
((
out
>
static_cast
<
T
>
(
0
))
*
(
out
<
static_cast
<
T
>
(
threshold
)))
.
template
cast
<
T
>();
.
template
cast
<
T
>();
...
@@ -2188,10 +2186,7 @@ struct SwishFunctor : public BaseActivationFunctor<T> {
...
@@ -2188,10 +2186,7 @@ struct SwishFunctor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
SwishGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
SwishGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
float
beta
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{}};
}
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"beta"
,
&
beta
}};
}
template
<
typename
Device
,
template
<
typename
Device
,
typename
X
,
typename
X
,
...
@@ -2199,6 +2194,7 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
...
@@ -2199,6 +2194,7 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
typename
dOut
,
typename
dOut
,
typename
dX
>
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
fake_out
,
dOut
dout
,
dX
dx
)
const
{
void
operator
()(
Device
d
,
X
x
,
Out
fake_out
,
dOut
dout
,
dX
dx
)
const
{
float
beta
=
1.0
;
auto
temp1
=
static_cast
<
T
>
(
1
)
/
auto
temp1
=
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
(
static_cast
<
T
>
(
-
beta
)
*
x
).
exp
());
(
static_cast
<
T
>
(
1
)
+
(
static_cast
<
T
>
(
-
beta
)
*
x
).
exp
());
auto
out
=
x
*
temp1
;
auto
out
=
x
*
temp1
;
...
@@ -3285,14 +3281,12 @@ struct CudaRelu6Functor : public BaseActivationFunctor<T> {
...
@@ -3285,14 +3281,12 @@ struct CudaRelu6Functor : public BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
CudaRelu6GradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
CudaRelu6GradFunctor
:
public
BaseActivationFunctor
<
T
>
{
T
zero
=
static_cast
<
T
>
(
0.0
f
);
T
zero
=
static_cast
<
T
>
(
0.0
f
);
float
threshold
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{}};
}
return
{{
"threshold"
,
&
threshold
}};
}
// dx = (out > 0 && out < t) ? dout : 0
// dx = (out > 0 && out < t) ? dout : 0
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
__device__
__forceinline__
T
operator
()(
const
T
dout
,
const
T
out
)
const
{
float
threshold
=
6
;
T
t
=
static_cast
<
T
>
(
threshold
);
T
t
=
static_cast
<
T
>
(
threshold
);
return
(
out
>
zero
&&
out
<
t
)
?
dout
:
zero
;
return
(
out
>
zero
&&
out
<
t
)
?
dout
:
zero
;
}
}
...
@@ -3781,15 +3775,13 @@ template <typename T>
...
@@ -3781,15 +3775,13 @@ template <typename T>
struct
CudaSwishGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
struct
CudaSwishGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
using
MPType
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
MPType
one
=
static_cast
<
MPType
>
(
1.0
f
);
float
beta
;
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{}};
}
return
{{
"beta"
,
&
beta
}};
}
// dx = dout * (1 + exp(-b * x) + b * x * exp(-b * x) / (1 + exp(-b * x))^2)
// dx = dout * (1 + exp(-b * x) + b * x * exp(-b * x) / (1 + exp(-b * x))^2)
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
__device__
__forceinline__
T
operator
()(
const
T
arg_dout
,
const
T
arg_x
)
const
{
const
T
arg_x
)
const
{
float
beta
=
1.0
;
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
dout
=
static_cast
<
MPType
>
(
arg_dout
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
x
=
static_cast
<
MPType
>
(
arg_x
);
MPType
b
=
static_cast
<
MPType
>
(
beta
);
MPType
b
=
static_cast
<
MPType
>
(
beta
);
...
...
paddle/phi/kernels/gpu/activation_grad_kernel.cu
浏览文件 @
54e4360a
...
@@ -198,12 +198,14 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, CudaExpm1GradFunctor);
...
@@ -198,12 +198,14 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, CudaExpm1GradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Reciprocal
,
CudaReciprocalGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Reciprocal
,
CudaReciprocalGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
CudaSqrtGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
CudaSqrtGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Rsqrt
,
CudaRsqrtGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Rsqrt
,
CudaRsqrtGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Relu6
,
CudaRelu6GradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Softsign
,
CudaSoftsignGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Softsign
,
CudaSoftsignGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
LogSigmoid
,
CudaLogSigmoidGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
LogSigmoid
,
CudaLogSigmoidGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
CudaLogGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
CudaLogGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
,
CudaLog2GradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log2
,
CudaLog2GradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log10
,
CudaLog10GradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log10
,
CudaLog10GradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log1p
,
CudaLog1pGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log1p
,
CudaLog1pGradFunctor
);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Swish
,
CudaSwishGradFunctor
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
LeakyRelu
,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
LeakyRelu
,
CudaLeakyReluGradFunctor
,
CudaLeakyReluGradFunctor
,
...
@@ -217,9 +219,6 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
...
@@ -217,9 +219,6 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
HardShrink
,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
HardShrink
,
CudaHardShrinkGradFunctor
,
CudaHardShrinkGradFunctor
,
threshold
);
threshold
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Swish
,
CudaSwishGradFunctor
,
beta
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
CudaMishGradFunctor
,
CudaMishGradFunctor
,
...
@@ -227,9 +226,6 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
...
@@ -227,9 +226,6 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Celu
,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Celu
,
CudaCELUGradFunctor
,
CudaCELUGradFunctor
,
alpha
);
alpha
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT
(
Relu6
,
CudaRelu6GradFunctor
,
threshold
);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT
(
LogitCUDA
,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT
(
LogitCUDA
,
CudaLogitGradFunctor
,
CudaLogitGradFunctor
,
eps
);
eps
);
...
...
paddle/phi/kernels/onednn/activation_grad_kernel.cc
浏览文件 @
54e4360a
...
@@ -204,9 +204,16 @@ DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
...
@@ -204,9 +204,16 @@ DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
MishOneDNNGradFunctor
,
MishOneDNNGradFunctor
,
threshold
);
threshold
);
DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Swish
,
SwishOneDNNGradFunctor
,
template
<
typename
T
,
typename
Context
>
beta
);
void
SwishGradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
x
,
const
DenseTensor
&
dout
,
DenseTensor
*
dx
)
{
SwishOneDNNGradFunctor
<
T
>
functor
;
float
beta
=
1.0
;
functor
(
dev_ctx
,
x
,
dout
,
beta
,
0
,
dx
);
}
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
Context
>
void
EluGradKernel
(
const
Context
&
dev_ctx
,
void
EluGradKernel
(
const
Context
&
dev_ctx
,
...
@@ -247,9 +254,9 @@ template <typename T, typename Context>
...
@@ -247,9 +254,9 @@ template <typename T, typename Context>
void
Relu6GradKernel
(
const
Context
&
dev_ctx
,
void
Relu6GradKernel
(
const
Context
&
dev_ctx
,
const
DenseTensor
&
out
,
const
DenseTensor
&
out
,
const
DenseTensor
&
dout
,
const
DenseTensor
&
dout
,
float
threshold
,
DenseTensor
*
dx
)
{
DenseTensor
*
dx
)
{
Relu6OneDNNGradUseOutFunctor
<
T
>
functor
;
Relu6OneDNNGradUseOutFunctor
<
T
>
functor
;
float
threshold
=
6
;
functor
(
dev_ctx
,
out
,
dout
,
0
,
threshold
,
dx
);
functor
(
dev_ctx
,
out
,
dout
,
0
,
threshold
,
dx
);
}
}
...
...
paddle/phi/kernels/sparse/impl/unary_grad_kernel_impl.h
浏览文件 @
54e4360a
...
@@ -94,9 +94,9 @@ DEFINE_SPARSE_UNARY_GRAD_KERNEL(Log1p)
...
@@ -94,9 +94,9 @@ DEFINE_SPARSE_UNARY_GRAD_KERNEL(Log1p)
DEFINE_SPARSE_UNARY_GRAD_KERNEL
(
Relu
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL
(
Relu
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL
(
Abs
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL
(
Abs
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL
(
Expm1
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL
(
Expm1
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL
(
Relu6
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
Pow
,
factor
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
Pow
,
factor
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
LeakyRelu
,
alpha
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
LeakyRelu
,
alpha
)
DEFINE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
Relu6
,
threshold
)
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
Context
>
void
CastCooGradKernel
(
const
Context
&
dev_ctx
,
void
CastCooGradKernel
(
const
Context
&
dev_ctx
,
...
...
paddle/phi/kernels/sparse/unary_grad_kernel.h
浏览文件 @
54e4360a
...
@@ -62,9 +62,9 @@ DECLARE_SPARSE_UNARY_GRAD_KERNEL(Sqrt)
...
@@ -62,9 +62,9 @@ DECLARE_SPARSE_UNARY_GRAD_KERNEL(Sqrt)
DECLARE_SPARSE_UNARY_GRAD_KERNEL
(
Log1p
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL
(
Log1p
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL
(
Abs
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL
(
Abs
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL
(
Expm1
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL
(
Expm1
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL
(
Relu6
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
Pow
,
factor
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
Pow
,
factor
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
LeakyRelu
,
alpha
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
LeakyRelu
,
alpha
)
DECLARE_SPARSE_UNARY_GRAD_KERNEL_WITH_ONE_ATTR
(
Relu6
,
threshold
)
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
Context
>
void
CastCooGradKernel
(
const
Context
&
dev_ctx
,
void
CastCooGradKernel
(
const
Context
&
dev_ctx
,
...
...
paddle/phi/kernels/xpu/activation_grad_kernel.cc
浏览文件 @
54e4360a
...
@@ -351,10 +351,7 @@ struct XPUReluGradFunctor : public funcs::BaseActivationFunctor<T> {
...
@@ -351,10 +351,7 @@ struct XPUReluGradFunctor : public funcs::BaseActivationFunctor<T> {
template
<
typename
T
>
template
<
typename
T
>
struct
XPURelu6GradFunctor
:
public
funcs
::
BaseActivationFunctor
<
T
>
{
struct
XPURelu6GradFunctor
:
public
funcs
::
BaseActivationFunctor
<
T
>
{
using
XPUType
=
typename
XPUTypeTrait
<
T
>::
Type
;
using
XPUType
=
typename
XPUTypeTrait
<
T
>::
Type
;
float
threshold
;
typename
funcs
::
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{}};
}
typename
funcs
::
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Context
>
template
<
typename
Context
>
void
operator
()(
const
Context
&
dev_ctx
,
void
operator
()(
const
Context
&
dev_ctx
,
const
DenseTensor
*
x
,
const
DenseTensor
*
x
,
...
@@ -481,10 +478,7 @@ void PowGradKernel(const Context& dev_ctx,
...
@@ -481,10 +478,7 @@ void PowGradKernel(const Context& dev_ctx,
template
<
typename
T
>
template
<
typename
T
>
struct
XPUSwishGradFunctor
:
public
funcs
::
BaseActivationFunctor
<
T
>
{
struct
XPUSwishGradFunctor
:
public
funcs
::
BaseActivationFunctor
<
T
>
{
using
XPUType
=
typename
XPUTypeTrait
<
T
>::
Type
;
using
XPUType
=
typename
XPUTypeTrait
<
T
>::
Type
;
float
beta
;
typename
funcs
::
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{}};
}
typename
funcs
::
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"beta"
,
&
beta
}};
}
template
<
typename
Context
>
template
<
typename
Context
>
void
operator
()(
const
Context
&
dev_ctx
,
void
operator
()(
const
Context
&
dev_ctx
,
...
@@ -571,14 +565,13 @@ DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid, XPUSigmoidGradFunctor);
...
@@ -571,14 +565,13 @@ DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid, XPUSigmoidGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
XPUSqrtGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Sqrt
,
XPUSqrtGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Tanh
,
XPUTanhGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Tanh
,
XPUTanhGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Relu
,
XPUReluGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Relu
,
XPUReluGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT
(
Relu6
,
XPURelu6GradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Silu
,
XPUSiluGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Silu
,
XPUSiluGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
XPULogGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Log
,
XPULogGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Square
,
XPUSquareGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Square
,
XPUSquareGradFunctor
);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX
(
Swish
,
XPUSwishGradFunctor
);
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Swish
,
XPUSwishGradFunctor
,
beta
);
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX
(
Mish
,
XPUMishGradFunctor
,
XPUMishGradFunctor
,
threshold
);
threshold
);
...
@@ -586,10 +579,6 @@ DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
...
@@ -586,10 +579,6 @@ DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
XPULeakyReluGradFunctor
,
XPULeakyReluGradFunctor
,
alpha
);
alpha
);
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT
(
Relu6
,
XPURelu6GradFunctor
,
threshold
);
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
Softplus
,
DEFINE_XPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX
(
Softplus
,
XPUSoftPlusGradFunctor
,
XPUSoftPlusGradFunctor
,
beta
,
beta
,
...
...
paddle/phi/ops/compat/activation_sig.cc
浏览文件 @
54e4360a
...
@@ -41,8 +41,14 @@ namespace phi {
...
@@ -41,8 +41,14 @@ namespace phi {
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
HardTanh
,
"hardtanh"
,
"t_min"
comma
"t_max"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
HardTanh
,
"hardtanh"
,
"t_min"
comma
"t_max"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Mish
,
"mish"
,
"threshold"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Mish
,
"mish"
,
"threshold"
);
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP
(
Swish
,
"swish"
,
"beta"
);
// NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP
(
Relu6
,
"relu6"
,
"threshold"
);
// NOLINT
KernelSignature
SwishGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"swish_grad"
,
{
"X"
,
"Out@GRAD"
},
{},
{
"X@GRAD"
});
}
KernelSignature
Relu6GradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"relu6_grad"
,
{
"Out"
,
"Out@GRAD"
},
{},
{
"X@GRAD"
});
}
KernelSignature
HardSwishGradOpArgumentMapping
(
KernelSignature
HardSwishGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
const
ArgumentMappingContext
&
ctx
)
{
...
...
test/mkldnn/test_activation_mkldnn_op.py
浏览文件 @
54e4360a
...
@@ -16,7 +16,6 @@ import unittest
...
@@ -16,7 +16,6 @@ import unittest
import
numpy
as
np
import
numpy
as
np
from
mkldnn_op_test
import
check_if_mkldnn_primitives_exist_in_bwd
from
mkldnn_op_test
import
check_if_mkldnn_primitives_exist_in_bwd
from
scipy.special
import
expit
import
paddle
import
paddle
import
paddle.nn.functional
as
F
import
paddle.nn.functional
as
F
...
@@ -416,30 +415,6 @@ class TestMKLDNNAbsDim4(TestAbs):
...
@@ -416,30 +415,6 @@ class TestMKLDNNAbsDim4(TestAbs):
self
.
dtype
=
np
.
float32
self
.
dtype
=
np
.
float32
class
TestMKLDNNSwishDim4
(
TestSwish
):
def
setUp
(
self
):
super
().
setUp
()
x
=
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
self
.
dtype
)
beta
=
2.3
out
=
x
*
expit
(
beta
*
x
)
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
x
)}
self
.
outputs
=
{
'Out'
:
out
}
self
.
attrs
=
{
"use_mkldnn"
:
True
,
"beta"
:
beta
}
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float32
def
test_check_output
(
self
):
self
.
check_output
(
check_dygraph
=
False
)
def
test_check_grad
(
self
):
if
self
.
dtype
==
np
.
float16
:
return
self
.
check_grad
([
'X'
],
'Out'
,
check_dygraph
=
False
)
def
ref_hardswish
(
x
,
threshold
=
6.0
,
scale
=
6.0
,
offset
=
3.0
):
def
ref_hardswish
(
x
,
threshold
=
6.0
,
scale
=
6.0
,
offset
=
3.0
):
x_dtype
=
x
.
dtype
x_dtype
=
x
.
dtype
if
x_dtype
==
'float16'
:
if
x_dtype
==
'float16'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录