BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 21beb082
Authored Mar 17, 2022 by phlrain
Parent: 4be77e53

add some grad kernel; test=develop

Showing 6 changed files with 81 additions and 38 deletions (+81 −38):
paddle/fluid/operators/activation_op.h  (+0 −13)
paddle/fluid/operators/activation_op.kps  (+0 −12)
paddle/phi/kernels/cpu/activation_grad_kernel.cc  (+9 −0)
paddle/phi/kernels/cpu/activation_kernel.cc  (+2 −2)
paddle/phi/kernels/funcs/activation_functor.h  (+60 −11)
paddle/phi/kernels/gpu/activation_grad_kernel.cu  (+10 −0)
paddle/fluid/operators/activation_op.h

```diff
@@ -440,19 +440,6 @@ struct LogSigmoidGradFunctor : public BaseActivationFunctor<T> {
   static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
 };
 
-template <typename T>
-struct Expm1GradFunctor : public BaseActivationFunctor<T> {
-  template <typename Device, typename X, typename Out, typename dOut,
-            typename dX>
-  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    dx.device(d) = dout * out + dout;
-  }
-
-  static constexpr ActBwdOpFwdDeps FwdDeps() {
-    return ActBwdOpFwdDeps::kDepOut;
-  }
-};
-
 // relu(x) = max(x, 0)
 template <typename T>
```
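A note on the formula (my gloss, not part of the commit): since out = expm1(x) = eˣ − 1, the derivative is d expm1(x)/dx = eˣ = out + 1, so dx = dout · (out + 1) = dout * out + dout. That is also why FwdDeps() returns kDepOut: the backward pass needs only the forward output, never x. A quick standalone check of the identity (plain C++, not Paddle code):

```cpp
// Verifies dx = dout * out + dout equals the analytic gradient
// dout * exp(x), using out = expm1(x) = exp(x) - 1.
#include <cassert>
#include <cmath>
#include <initializer_list>

int main() {
  for (double x : {-2.0, -0.5, 0.0, 0.5, 2.0}) {
    const double out = std::expm1(x);
    const double dout = 0.7;  // arbitrary upstream gradient
    const double dx = dout * out + dout;  // the functor's formula (kDepOut)
    assert(std::abs(dx - dout * std::exp(x)) < 1e-12);
  }
  return 0;
}
```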
paddle/fluid/operators/activation_op.kps

```diff
@@ -140,18 +140,6 @@ struct CudaReciprocalGradFunctor : public BaseActivationFunctor<T> {
   }
 };
 
-template <typename T>
-struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> {
-  // dx = dout * out
-  __device__ __forceinline__ T operator()(const T dout, const T out) const {
-    return dout * out + dout;
-  }
-
-  static constexpr ActBwdOpFwdDeps FwdDeps() {
-    return ActBwdOpFwdDeps::kDepOut;
-  }
-};
-
 template <typename T>
 struct CudaLogFunctor : public BaseActivationFunctor<T> {
   using MPType = typename details::MPTypeTrait<T>::Type;
```
paddle/phi/kernels/cpu/activation_grad_kernel.cc

```diff
@@ -104,6 +104,7 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, AtanhGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, TanhShrinkGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, SiluGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, ExpGradFunctor);
+DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, Expm1GradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, ReluGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, TanhGradFunctor);
@@ -214,3 +215,11 @@ PD_REGISTER_KERNEL(exp_grad,
                    double,
                    int,
                    int64_t) {}
+
+PD_REGISTER_KERNEL(expm1_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::Expm1GradKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {}
```
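Neither DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT nor PD_REGISTER_KERNEL is expanded in this diff. As a rough sketch of what the DEPOUT pattern amounts to (the macro, Tensor alias, and kernel below are illustrative stand-ins, not Paddle's real DenseTensor or registration machinery):

```cpp
// Hypothetical, simplified stand-in for the DEPOUT pattern: the generated
// grad kernel reads only the forward output `out`, not the input x.
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

using Tensor = std::vector<double>;  // stand-in for phi::DenseTensor

#define DEFINE_GRAD_KERNEL_DEPOUT(name, formula)                              \
  void name##GradKernel(const Tensor& out, const Tensor& dout, Tensor* dx) {  \
    dx->resize(out.size());                                                   \
    for (std::size_t i = 0; i < out.size(); ++i) (*dx)[i] = (formula);        \
  }

// Mirrors Expm1GradFunctor: dx = dout * out + dout.
DEFINE_GRAD_KERNEL_DEPOUT(Expm1, dout[i] * out[i] + dout[i])

int main() {
  Tensor out{std::expm1(0.5)}, dout{1.0}, dx;
  Expm1GradKernel(out, dout, &dx);
  assert(std::abs(dx[0] - std::exp(0.5)) < 1e-12);  // dx = exp(x)
  return 0;
}
```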
paddle/phi/kernels/cpu/activation_kernel.cc

```diff
@@ -120,12 +120,12 @@ PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
 PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
 PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
 PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
-// PD_REGISTER_ACTIVATION_KERNEL(mish, Mish)
+PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
 PD_REGISTER_ACTIVATION_KERNEL(stanh, STanhKernel)
 PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel)
 PD_REGISTER_ACTIVATION_KERNEL(sqrt, SqrtKernel)
 PD_REGISTER_ACTIVATION_KERNEL(rsqrt, RsqrtKernel)
-// PD_REGISTER_ACTIVATION_KERNEL(softplus, Softplus)
+PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel)
 PD_REGISTER_ACTIVATION_KERNEL(softsign, SoftsignKernel)
 PD_REGISTER_KERNEL(
 ...
```
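For context, the two newly registered kernels implement the formulas quoted in activation_functor.h below: mish(x) = x * tanh(softplus(x)), with softplus thresholded for numerical stability. A standalone numeric illustration (plain C++, not Paddle code; the beta and threshold defaults follow the comment in the diff):

```cpp
// Illustrates the thresholded softplus and mish formulas from the diff below.
#include <cmath>
#include <cstdio>

double softplus(double x, double beta = 1.0, double threshold = 20.0) {
  double x_beta = beta * x;
  // For large x_beta, log1p(exp(x_beta)) / beta ~= x; the threshold path
  // avoids overflow in exp().
  return x_beta > threshold ? x : std::log1p(std::exp(x_beta)) / beta;
}

double mish(double x) { return x * std::tanh(softplus(x)); }

int main() {
  std::printf("mish(1.0)     = %f\n", mish(1.0));      // ~0.865098
  std::printf("softplus(50)  = %f\n", softplus(50.0)); // = 50 (threshold path)
  return 0;
}
```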
paddle/phi/kernels/funcs/activation_functor.h

```diff
@@ -157,9 +157,10 @@ struct LogitFunctor {
   }
 };
 
-// mish(x) = x * tanh(softplus(x))
-// softplus(x) = x, if x > threshold
-//             = ln(1 + exp(x)), otherwise
+// // mish(x) = x * tanh(softplus(x))
+// // softplus(x) = x, if x > threshold
+// //             = ln(1 + exp(x)), otherwise
 template <typename T>
 struct MishFunctor : public BaseActivationFunctor<T> {
   float threshold;
@@ -168,7 +169,7 @@ struct MishFunctor : public BaseActivationFunctor<T> {
   }
 
   template <typename Device, typename X, typename Out>
-  void operator()(Device d, X x, Out out) {
+  void operator()(Device d, X x, Out out) const {
     auto sp = (x > static_cast<T>(threshold))
                   .select(x, (static_cast<T>(1) + x.exp()).log());
     out.device(d) = x * sp.tanh();
@@ -244,20 +245,41 @@ struct RsqrtFunctor : public BaseActivationFunctor<T> {
   }
 };
 
-// For numerical stability, using the following formula instead of softplus(x) =
-// log(1 + exp(x))
-// softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <= threshold(beta =
-// 1, threshold = 20 by default), otherwise x
+// // For numerical stability, using the following formula instead of
+// softplus(x) =
+// // log(1 + exp(x))
+// // softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <=
+// threshold(beta =
+// // 1, threshold = 20 by default), otherwise x
+// template <typename T>
+// struct SoftplusFunctor : public BaseActivationFunctor<T> {
+//   float beta;
+//   float threshold;
+//   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
+//     return {{"beta", &beta}, {"threshold", &threshold}};
+//   }
+//   template <typename Device, typename X, typename Out>
+//   void operator()(Device d, X x, Out out) {
+//     auto x_beta = static_cast<T>(beta) * x;
+//     out.device(d) = (x_beta > static_cast<T>(threshold))
+//                         .select(x,
+//                                 (static_cast<T>(1) + x_beta.exp()).log() /
+//                                     static_cast<T>(beta));
+//   }
+// };
 template <typename T>
 struct SoftplusFunctor : public BaseActivationFunctor<T> {
   float beta;
   float threshold;
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"beta", &beta}, {"threshold", &threshold}};
   }
 
   template <typename Device, typename X, typename Out>
-  void operator()(Device d, X x, Out out) {
+  void operator()(Device d, X x, Out out) const {
     auto x_beta = static_cast<T>(beta) * x;
     out.device(d) = (x_beta > static_cast<T>(threshold))
                         .select(x,
 ...
@@ -602,6 +624,22 @@ struct Expm1Functor : public BaseActivationFunctor<T> {
   }
 };
 
+template <typename T>
+struct Expm1GradFunctor : public BaseActivationFunctor<T> {
+  template <typename Device,
+            typename X,
+            typename Out,
+            typename dOut,
+            typename dX>
+  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
+    dx.device(d) = dout * out + dout;
+  }
+
+  static constexpr ActBwdOpFwdDeps FwdDeps() {
+    return ActBwdOpFwdDeps::kDepOut;
+  }
+};
+
 // relu(x) = max(x, 0)
 template <typename T>
 struct ReluCPUFunctor : public BaseActivationFunctor<T> {
@@ -822,11 +860,10 @@ struct BReluGradFunctor : public BaseActivationFunctor<T> {
   static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
 };
 
 // softsign(x) = x / (1 + |x|)
 template <typename T>
 struct SoftsignFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out>
-  void operator()(Device d, X x, Out out) {
+  void operator()(Device d, X x, Out out) const {
     out.device(d) = x / (static_cast<T>(1) + x.abs());
   }
 };
@@ -1264,6 +1301,18 @@ struct CudaExpm1Functor : public BaseActivationFunctor<T> {
   }
 };
 
+template <typename T>
+struct CudaExpm1GradFunctor : public BaseActivationFunctor<T> {
+  // dx = dout * out
+  __device__ __forceinline__ T operator()(const T dout, const T out) const {
+    return dout * out + dout;
+  }
+
+  static constexpr ActBwdOpFwdDeps FwdDeps() {
+    return ActBwdOpFwdDeps::kDepOut;
+  }
+};
+
 template <typename T>
 struct CudaSinFunctor : public BaseActivationFunctor<T> {
   using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
```
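A recurring change above is adding const to the functor operator() overloads (MishFunctor, SoftplusFunctor, SoftsignFunctor). The commit message does not say why, but a plausible reason is that the phi kernel machinery invokes these functors through const references, where only const member functions are callable. A minimal illustration of that constraint (generic C++, not Paddle code):

```cpp
// A functor held or passed by const reference can only have its const
// member functions called; a non-const operator() would not compile here.
struct Functor {
  void operator()(double x) const { (void)x; }  // callable via const ref
};

void apply(const Functor& f) { f(1.0); }  // fails to compile if operator()
                                          // lacks the const qualifier
int main() {
  apply(Functor{});
  return 0;
}
```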
paddle/phi/kernels/gpu/activation_grad_kernel.cu

```diff
@@ -158,6 +158,7 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, CudaAtanhGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, CudaTanhShrinkGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, CudaSiluGradFunctor);
 DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, CudaExpGradFunctor);
+DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Expm1, CudaExpm1GradFunctor);
 
 DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
                                                CudaLeakyReluGradFunctor,
 ...
@@ -274,9 +275,18 @@ PD_REGISTER_KERNEL(exp_grad,
                    double,
                    int,
                    int64_t) {}
 
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
 PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_double_grad, EluDoubleGradKernel)
+
+PD_REGISTER_KERNEL(expm1_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::Expm1GradKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {}
```
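Unlike the Eigen-expression CPU functor, CudaExpm1GradFunctor is a per-element functor: scalar dout and out in, scalar dx out. A hedged sketch of an equivalent elementwise launch (generic CUDA; Paddle's actual dispatch through DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT is not shown in this diff):

```cuda
// Naive elementwise launch applying the same formula as the functor:
// dx = dout * out + dout.
#include <cmath>
#include <cstdio>
#include <cuda_runtime.h>

__global__ void Expm1GradNaive(const float* dout, const float* out,
                               float* dx, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) dx[i] = dout[i] * out[i] + dout[i];
}

int main() {
  const int n = 4;
  float h_out[n], h_dout[n], h_dx[n];
  for (int i = 0; i < n; ++i) {
    h_out[i] = std::expm1(0.5f * i);  // forward output
    h_dout[i] = 1.0f;                 // upstream gradient
  }
  float *d_out, *d_dout, *d_dx;
  cudaMalloc(&d_out, n * sizeof(float));
  cudaMalloc(&d_dout, n * sizeof(float));
  cudaMalloc(&d_dx, n * sizeof(float));
  cudaMemcpy(d_out, h_out, n * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(d_dout, h_dout, n * sizeof(float), cudaMemcpyHostToDevice);
  Expm1GradNaive<<<1, 256>>>(d_dout, d_out, d_dx, n);
  cudaMemcpy(h_dx, d_dx, n * sizeof(float), cudaMemcpyDeviceToHost);
  for (int i = 0; i < n; ++i)
    std::printf("dx[%d] = %f (expect %f)\n", i, h_dx[i], std::exp(0.5f * i));
  cudaFree(d_out);
  cudaFree(d_dout);
  cudaFree(d_dx);
  return 0;
}
```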