Repository: 机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Commit be5918e0 (unverified): move activation (#40913)
Author: YuanRisheng
Committed by: GitHub on Mar 25, 2022
Parent: c33b4f95
13 changed files with 625 additions and 434 deletions (+625, -434):
paddle/fluid/operators/activation_op.cc            +6    -12
paddle/fluid/operators/activation_op.h             +16   -257
paddle/fluid/operators/activation_op.kps           +15   -165
paddle/phi/kernels/activation_grad_kernel.h        +27   -0
paddle/phi/kernels/activation_kernel.h             +20   -0
paddle/phi/kernels/cpu/activation_grad_kernel.cc   +45   -0
paddle/phi/kernels/cpu/activation_kernel.cc        +27   -0
paddle/phi/kernels/funcs/activation_functor.h      +300  -0
paddle/phi/kernels/gpu/activation_grad_kernel.cu   +48   -0
paddle/phi/kernels/gpu/activation_kernel.cu        +34   -0
paddle/phi/kernels/impl/activation_grad_impl.h     +24   -0
paddle/phi/kernels/impl/activation_impl.h          +19   -0
paddle/phi/ops/compat/activation_sig.cc            +44   -0
paddle/fluid/operators/activation_op.cc
@@ -1499,6 +1499,12 @@ REGISTER_ACTIVATION_OP(logsigmoid, LogSigmoid, LogSigmoidFunctor,
REGISTER_ACTIVATION_OP(log2, Log2, Log2Functor, Log2GradFunctor);
REGISTER_ACTIVATION_OP(log10, Log10, Log10Functor, Log10GradFunctor);
REGISTER_ACTIVATION_OP(log1p, Log1p, Log1pFunctor, Log1pGradFunctor);
REGISTER_ACTIVATION_OP(hard_swish, HardSwish, HardSwishFunctor,
                       HardSwishGradFunctor);
REGISTER_ACTIVATION_OP(swish, Swish, SwishFunctor, SwishGradFunctor);
REGISTER_ACTIVATION_OP(round, Round, RoundFunctor, ZeroGradFunctor);
REGISTER_ACTIVATION_OP(floor, Floor, FloorFunctor, ZeroGradFunctor);
REGISTER_ACTIVATION_OP(ceil, Ceil, CeilFunctor, ZeroGradFunctor);

/* ==========================   sigmoid register  =============================
 */
...
...
@@ -1778,18 +1784,6 @@ REGISTER_OPERATOR(
    ops::ActFwdInplaceInferer, void>::type);

REGISTER_OPERATOR(pow_grad, ops::PowOpGrad,
                  ops::ActivationGradOpInplaceInferer);

REGISTER_OP_CPU_KERNEL(
    pow, ops::PowKernel<plat::CPUDeviceContext, ops::PowFunctor<float>>,
    ops::PowKernel<plat::CPUDeviceContext, ops::PowFunctor<double>>,
    ops::PowKernel<plat::CPUDeviceContext, ops::PowFunctor<int>>,
    ops::PowKernel<plat::CPUDeviceContext, ops::PowFunctor<int64_t>>);
REGISTER_OP_CPU_KERNEL(
    pow_grad,
    ops::PowGradKernel<plat::CPUDeviceContext, ops::PowGradFunctor<float>>,
    ops::PowGradKernel<plat::CPUDeviceContext, ops::PowGradFunctor<double>>,
    ops::PowGradKernel<plat::CPUDeviceContext, ops::PowGradFunctor<int>>,
    ops::PowGradKernel<plat::CPUDeviceContext, ops::PowGradFunctor<int64_t>>);
/* ========================================================================== */
/* ========================== exp register ============================ */
...
...
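Note: the fluid-style pow registrations removed above are superseded by phi-style registrations added later in this commit. A condensed side-by-side view (both lines taken from this diff, not an additional change):

    // Removed here (fluid): one kernel entry per DeviceContext/Functor/type combination.
    REGISTER_OP_CPU_KERNEL(
        pow, ops::PowKernel<plat::CPUDeviceContext, ops::PowFunctor<float>>, ...);

    // Added in paddle/phi/kernels/cpu/activation_kernel.cc (phi): one registration listing the element types.
    PD_REGISTER_KERNEL(
        pow, CPU, ALL_LAYOUT, phi::PowKernel, float, double, int, int64_t) {}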
paddle/fluid/operators/activation_op.h
@@ -286,10 +286,25 @@ USE_PHI_DOUBLE_GRAD_FUNCTOR(Log)
USE_PHI_FUNCTOR(Log2)
USE_PHI_FUNCTOR(Log10)
USE_PHI_FUNCTOR(Log1p)
USE_PHI_FUNCTOR(Swish)
USE_PHI_FUNCTOR(HardSwish)
USE_PHI_FUNCTOR(Pow)

template <typename T>
using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor<T>;

template <typename T>
using RoundFunctor = phi::funcs::RoundFunctor<T>;

template <typename T>
using FloorFunctor = phi::funcs::FloorFunctor<T>;

template <typename T>
using CeilFunctor = phi::funcs::CeilFunctor<T>;

template <typename T>
using ZeroGradFunctor = phi::funcs::ZeroGradFunctor<T>;

// exp(x) = e^x
template <typename T>
struct ExpFunctor : public BaseActivationFunctor<T> {
...
...
@@ -391,46 +406,6 @@ struct RsqrtGradFunctor : public BaseActivationFunctor<T> {
  }
};

// ceil(x) = ceiling(x)
template <typename T>
struct CeilFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x.ceil();
  }
};

template <typename T>
struct ZeroGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) = static_cast<T>(0) * out;
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() {
    return ActBwdOpFwdDeps::kNoDeps;
  }
};

// floor(x) = flooring(x)
template <typename T>
struct FloorFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x.floor();
  }
};

// round(x) = [x]
template <typename T>
struct RoundFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x.round();
  }
};

// reciprocal(x) = 1 / x
template <typename T>
struct ReciprocalFunctor : public BaseActivationFunctor<T> {
...
...
@@ -509,51 +484,6 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
  }
};

// HardSwish = min(max(0, x+3), 6) * x / 6
template <typename T>
struct HardSwishFunctor : public BaseActivationFunctor<T> {
  float threshold;
  float scale;
  float offset;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
  }

  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = (x + static_cast<T>(offset))
                        .cwiseMax(static_cast<T>(0))
                        .cwiseMin(static_cast<T>(threshold)) *
                    x / static_cast<T>(scale);
  }
};

template <typename T>
struct HardSwishGradFunctor : public BaseActivationFunctor<T> {
  float threshold;
  float scale;
  float offset;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
  }

  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    auto tmp = ((x + static_cast<T>(offset)) < static_cast<T>(threshold))
                   .template cast<T>();
    dx.device(d) =
        dout *
        (((x + static_cast<T>(offset)) > static_cast<T>(0)).template cast<T>() *
             (static_cast<T>(2) * x + static_cast<T>(offset)) /
             static_cast<T>(scale) * tmp +
         static_cast<T>(1) * (static_cast<T>(1) - tmp));
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
// For numerical stability, using the following formula instead of softplus(x) =
// log(1 + exp(x))
// softplus(x) = log(1 + exp(beta * x)) / beta when beta * x <= threshold(beta =
...
...
@@ -776,35 +706,6 @@ struct CELUGradFunctor : public BaseActivationFunctor<T> {
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5198
template <typename T>
struct PowFunctor : public BaseActivationFunctor<T> {
  float factor;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"factor", &factor}};
  }
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x.pow(static_cast<T>(factor));
  }
};

template <typename T>
struct PowGradFunctor : public BaseActivationFunctor<T> {
  float factor;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"factor", &factor}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) = dout * static_cast<T>(factor) *
                   x.pow(static_cast<T>(factor) - static_cast<T>(1));
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct LogitFunctor {
  template <typename Device, typename X, typename Out, typename P>
...
...
@@ -870,39 +771,6 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct SwishFunctor : public BaseActivationFunctor<T> {
  float beta;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"beta", &beta}};
  }

  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x / (static_cast<T>(1) + (static_cast<T>(-beta) * x).exp());
  }
};

template <typename T>
struct SwishGradFunctor : public BaseActivationFunctor<T> {
  float beta;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"beta", &beta}};
  }

  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out fake_out, dOut dout, dX dx) const {
    auto temp1 = static_cast<T>(1) /
                 (static_cast<T>(1) + (static_cast<T>(-beta) * x).exp());
    auto out = x * temp1;
    auto temp2 = temp1 * (static_cast<T>(1) - (static_cast<T>(beta) * out));
    dx.device(d) = dout * ((static_cast<T>(beta) * out) + temp2);
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct AbsGradGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device>
...
...
@@ -1267,110 +1135,6 @@ class RsqrtDoubleGradKernel
  }
};

template <typename DeviceContext, typename Functor>
class PowKernel : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  using T = typename Functor::ELEMENT_TYPE;
  void Compute(const framework::ExecutionContext& context) const override {
    const framework::Tensor* X = nullptr;
    framework::Tensor* Out = nullptr;
    ExtractActivationTensor(context, &X, &Out);
    Out->mutable_data<T>(context.GetPlace());

    auto x = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(X, "Input", "X", "Pow"));
    auto out = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(Out, "Output", "Out", "Pow"));
    auto* place =
        context.template device_context<DeviceContext>().eigen_device();
    Functor functor;
    auto attrs = functor.GetAttrs();
    for (auto& attr : attrs) {
      *attr.second = context.Attr<float>(attr.first);
    }
    // get FactorTensor
    auto* factor_tensor = context.HasInput("FactorTensor")
                              ? context.Input<framework::Tensor>("FactorTensor")
                              : nullptr;
    if (factor_tensor) {
      auto* factor_data = factor_tensor->data<float>();
      framework::Tensor cpu_factor_tensor;
      if (platform::is_gpu_place(factor_tensor->place())) {
        framework::TensorCopySync(*factor_tensor, platform::CPUPlace(),
                                  &cpu_factor_tensor);
        factor_data = cpu_factor_tensor.data<float>();
      }
      auto factor =
          std::vector<float>(factor_data, factor_data + factor_tensor->numel());
      PADDLE_ENFORCE_EQ(
          factor.size(), 1,
          platform::errors::InvalidArgument(
              "The shape of factor(tensor) must be [1] rather than %d",
              factor.size()));
      for (auto& attr : attrs) {
        *attr.second = factor[0];
      }
    }
    functor(*place, x, out);
  }
};

template <typename DeviceContext, typename Functor>
class PowGradKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  using T = typename Functor::ELEMENT_TYPE;
  void Compute(const framework::ExecutionContext& context) const override {
    const framework::Tensor *X, *Out, *dOut;
    framework::Tensor* dX = nullptr;
    X = Out = dOut = nullptr;
    ExtractActivationGradTensor<Functor::FwdDeps()>(context, &X, &Out, &dOut,
                                                    &dX);
    dX->mutable_data<T>(context.GetPlace());
    auto dout = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(dOut, "Input", "Out@GRAD", "PowGrad"));
    auto out = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(Out, "Input", "Out", "PowGrad"));
    auto dx = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(dX, "Output", "X@GRAD", "PowGrad"));
    auto x = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(X, "Input", "X", "PowGrad"));
    auto* place =
        context.template device_context<DeviceContext>().eigen_device();
    Functor functor;
    auto attrs = functor.GetAttrs();
    for (auto& attr : attrs) {
      *attr.second = context.Attr<float>(attr.first);
    }
    // get FactorTensor
    auto* factor_tensor =
        context.HasInput("FactorTensor")
            ? context.Input<framework::LoDTensor>("FactorTensor")
            : nullptr;
    if (factor_tensor) {
      auto* factor_data = factor_tensor->data<float>();
      framework::Tensor cpu_factor_tensor;
      if (platform::is_gpu_place(factor_tensor->place())) {
        framework::TensorCopySync(*factor_tensor, platform::CPUPlace(),
                                  &cpu_factor_tensor);
        factor_data = cpu_factor_tensor.data<float>();
      }
      auto factor =
          std::vector<float>(factor_data, factor_data + factor_tensor->numel());
      PADDLE_ENFORCE_EQ(
          factor.size(), 1,
          platform::errors::InvalidArgument(
              "The shape of factor(tensor) must be [1] rather than %d",
              factor.size()));
      for (auto& attr : attrs) {
        *attr.second = factor[0];
      }
    }
    functor(*place, x, out, dout, dx);
  }
};

template <typename DeviceContext, typename T>
class LogitKernel : public framework::OpKernel<T> {
 public:
...
...
@@ -1418,15 +1182,10 @@ class LogitGradKernel : public framework::OpKernel<T> {
}  // namespace paddle
#define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(ceil, Ceil, CeilFunctor, ZeroGradFunctor); \
__macro(floor, Floor, FloorFunctor, ZeroGradFunctor); \
__macro(round, Round, RoundFunctor, ZeroGradFunctor); \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(stanh, STanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, Softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6, Relu6Functor, Relu6GradFunctor); \
__macro(swish, Swish, SwishFunctor, SwishGradFunctor); \
__macro(mish, Mish, MishFunctor, MishGradFunctor); \
__macro(hard_swish, HardSwish, HardSwishFunctor, HardSwishGradFunctor);
__macro(mish, Mish, MishFunctor, MishGradFunctor);
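Putting the hunks in this file together: ceil, floor, round, swish, and hard_swish drop out of FOR_EACH_ACTIVATION_OP because their fluid functor definitions were deleted here and the names now forward to the phi implementations. A condensed view of how the pieces fit, with round as the example (lines taken from this diff):

    // activation_op.h: the fluid names become aliases of the phi functors.
    template <typename T>
    using RoundFunctor = phi::funcs::RoundFunctor<T>;
    template <typename T>
    using ZeroGradFunctor = phi::funcs::ZeroGradFunctor<T>;

    // activation_op.cc: the op registration stays, but now binds to the phi functors.
    REGISTER_ACTIVATION_OP(round, Round, RoundFunctor, ZeroGradFunctor);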
paddle/fluid/operators/activation_op.kps
@@ -20,51 +20,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
struct CudaCeilFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// ceil(x) = ceil(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(ceil(x));
}
};
template <typename T>
struct CudaFloorFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// floor(x) = floor(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(floor(x));
}
};
template <typename T>
struct CudaRoundFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
// round(x) = round(x)
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
return static_cast<T>(round(x));
}
};
// GradFunctor for ceil, floor and round
template <typename T>
struct CudaZeroGradFunctor : public BaseActivationFunctor<T> {
__device__ __forceinline__ T operator()(const T x) const {
return static_cast<T>(0.0f);
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kNoDeps;
}
};
template <typename T>
struct CudaReciprocalFunctor : public BaseActivationFunctor<T> {
T one = static_cast<T>(1.0f);
...
...
@@ -395,50 +350,6 @@ struct CudaRelu6GradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename T>
struct CudaSwishFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}};
}
// swish(x) = x / (1 + exp(-beta * x))
__device__ __forceinline__ T operator()(const T arg_x) const {
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
return static_cast<T>(x / (one + exp(-b * x)));
}
};
template <typename T>
struct CudaSwishGradFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
MPType one = static_cast<MPType>(1.0f);
float beta;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"beta", &beta}};
}
// dx = dout * (1 + exp(-b * x) + b * x * exp(-b * x) / (1 + exp(-b * x))^2)
__device__ __forceinline__ T operator()(const T arg_dout,
const T arg_x) const {
MPType dout = static_cast<MPType>(arg_dout);
MPType x = static_cast<MPType>(arg_x);
MPType b = static_cast<MPType>(beta);
MPType temp1 = one / (one + exp(-b * x));
MPType out = x * temp1;
MPType temp2 = b * out;
MPType temp3 = temp1 * (one - temp2);
return static_cast<T>(dout * (temp2 + temp3));
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaMishFunctor : public BaseActivationFunctor<T> {
using MPType = typename details::MPTypeTrait<T>::Type;
...
...
@@ -488,58 +399,6 @@ struct CudaMishGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaHardSwishFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
float threshold;
float scale;
float offset;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
}
// hard_swish(x) = 0, when x <= -offset
// x , when x >= threshold - offset
// x * (x + offset) / scale, otherwise
// threshold = scale = 6, offset = 3 by default
__device__ __forceinline__ T operator()(const T x) const {
T t = static_cast<T>(threshold);
T temp = x + static_cast<T>(offset);
T temp_max = temp > zero ? temp : zero;
T temp_min = temp_max < t ? temp_max : t;
return temp_min * x / static_cast<T>(scale);
}
};
template <typename T>
struct CudaHardSwishGradFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
T one = static_cast<T>(1.0f);
T two = static_cast<T>(2.0f);
float threshold;
float scale;
float offset;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
}
// dx = 0, when x <= -offset
// dout , when x >= threshold - offset
// dout * (2 * x / scale + offset / scale), otherwise
// threshold = scale = 6, offset = 3 by default
__device__ __forceinline__ T operator()(const T dout, const T x) const {
T o = static_cast<T>(offset);
T s = static_cast<T>(scale);
T temp1 = static_cast<T>(x + o > zero);
T temp2 = static_cast<T>(x + o < static_cast<T>(threshold));
return dout * (temp1 * temp2 * (two * x + o) / s + one - temp2);
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};
template <typename T>
struct CudaCELUFunctor : public BaseActivationFunctor<T> {
using CT = typename details::MPTypeTrait<T>::Type;
...
...
@@ -684,6 +543,20 @@ USE_PHI_FUNCTOR(CudaLog)
USE_PHI_FUNCTOR(CudaLog2)
USE_PHI_FUNCTOR(CudaLog10)
USE_PHI_FUNCTOR(CudaLog1p)
USE_PHI_FUNCTOR(CudaSwish)
USE_PHI_FUNCTOR(CudaHardSwish)
template <typename T>
using CudaRoundFunctor = phi::funcs::CudaRoundFunctor<T>;
template <typename T>
using CudaFloorFunctor = phi::funcs::CudaFloorFunctor<T>;
template <typename T>
using CudaCeilFunctor = phi::funcs::CudaCeilFunctor<T>;
template <typename T>
using CudaZeroGradFunctor = phi::funcs::CudaZeroGradFunctor<T>;
template <typename T>
using CudaELUGradNegativeAlphaFunctor =
...
...
@@ -813,23 +686,6 @@ REGISTER_OP_CUDA_KERNEL(
ops::SquareGradGradFunctor<int64_t>>);
/* ========================================================================== */
/* ========================== pow register ============================ */
REGISTER_OP_CUDA_KERNEL(
pow, ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<float>>,
ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<double>>,
ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<int>>,
ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<int64_t>>,
ops::PowKernel<plat::CUDADeviceContext, ops::PowFunctor<plat::float16>>);
REGISTER_OP_CUDA_KERNEL(
pow_grad,
ops::PowGradKernel<plat::CUDADeviceContext, ops::PowGradFunctor<float>>,
ops::PowGradKernel<plat::CUDADeviceContext, ops::PowGradFunctor<double>>,
ops::PowGradKernel<plat::CUDADeviceContext, ops::PowGradFunctor<int>>,
ops::PowGradKernel<plat::CUDADeviceContext, ops::PowGradFunctor<int64_t>>,
ops::PowGradKernel<plat::CUDADeviceContext,
ops::PowGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== logit register ============================ */
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
...
...
@@ -889,9 +745,6 @@ REGISTER_OP_CUDA_KERNEL(
#define FOR_EACH_ACTIVATION_CUDA_OP(__macro) \
__macro(softshrink, SoftShrink, CudaSoftShrinkFunctor, \
CudaSoftShrinkGradFunctor); \
__macro(ceil, Ceil, CudaCeilFunctor, CudaZeroGradFunctor); \
__macro(floor, Floor, CudaFloorFunctor, CudaZeroGradFunctor); \
__macro(round, Round, CudaRoundFunctor, CudaZeroGradFunctor); \
__macro(reciprocal, Reciprocal, CudaReciprocalFunctor, \
CudaReciprocalGradFunctor); \
__macro(soft_relu, SoftRelu, CudaSoftReluFunctor, CudaSoftReluGradFunctor); \
...
...
@@ -903,10 +756,7 @@ REGISTER_OP_CUDA_KERNEL(
CudaTanhShrinkGradFunctor); \
__macro(hard_shrink, HardShrink, CudaHardShrinkFunctor, \
CudaHardShrinkGradFunctor); \
__macro(swish, Swish, CudaSwishFunctor, CudaSwishGradFunctor); \
__macro(mish, Mish, CudaMishFunctor, CudaMishGradFunctor); \
__macro(hard_swish, HardSwish, CudaHardSwishFunctor, \
CudaHardSwishGradFunctor);
__macro(mish, Mish, CudaMishFunctor, CudaMishGradFunctor);
FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)
#ifdef PADDLE_WITH_XPU_KP
...
...
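For reference, with the default attributes noted in the CudaHardSwishFunctor comments above (threshold = scale = 6, offset = 3), hard_swish reduces to min(max(0, x + 3), 6) * x / 6. A standalone scalar sketch of that formula (illustration only, not part of this diff):

    #include <algorithm>
    #include <cstdio>

    // Scalar reference of hard_swish with the default attrs used in this diff.
    float hard_swish_ref(float x, float threshold = 6.f, float scale = 6.f,
                         float offset = 3.f) {
      return std::min(std::max(x + offset, 0.f), threshold) * x / scale;
    }

    int main() {
      // hard_swish(-4) = 0, hard_swish(1) = 4/6 ~ 0.667,
      // hard_swish(4) = 4 (x >= threshold - offset, so the output equals x).
      std::printf("%f %f %f\n", hard_swish_ref(-4.f), hard_swish_ref(1.f),
                  hard_swish_ref(4.f));
      return 0;
    }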
paddle/phi/kernels/activation_grad_kernel.h
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/unary.h"
...
...
@@ -50,6 +51,11 @@ namespace phi {
const DenseTensor& dout, \
DenseTensor* dx);
#define DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(name) \
template <typename T, typename Context> \
void name##GradKernel( \
const Context& dev_ctx, const DenseTensor& dout, DenseTensor* dx);
#define DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPOUT(name, attr) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
...
...
@@ -143,6 +149,22 @@ void LogDoubleGradKernel(const Context& dev_ctx,
                         DenseTensor* dx,
                         DenseTensor* ddout);

template <typename T, typename Context>
void HardSwishGradKernel(const Context& dev_ctx,
                         const DenseTensor& x,
                         const DenseTensor& dout,
                         float threshold,
                         float scale,
                         float offset,
                         DenseTensor* dx);

template <typename T, typename Context>
void PowGradKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& dout,
                   const Scalar& factor,
                   DenseTensor* dx);

DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Cos);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Tan);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Acos);
...
...
@@ -166,10 +188,15 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh);
DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Round);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Floor);
DECLARE_ACTIVATION_GRAD_KERNEL_NODEP(Ceil);

DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, alpha);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(ThresholdedRelu, threshold);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink, lambda);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink, threshold);
DECLARE_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish, beta);

DECLARE_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu, t_min, t_max);
...
...
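The new DECLARE_ACTIVATION_GRAD_KERNEL_NODEP shape reflects that round, floor, and ceil are piecewise constant, so their derivative is zero almost everywhere; the backward kernel therefore needs neither x nor out, only dout (for shape and type):

    \frac{d}{dx}\,\mathrm{round}(x) = \frac{d}{dx}\,\lfloor x\rfloor = \frac{d}{dx}\,\lceil x\rceil = 0 \quad \text{(almost everywhere)}

which is exactly what ZeroGradFunctor (dx = 0 * out) and the kNoDeps forward dependency encode.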
paddle/phi/kernels/activation_kernel.h
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/unary.h"
...
...
@@ -60,13 +61,32 @@ DECLARE_ACTIVATION_KERNEL(Log)
DECLARE_ACTIVATION_KERNEL(Log2)
DECLARE_ACTIVATION_KERNEL(Log10)
DECLARE_ACTIVATION_KERNEL(Log1p)
DECLARE_ACTIVATION_KERNEL(Round)
DECLARE_ACTIVATION_KERNEL(Floor)
DECLARE_ACTIVATION_KERNEL(Ceil)

DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(SoftShrink, lambda)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(HardShrink, threshold)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Elu, alpha)
DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Swish, beta)

DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(BRelu, t_min, t_max)
DECLARE_ACTIVATION_KERNEL_WITH_TWO_ATTRS(HardSigmoid, slope, offset)

template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     float threshold,
                     float scale,
                     float offset,
                     DenseTensor* out);

template <typename T, typename Context>
void PowKernel(const Context& dev_ctx,
               const DenseTensor& x,
               const Scalar& factor,
               DenseTensor* out);

}  // namespace phi
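A hypothetical call site for the newly declared HardSwishKernel, using the default attribute values documented elsewhere in this diff (threshold = 6, scale = 6, offset = 3); the names dev_ctx, x, and out are assumptions standing for an already prepared phi::CPUContext and DenseTensors:

    // Sketch only: dev_ctx/x/out are assumed names, not part of this commit.
    phi::HardSwishKernel<float, phi::CPUContext>(
        dev_ctx, x, /*threshold=*/6.0f, /*scale=*/6.0f, /*offset=*/3.0f, &out);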
paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -107,6 +107,15 @@ namespace phi {
dev_ctx, nullptr, &out, &dout, dx, functor); \
}
#define DEFINE_CPU_ACTIVATION_GRAD_KERNEL_NODEP(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel( \
const Context& dev_ctx, const DenseTensor& dout, DenseTensor* dx) { \
funcs::functor_class<T> functor; \
ActivationGradImpl<T, Context, funcs::functor_class<T>>( \
dev_ctx, nullptr, nullptr, &dout, dx, functor); \
}
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Cos, CosGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Tan, TanGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Acos, AcosGradFunctor);
...
...
@@ -130,6 +139,10 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, ReluGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, TanhGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid, SigmoidGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_NODEP(Round, ZeroGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_NODEP(Floor, ZeroGradFunctor);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_NODEP(Ceil, ZeroGradFunctor);

DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
                                               LeakyReluGradFunctor,
                                               alpha);
...
...
@@ -142,6 +155,7 @@ DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink,
                                               HardShrinkGradFunctor,
                                               threshold);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish, SwishGradFunctor, beta);
DEFINE_CPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu,
                                               BReluGradFunctor,
...
...
@@ -183,6 +197,23 @@ void EluGradKernel(const Context& dev_ctx,
  }
}

template <typename T, typename Context>
void HardSwishGradKernel(const Context& dev_ctx,
                         const DenseTensor& x,
                         const DenseTensor& dout,
                         float threshold,
                         float scale,
                         float offset,
                         DenseTensor* dx) {
  funcs::HardSwishGradFunctor<T> functor;
  auto attrs = functor.GetAttrs();
  *(attrs[0].second) = threshold;
  *(attrs[1].second) = scale;
  *(attrs[2].second) = offset;
  ActivationGradImpl<T, Context, funcs::HardSwishGradFunctor<T>>(
      dev_ctx, &x, nullptr, &dout, dx, functor);
}

}  // namespace phi

PD_REGISTER_KERNEL(
...
@@ -242,3 +273,17 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(log2_grad, Log2GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(log10_grad, Log10GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(log1p_grad, Log1pGradKernel)
PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL(log_double_grad, LogDoubleGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_swish_grad, HardSwishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(swish_grad, SwishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(round_grad, RoundGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(floor_grad, FloorGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(ceil_grad, CeilGradKernel)
PD_REGISTER_KERNEL(pow_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::PowGradKernel,
                   float,
                   double,
                   int,
                   int64_t) {}
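As a reading aid, DEFINE_CPU_ACTIVATION_GRAD_KERNEL_NODEP(Round, ZeroGradFunctor) above expands to roughly the following (a sketch of the macro expansion from this file, not an extra definition):

    template <typename T, typename Context>
    void RoundGradKernel(const Context& dev_ctx,
                         const DenseTensor& dout,
                         DenseTensor* dx) {
      funcs::ZeroGradFunctor<T> functor;
      // Neither the forward input nor the forward output is passed in;
      // the gradient of round is identically zero, so only dout is needed.
      ActivationGradImpl<T, Context, funcs::ZeroGradFunctor<T>>(
          dev_ctx, nullptr, nullptr, &dout, dx, functor);
    }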
paddle/phi/kernels/cpu/activation_kernel.cc
@@ -78,6 +78,9 @@ DEFINE_CPU_ACTIVATION_KERNEL(Log, LogFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Log2, Log2Functor)
DEFINE_CPU_ACTIVATION_KERNEL(Log10, Log10Functor)
DEFINE_CPU_ACTIVATION_KERNEL(Log1p, Log1pFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Round, RoundFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Floor, FloorFunctor)
DEFINE_CPU_ACTIVATION_KERNEL(Ceil, CeilFunctor)

DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
...
...
@@ -86,6 +89,7 @@ DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink, HardShrinkFunctor, threshold)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, SoftShrinkFunctor, lambda)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, ELUFunctor, alpha)
DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(Swish, SwishFunctor, beta)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, BReluFunctor, t_min, t_max)
DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
...
...
@@ -93,6 +97,22 @@ DEFINE_CPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
                                     slope,
                                     offset)

template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     float threshold,
                     float scale,
                     float offset,
                     DenseTensor* out) {
  funcs::HardSwishFunctor<T> functor;
  auto attrs = functor.GetAttrs();
  *(attrs[0].second) = threshold;
  *(attrs[1].second) = scale;
  *(attrs[2].second) = offset;
  ActivationImpl<T, Context, funcs::HardSwishFunctor<T>>(
      dev_ctx, x, out, functor);
}

}  // namespace phi

PD_REGISTER_KERNEL(relu, CPU, ALL_LAYOUT, phi::ReluKernel, float, double) {}
...
...
@@ -126,3 +146,10 @@ PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel)
PD_REGISTER_ACTIVATION_KERNEL(log2, Log2Kernel)
PD_REGISTER_ACTIVATION_KERNEL(log10, Log10Kernel)
PD_REGISTER_ACTIVATION_KERNEL(log1p, Log1pKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_swish, HardSwishKernel)
PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel)
PD_REGISTER_ACTIVATION_KERNEL(round, RoundKernel)
PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel)
PD_REGISTER_ACTIVATION_KERNEL(ceil, CeilKernel)
PD_REGISTER_KERNEL(
    pow, CPU, ALL_LAYOUT, phi::PowKernel, float, double, int, int64_t) {}
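The swish kernel registered above computes swish(x) = x / (1 + exp(-beta * x)), as documented in the CudaSwishFunctor comment elsewhere in this diff; beta is an op attribute. A standalone scalar sketch (beta assumed to be 1 for the example; illustration only, not part of this diff):

    #include <cmath>
    #include <cstdio>

    // Scalar reference of swish; beta is an op attribute in Paddle, assumed 1.0 here.
    float swish_ref(float x, float beta = 1.0f) {
      return x / (1.0f + std::exp(-beta * x));
    }

    int main() {
      // swish(0) = 0; for large positive x, swish(x) approaches x (swish(5) ~ 4.97).
      std::printf("%f %f\n", swish_ref(0.0f), swish_ref(5.0f));
      return 0;
    }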
paddle/phi/kernels/funcs/activation_functor.h
@@ -1350,6 +1350,165 @@ struct LogGradGradFunctor : public BaseActivationFunctor<T> {
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// HardSwish = min(max(0, x+3), 6) * x / 6
template <typename T>
struct HardSwishFunctor : public BaseActivationFunctor<T> {
  float threshold;
  float scale;
  float offset;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
  }

  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = (x + static_cast<T>(offset))
                        .cwiseMax(static_cast<T>(0))
                        .cwiseMin(static_cast<T>(threshold)) *
                    x / static_cast<T>(scale);
  }
};

template <typename T>
struct HardSwishGradFunctor : public BaseActivationFunctor<T> {
  float threshold;
  float scale;
  float offset;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
  }

  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    auto tmp = ((x + static_cast<T>(offset)) < static_cast<T>(threshold))
                   .template cast<T>();
    dx.device(d) =
        dout *
        (((x + static_cast<T>(offset)) > static_cast<T>(0)).template cast<T>() *
             (static_cast<T>(2) * x + static_cast<T>(offset)) /
             static_cast<T>(scale) * tmp +
         static_cast<T>(1) * (static_cast<T>(1) - tmp));
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct SwishFunctor : public BaseActivationFunctor<T> {
  float beta;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"beta", &beta}};
  }

  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x / (static_cast<T>(1) + (static_cast<T>(-beta) * x).exp());
  }
};

template <typename T>
struct SwishGradFunctor : public BaseActivationFunctor<T> {
  float beta;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"beta", &beta}};
  }

  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out fake_out, dOut dout, dX dx) const {
    auto temp1 = static_cast<T>(1) /
                 (static_cast<T>(1) + (static_cast<T>(-beta) * x).exp());
    auto out = x * temp1;
    auto temp2 = temp1 * (static_cast<T>(1) - (static_cast<T>(beta) * out));
    dx.device(d) = dout * ((static_cast<T>(beta) * out) + temp2);
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5198
template <typename T>
struct PowFunctor : public BaseActivationFunctor<T> {
  float factor;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"factor", &factor}};
  }
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x.pow(static_cast<T>(factor));
  }
};

template <typename T>
struct PowGradFunctor : public BaseActivationFunctor<T> {
  float factor;
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"factor", &factor}};
  }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) = dout * static_cast<T>(factor) *
                   x.pow(static_cast<T>(factor) - static_cast<T>(1));
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// floor(x) = flooring(x)
template <typename T>
struct FloorFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x.floor();
  }
};

// round(x) = [x]
template <typename T>
struct RoundFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x.round();
  }
};

// ceil(x) = ceiling(x)
template <typename T>
struct CeilFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out>
  void operator()(Device d, X x, Out out) const {
    out.device(d) = x.ceil();
  }
};

template <typename T>
struct ZeroGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) = static_cast<T>(0) * out;
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() {
    return ActBwdOpFwdDeps::kNoDeps;
  }
};

#if defined(__NVCC__) || defined(__HIPCC__) || defined(__xpu__)
template <typename T>
struct CudaReluFunctor : public BaseActivationFunctor<T> {
...
...
@@ -2190,6 +2349,147 @@ struct CudaLog10GradFunctor : public BaseActivationFunctor<T> {
  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct CudaSwishFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  MPType one = static_cast<MPType>(1.0f);
  float beta;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"beta", &beta}};
  }

  // swish(x) = x / (1 + exp(-beta * x))
  __device__ __forceinline__ T operator()(const T arg_x) const {
    MPType x = static_cast<MPType>(arg_x);
    MPType b = static_cast<MPType>(beta);
    return static_cast<T>(x / (one + exp(-b * x)));
  }
};

template <typename T>
struct CudaSwishGradFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
  MPType one = static_cast<MPType>(1.0f);
  float beta;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"beta", &beta}};
  }

  // dx = dout * (1 + exp(-b * x) + b * x * exp(-b * x) / (1 + exp(-b * x))^2)
  __device__ __forceinline__ T operator()(const T arg_dout,
                                          const T arg_x) const {
    MPType dout = static_cast<MPType>(arg_dout);
    MPType x = static_cast<MPType>(arg_x);
    MPType b = static_cast<MPType>(beta);
    MPType temp1 = one / (one + exp(-b * x));
    MPType out = x * temp1;
    MPType temp2 = b * out;
    MPType temp3 = temp1 * (one - temp2);
    return static_cast<T>(dout * (temp2 + temp3));
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct CudaHardSwishFunctor : public BaseActivationFunctor<T> {
  T zero = static_cast<T>(0.0f);
  float threshold;
  float scale;
  float offset;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
  }

  // hard_swish(x) = 0, when x <= -offset
  //                 x , when x >= threshold - offset
  //                 x * (x + offset) / scale, otherwise
  // threshold = scale = 6, offset = 3 by default
  __device__ __forceinline__ T operator()(const T x) const {
    T t = static_cast<T>(threshold);
    T temp = x + static_cast<T>(offset);
    T temp_max = temp > zero ? temp : zero;
    T temp_min = temp_max < t ? temp_max : t;
    return temp_min * x / static_cast<T>(scale);
  }
};

template <typename T>
struct CudaHardSwishGradFunctor : public BaseActivationFunctor<T> {
  T zero = static_cast<T>(0.0f);
  T one = static_cast<T>(1.0f);
  T two = static_cast<T>(2.0f);
  float threshold;
  float scale;
  float offset;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}, {"scale", &scale}, {"offset", &offset}};
  }

  // dx = 0, when x <= -offset
  //      dout , when x >= threshold - offset
  //      dout * (2 * x / scale + offset / scale), otherwise
  // threshold = scale = 6, offset = 3 by default
  __device__ __forceinline__ T operator()(const T dout, const T x) const {
    T o = static_cast<T>(offset);
    T s = static_cast<T>(scale);
    T temp1 = static_cast<T>(x + o > zero);
    T temp2 = static_cast<T>(x + o < static_cast<T>(threshold));
    return dout * (temp1 * temp2 * (two * x + o) / s + one - temp2);
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct CudaCeilFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;

  // ceil(x) = ceil(x)
  __device__ __forceinline__ T operator()(const T arg_x) const {
    MPType x = static_cast<MPType>(arg_x);
    return static_cast<T>(ceil(x));
  }
};

template <typename T>
struct CudaFloorFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;

  // floor(x) = floor(x)
  __device__ __forceinline__ T operator()(const T arg_x) const {
    MPType x = static_cast<MPType>(arg_x);
    return static_cast<T>(floor(x));
  }
};

template <typename T>
struct CudaRoundFunctor : public BaseActivationFunctor<T> {
  using MPType = typename phi::dtype::MPTypeTrait<T>::Type;

  // round(x) = round(x)
  __device__ __forceinline__ T operator()(const T arg_x) const {
    MPType x = static_cast<MPType>(arg_x);
    return static_cast<T>(round(x));
  }
};

// GradFunctor for ceil, floor and round
template <typename T>
struct CudaZeroGradFunctor : public BaseActivationFunctor<T> {
  __device__ __forceinline__ T operator()(const T x) const {
    return static_cast<T>(0.0f);
  }

  static constexpr ActBwdOpFwdDeps FwdDeps() {
    return ActBwdOpFwdDeps::kNoDeps;
  }
};

#endif

}  // namespace funcs
...
...
paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -159,10 +159,23 @@ void ActivationGradGPUImpl(const Context& dev_ctx,
dev_ctx, nullptr, &out, &dout, dx, functor); \
}
#define DEFINE_GPU_ACTIVATION_GRAD_KERNEL_NODEP(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel( \
const Context& dev_ctx, const DenseTensor& dout, DenseTensor* dx) { \
funcs::functor_class<T> functor; \
ActivationGradGPUImpl<T, Context, funcs::functor_class<T>>( \
dev_ctx, nullptr, nullptr, &dout, dx, functor); \
}
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, CudaReluGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, CudaTanhGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid, CudaSigmoidGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_NODEP(Round, CudaZeroGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_NODEP(Floor, CudaZeroGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_NODEP(Ceil, CudaZeroGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Cos, CudaCosGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Tan, CudaTanGradFunctor);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Acos, CudaAcosGradFunctor);
...
...
@@ -194,6 +207,9 @@ DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(SoftShrink,
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(HardShrink,
                                               CudaHardShrinkGradFunctor,
                                               threshold);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish,
                                               CudaSwishGradFunctor,
                                               beta);
DEFINE_GPU_ACT_GRAD_KERNEL_WITH_TWO_ATTRS_DEPX(BRelu,
                                               CudaBReluGradFunctor,
...
...
@@ -227,6 +243,23 @@ void EluGradKernel(const Context& dev_ctx,
  }
}

template <typename T, typename Context>
void HardSwishGradKernel(const Context& dev_ctx,
                         const DenseTensor& x,
                         const DenseTensor& dout,
                         float threshold,
                         float scale,
                         float offset,
                         DenseTensor* dx) {
  funcs::CudaHardSwishGradFunctor<T> functor;
  auto attrs = functor.GetAttrs();
  *(attrs[0].second) = threshold;
  *(attrs[1].second) = scale;
  *(attrs[2].second) = offset;
  ActivationGradGPUImpl<T, Context, funcs::CudaHardSwishGradFunctor<T>>(
      dev_ctx, &x, nullptr, &dout, dx, functor);
}

}  // namespace phi
#ifdef PADDLE_WITH_HIP
...
...
@@ -315,3 +348,18 @@ PD_REGISTER_KERNEL(log_double_grad,
                   float,
                   double,
                   phi::dtype::float16) {}

PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_swish_grad, HardSwishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(swish_grad, SwishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(round_grad, RoundGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(floor_grad, FloorGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(ceil_grad, CeilGradKernel)
PD_REGISTER_KERNEL(pow_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::PowGradKernel,
                   float,
                   double,
                   int,
                   int64_t,
                   phi::dtype::float16) {}
paddle/phi/kernels/gpu/activation_kernel.cu
@@ -97,6 +97,9 @@ DEFINE_GPU_ACTIVATION_KERNEL(Log, CudaLogFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Log2, CudaLog2Functor)
DEFINE_GPU_ACTIVATION_KERNEL(Log10, CudaLog10Functor)
DEFINE_GPU_ACTIVATION_KERNEL(Log1p, CudaLog1pFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Round, CudaRoundFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Floor, CudaFloorFunctor)
DEFINE_GPU_ACTIVATION_KERNEL(Ceil, CudaCeilFunctor)

DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu,
...
...
@@ -107,6 +110,7 @@ DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(HardShrink,
                                     threshold)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(SoftShrink, CudaSoftShrinkFunctor, lambda)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Elu, CudaELUFunctor, alpha)
DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(Swish, CudaSwishFunctor, beta)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(BRelu, CudaBReluFunctor, t_min, t_max)
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
...
...
@@ -114,6 +118,22 @@ DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
                                     slope,
                                     offset)

template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
                     const DenseTensor& x,
                     float threshold,
                     float scale,
                     float offset,
                     DenseTensor* out) {
  funcs::CudaHardSwishFunctor<T> functor;
  auto attrs = functor.GetAttrs();
  *(attrs[0].second) = threshold;
  *(attrs[1].second) = scale;
  *(attrs[2].second) = offset;
  ActivationGPUImpl<T, Context, funcs::CudaHardSwishFunctor<T>>(
      dev_ctx, x, out, functor);
}

}  // namespace phi
#ifdef PADDLE_WITH_HIP
...
...
@@ -172,3 +192,17 @@ PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel)
PD_REGISTER_ACTIVATION_KERNEL(log2, Log2Kernel)
PD_REGISTER_ACTIVATION_KERNEL(log10, Log10Kernel)
PD_REGISTER_ACTIVATION_KERNEL(log1p, Log1pKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_swish, HardSwishKernel)
PD_REGISTER_ACTIVATION_KERNEL(swish, SwishKernel)
PD_REGISTER_ACTIVATION_KERNEL(round, RoundKernel)
PD_REGISTER_ACTIVATION_KERNEL(floor, FloorKernel)
PD_REGISTER_ACTIVATION_KERNEL(ceil, CeilKernel)
PD_REGISTER_KERNEL(pow,
                   GPU,
                   ALL_LAYOUT,
                   phi::PowKernel,
                   float,
                   double,
                   int,
                   int64_t,
                   phi::dtype::float16) {}
paddle/phi/kernels/impl/activation_grad_impl.h
@@ -293,4 +293,28 @@ void LogDoubleGradKernel(const Context& dev_ctx,
  functor(dev_ctx, &x, &ddx, ddout, &dout, dx);
}

template <typename T, typename Context>
void PowGradKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& dout,
                   const Scalar& factor,
                   DenseTensor* dx) {
  PADDLE_ENFORCE_NOT_NULL(
      dx, errors::NotFound("The output DenseTensor dX can not be nullptr"));
  if (dx) {
    dev_ctx.template Alloc<T>(dx);
  }
  auto dout_flatten = EigenVector<T>::Flatten(
      GET_DATA_SAFELY(&dout, "Input", "Out@GRAD", "PowGrad"));
  auto dx_flatten = EigenVector<T>::Flatten(
      GET_DATA_SAFELY(dx, "Output", "X@GRAD", "PowGrad"));
  auto x_flatten =
      EigenVector<T>::Flatten(GET_DATA_SAFELY(&x, "Input", "X", "PowGrad"));
  auto* place = dev_ctx.eigen_device();
  phi::funcs::PowGradFunctor<T> functor;
  auto attrs = functor.GetAttrs();
  *(attrs[0].second) = factor.to<float>();
  functor(*place, x_flatten, nullptr, dout_flatten, dx_flatten);
}

}  // namespace phi
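The PowGradFunctor invoked here applies the usual power rule,

    dx = dout * factor * x^(factor - 1)

so, for example, with factor = 3 and x = 2 the backward pass yields dx = dout * 3 * 2^2 = 12 * dout. The Scalar factor is converted once via factor.to<float>() and written into the functor's "factor" attribute before the Eigen expression runs.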
paddle/phi/kernels/impl/activation_impl.h
@@ -47,4 +47,23 @@ void ActivationImpl(const Context& dev_ctx,
  }
}

template <typename T, typename Context>
void PowKernel(const Context& dev_ctx,
               const DenseTensor& x,
               const Scalar& factor,
               DenseTensor* out) {
  PADDLE_ENFORCE_NOT_NULL(out,
                          errors::NotFound("Output Out should not be nullptr"));
  dev_ctx.template Alloc<T>(out);
  auto x_flatten = phi::EigenVector<T>::Flatten(
      GET_DATA_SAFELY(&x, "Input", "X", "Activation"));
  auto out_flatten = phi::EigenVector<T>::Flatten(
      GET_DATA_SAFELY(out, "Output", "Out", "Activation"));
  auto* place = dev_ctx.eigen_device();
  phi::funcs::PowFunctor<T> functor;
  auto attrs = functor.GetAttrs();
  *(attrs[0].second) = factor.to<float>();
  functor(*place, x_flatten, out_flatten);
}

}  // namespace phi
paddle/phi/ops/compat/activation_sig.cc
@@ -34,6 +34,13 @@ namespace phi {
{GradVarName("X")}); \
}
#define DEFINE_ACT_GRAD_NODEP_OP_ARGMAP(func_name, op_name, attrs) \
KernelSignature func_name##GradOpArgumentMapping( \
const ArgumentMappingContext& ctx) { \
return KernelSignature( \
op_name "_grad", {GradVarName("Out")}, {attrs}, {GradVarName("X")}); \
}
#define comma ,
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Cos, "cos", );  // NOLINT
...
...
@@ -61,6 +68,11 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log, "log", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log2, "log2", );    // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log10, "log10", );  // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log1p, "log1p", );  // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardSwish,
                               "hard_swish",
                               "threshold" comma "scale" comma
                               "offset");  // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Swish, "swish", "beta");  // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", );  // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", );  // NOLINT
...
...
@@ -69,6 +81,10 @@ DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(HardSigmoid,
"hard_sigmoid"
,
"slope"
comma
"offset"
);
// NOLINT
DEFINE_ACT_GRAD_NODEP_OP_ARGMAP
(
Round
,
"round"
,
);
// NOLINT
DEFINE_ACT_GRAD_NODEP_OP_ARGMAP
(
Floor
,
"floor"
,
);
// NOLINT
DEFINE_ACT_GRAD_NODEP_OP_ARGMAP
(
Ceil
,
"ceil"
,
);
// NOLINT
KernelSignature
ReluDoubleGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
return
KernelSignature
(
"relu_double_grad"
,
{
"Out"
,
"DDX"
},
{},
{
"DDOut"
});
...
...
@@ -135,6 +151,26 @@ KernelSignature LogDoubleGradOpArgumentMapping(
"log_double_grad"
,
{
"X"
,
"DOut"
,
"DDX"
},
{},
{
"DX"
,
"DDOut"
});
}
KernelSignature
PowOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
if
(
ctx
.
HasInput
(
"FactorTensor"
))
{
return
KernelSignature
(
"pow"
,
{
"X"
},
{
"FactorTensor"
},
{
"Out"
});
}
else
{
return
KernelSignature
(
"pow"
,
{
"X"
},
{
"factor"
},
{
"Out"
});
}
}
KernelSignature
PowGradOpArgumentMapping
(
const
ArgumentMappingContext
&
ctx
)
{
if
(
ctx
.
HasInput
(
"FactorTensor"
))
{
return
KernelSignature
(
"pow_grad"
,
{
"X"
,
GradVarName
(
"Out"
)},
{
"FactorTensor"
},
{
GradVarName
(
"X"
)});
}
else
{
return
KernelSignature
(
"pow_grad"
,
{
"X"
,
GradVarName
(
"Out"
)},
{
"factor"
},
{
GradVarName
(
"X"
)});
}
}
}
// namespace phi
PD_REGISTER_BASE_KERNEL_NAME
(
relu_grad_grad
,
relu_double_grad
);
...
...
@@ -197,3 +233,11 @@ PD_REGISTER_ARG_MAPPING_FN(log_grad_grad, phi::LogDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log2_grad, phi::Log2GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log10_grad, phi::Log10GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log1p_grad, phi::Log1pGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_swish_grad,
                           phi::HardSwishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(swish_grad, phi::SwishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(round_grad, phi::RoundGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(floor_grad, phi::FloorGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(ceil_grad, phi::CeilGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(pow_grad, phi::PowGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(pow, phi::PowOpArgumentMapping);