Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
bff0cbfc
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
bff0cbfc
编写于
12月 26, 2017
作者:
F
fengjiayi
提交者:
GitHub
12月 26, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7025 from JiayiFeng/rename_output_of_softmax_and_activitions
Rename output of softmax and activations
上级
073746f5
e41a71ce
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
353 addition
and
323 deletion
+353
-323
paddle/operators/activation_op.cc
paddle/operators/activation_op.cc
+59
-59
paddle/operators/activation_op.h
paddle/operators/activation_op.h
+208
-179
paddle/operators/softmax_op.cc
paddle/operators/softmax_op.cc
+11
-11
paddle/operators/softmax_op.h
paddle/operators/softmax_op.h
+6
-6
python/paddle/v2/fluid/layer_helper.py
python/paddle/v2/fluid/layer_helper.py
+1
-1
python/paddle/v2/fluid/layers/nn.py
python/paddle/v2/fluid/layers/nn.py
+10
-9
python/paddle/v2/fluid/tests/test_activation_op.py
python/paddle/v2/fluid/tests/test_activation_op.py
+54
-54
python/paddle/v2/fluid/tests/test_net.py
python/paddle/v2/fluid/tests/test_net.py
+2
-2
python/paddle/v2/fluid/tests/test_softmax_op.py
python/paddle/v2/fluid/tests/test_softmax_op.py
+2
-2
未找到文件。
paddle/operators/activation_op.cc
浏览文件 @
bff0cbfc
...
...
@@ -22,8 +22,8 @@ class ActivationOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
ctx
->
SetOutputDim
(
"
Y
"
,
ctx
->
GetInputDim
(
"X"
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"
Y
"
);
ctx
->
SetOutputDim
(
"
Out
"
,
ctx
->
GetInputDim
(
"X"
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"
Out
"
);
}
};
...
...
@@ -32,7 +32,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"
Y
"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"
Out
"
));
}
};
...
...
@@ -41,11 +41,11 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
SigmoidOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Sigmoid operator"
);
AddOutput
(
"
Y
"
,
"Output of Sigmoid operator"
);
AddOutput
(
"
Out
"
,
"Output of Sigmoid operator"
);
AddComment
(
R"DOC(
Sigmoid Activation Operator
$$
y
= \frac{1}{1 + e^{-x}}$$
$$
out
= \frac{1}{1 + e^{-x}}$$
)DOC"
);
}
...
...
@@ -56,11 +56,11 @@ class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
LogSigmoidOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of LogSigmoid operator"
);
AddOutput
(
"
Y
"
,
"Output of LogSigmoid operator"
);
AddOutput
(
"
Out
"
,
"Output of LogSigmoid operator"
);
AddComment
(
R"DOC(
Logsigmoid Activation Operator
$$
y
= \log \frac{1}{1 + e^{-x}}$$
$$
out
= \log \frac{1}{1 + e^{-x}}$$
)DOC"
);
}
...
...
@@ -71,11 +71,11 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
ExpOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Exp operator"
);
AddOutput
(
"
Y
"
,
"Output of Exp operator"
);
AddOutput
(
"
Out
"
,
"Output of Exp operator"
);
AddComment
(
R"DOC(
Exp Activation Operator.
$
y
= e^x$
$
out
= e^x$
)DOC"
);
}
...
...
@@ -86,11 +86,11 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
ReluOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Relu operator"
);
AddOutput
(
"
Y
"
,
"Output of Relu operator"
);
AddOutput
(
"
Out
"
,
"Output of Relu operator"
);
AddComment
(
R"DOC(
Relu Activation Operator.
$
y
= \max(x, 0)$
$
out
= \max(x, 0)$
)DOC"
);
}
...
...
@@ -101,12 +101,12 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
LeakyReluOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of LeakyRelu operator"
);
AddOutput
(
"
Y
"
,
"Output of LeakyRelu operator"
);
AddOutput
(
"
Out
"
,
"Output of LeakyRelu operator"
);
AddAttr
<
float
>
(
"alpha"
,
"The small negative slope"
).
SetDefault
(
0.02
f
);
AddComment
(
R"DOC(
LeakyRelu Activation Operator.
$
y
= \max(x, \alpha * x)$
$
out
= \max(x, \alpha * x)$
)DOC"
);
}
...
...
@@ -117,13 +117,13 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
SoftShrinkOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Softshrink operator"
);
AddOutput
(
"
Y
"
,
"Output of Softshrink operator"
);
AddOutput
(
"
Out
"
,
"Output of Softshrink operator"
);
AddAttr
<
float
>
(
"lambda"
,
"non-negative offset"
).
SetDefault
(
0.5
f
);
AddComment
(
R"DOC(
Softshrink Activation Operator.
$$
y
= \begin{cases}
out
= \begin{cases}
x - \lambda, \text{if } x > \lambda \\
x + \lambda, \text{if } x < -\lambda \\
0, \text{otherwise}
...
...
@@ -139,11 +139,11 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
TanhOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Tanh operator"
);
AddOutput
(
"
Y
"
,
"Output of Tanh operator"
);
AddOutput
(
"
Out
"
,
"Output of Tanh operator"
);
AddComment
(
R"DOC(
Tanh Activation Operator.
$$
y
= \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
$$
out
= \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC"
);
}
...
...
@@ -154,11 +154,11 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
TanhShrinkOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of TanhShrink operator"
);
AddOutput
(
"
Y
"
,
"Output of TanhShrink operator"
);
AddOutput
(
"
Out
"
,
"Output of TanhShrink operator"
);
AddComment
(
R"DOC(
TanhShrink Activation Operator.
$$
y
= x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
$$
out
= x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC"
);
}
...
...
@@ -169,14 +169,14 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
HardShrinkOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of HardShrink operator"
);
AddOutput
(
"
Y
"
,
"Output of HardShrink operator"
);
AddOutput
(
"
Out
"
,
"Output of HardShrink operator"
);
AddAttr
<
float
>
(
"threshold"
,
"The value of threshold for HardShrink"
)
.
SetDefault
(
0.5
f
);
AddComment
(
R"DOC(
HardShrink Activation Operator.
$$
y
= \begin{cases}
out
= \begin{cases}
x, \text{if } x > \lambda \\
x, \text{if } x < -\lambda \\
0, \text{otherwise}
...
...
@@ -192,11 +192,11 @@ class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
SqrtOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Sqrt operator"
);
AddOutput
(
"
Y
"
,
"Output of Sqrt operator"
);
AddOutput
(
"
Out
"
,
"Output of Sqrt operator"
);
AddComment
(
R"DOC(
Sqrt Activation Operator.
$
y
= \sqrt{x}$
$
out
= \sqrt{x}$
)DOC"
);
}
...
...
@@ -207,11 +207,11 @@ class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
AbsOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Abs operator"
);
AddOutput
(
"
Y
"
,
"Output of Abs operator"
);
AddOutput
(
"
Out
"
,
"Output of Abs operator"
);
AddComment
(
R"DOC(
Abs Activation Operator.
$
y
= |x|$
$
out
= |x|$
)DOC"
);
}
...
...
@@ -222,11 +222,11 @@ class CeilOpMaker : public framework::OpProtoAndCheckerMaker {
CeilOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Ceil operator"
);
AddOutput
(
"
Y
"
,
"Output of Ceil operator"
);
AddOutput
(
"
Out
"
,
"Output of Ceil operator"
);
AddComment
(
R"DOC(
Ceil Activation Operator.
$
y
= ceil(x)$
$
out
= ceil(x)$
)DOC"
);
}
...
...
@@ -237,11 +237,11 @@ class FloorOpMaker : public framework::OpProtoAndCheckerMaker {
FloorOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Floor operator"
);
AddOutput
(
"
Y
"
,
"Output of Floor operator"
);
AddOutput
(
"
Out
"
,
"Output of Floor operator"
);
AddComment
(
R"DOC(
Floor Activation Operator.
$
y
= floor(x)$
$
out
= floor(x)$
)DOC"
);
}
...
...
@@ -252,11 +252,11 @@ class RoundOpMaker : public framework::OpProtoAndCheckerMaker {
RoundOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Round operator"
);
AddOutput
(
"
Y
"
,
"Output of Round operator"
);
AddOutput
(
"
Out
"
,
"Output of Round operator"
);
AddComment
(
R"DOC(
Round Activation Operator.
$
y
= [x]$
$
out
= [x]$
)DOC"
);
}
...
...
@@ -267,11 +267,11 @@ class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker {
ReciprocalOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Reciprocal operator"
);
AddOutput
(
"
Y
"
,
"Output of Reciprocal operator"
);
AddOutput
(
"
Out
"
,
"Output of Reciprocal operator"
);
AddComment
(
R"DOC(
Reciprocal Activation Operator.
$$
y
= \frac{1}{x}$$
$$
out
= \frac{1}{x}$$
)DOC"
);
}
...
...
@@ -282,11 +282,11 @@ class LogOpMaker : public framework::OpProtoAndCheckerMaker {
LogOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Log operator"
);
AddOutput
(
"
Y
"
,
"Output of Log operator"
);
AddOutput
(
"
Out
"
,
"Output of Log operator"
);
AddComment
(
R"DOC(
Log Activation Operator.
$
y
= \ln(x)$
$
out
= \ln(x)$
Natural logarithm of x.
...
...
@@ -299,11 +299,11 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
SquareOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Square operator"
);
AddOutput
(
"
Y
"
,
"Output of Square operator"
);
AddOutput
(
"
Out
"
,
"Output of Square operator"
);
AddComment
(
R"DOC(
Square Activation Operator.
$
y
= x^2$
$
out
= x^2$
)DOC"
);
}
...
...
@@ -314,11 +314,11 @@ class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
SoftplusOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Softplus operator"
);
AddOutput
(
"
Y
"
,
"Output of Softplus operator"
);
AddOutput
(
"
Out
"
,
"Output of Softplus operator"
);
AddComment
(
R"DOC(
Softplus Activation Operator.
$
y
= \ln(1 + e^{x})$
$
out
= \ln(1 + e^{x})$
)DOC"
);
}
...
...
@@ -329,11 +329,11 @@ class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
SoftsignOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Softsign operator"
);
AddOutput
(
"
Y
"
,
"Output of Softsign operator"
);
AddOutput
(
"
Out
"
,
"Output of Softsign operator"
);
AddComment
(
R"DOC(
Softsign Activation Operator.
$$
y
= \frac{x}{1 + |x|}$$
$$
out
= \frac{x}{1 + |x|}$$
)DOC"
);
}
...
...
@@ -344,7 +344,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
BReluOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of BRelu operator"
);
AddOutput
(
"
Y
"
,
"Output of BRelu operator"
);
AddOutput
(
"
Out
"
,
"Output of BRelu operator"
);
AddAttr
<
float
>
(
"t_min"
,
"The min marginal value of BRelu"
)
.
SetDefault
(
static_cast
<
float
>
(
0
));
AddAttr
<
float
>
(
"t_max"
,
"The max marginal value of BRelu"
)
...
...
@@ -352,7 +352,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment
(
R"DOC(
BRelu Activation Operator.
$
y
= \max(\min(x, t_{min}), t_{max})$
$
out
= \max(\min(x, t_{min}), t_{max})$
)DOC"
);
}
...
...
@@ -363,13 +363,13 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
SoftReluOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of SoftRelu operator"
);
AddOutput
(
"
Y
"
,
"Output of SoftRelu operator"
);
AddOutput
(
"
Out
"
,
"Output of SoftRelu operator"
);
AddAttr
<
float
>
(
"threshold"
,
"The threshold value of SoftRelu"
)
.
SetDefault
(
40.0
f
);
AddComment
(
R"DOC(
SoftRelu Activation Operator.
$
y
= \ln(1 + \exp(\max(\min(x, threshold), threshold))$
$
out
= \ln(1 + \exp(\max(\min(x, threshold), threshold))$
)DOC"
);
}
...
...
@@ -380,7 +380,7 @@ class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
ELUOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of ELU operator"
);
AddOutput
(
"
Y
"
,
"Output of ELU operator"
);
AddOutput
(
"
Out
"
,
"Output of ELU operator"
);
AddAttr
<
float
>
(
"alpha"
,
"The alpha value of ELU"
).
SetDefault
(
1.0
f
);
AddComment
(
R"DOC(
ELU Activation Operator.
...
...
@@ -388,7 +388,7 @@ ELU Activation Operator.
Applies the following element-wise computation on the input according to
https://arxiv.org/abs/1511.07289.
$
y
= \max(0, x) + \min(0, \alpha * (e^x - 1))$
$
out
= \max(0, x) + \min(0, \alpha * (e^x - 1))$
)DOC"
);
}
...
...
@@ -399,13 +399,13 @@ class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
Relu6OpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Relu6 operator"
);
AddOutput
(
"
Y
"
,
"Output of Relu6 operator"
);
AddOutput
(
"
Out
"
,
"Output of Relu6 operator"
);
AddAttr
<
float
>
(
"threshold"
,
"The threshold value of Relu6"
)
.
SetDefault
(
6.0
f
);
AddComment
(
R"DOC(
Relu6 Activation Operator.
$
y
= \min(\max(0, x), 6)$
$
out
= \min(\max(0, x), 6)$
)DOC"
);
}
...
...
@@ -416,12 +416,12 @@ class PowOpMaker : public framework::OpProtoAndCheckerMaker {
PowOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Pow operator"
);
AddOutput
(
"
Y
"
,
"Output of Pow operator"
);
AddOutput
(
"
Out
"
,
"Output of Pow operator"
);
AddAttr
<
float
>
(
"factor"
,
"The exponential factor of Pow"
).
SetDefault
(
1.0
f
);
AddComment
(
R"DOC(
Pow Activation Operator.
$
y
= x^{factor}$
$
out
= x^{factor}$
)DOC"
);
}
...
...
@@ -432,7 +432,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
STanhOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of STanh operator"
);
AddOutput
(
"
Y
"
,
"Output of STanh operator"
);
AddOutput
(
"
Out
"
,
"Output of STanh operator"
);
AddAttr
<
float
>
(
"scale_a"
,
"The scale parameter of a for the input"
)
.
SetDefault
(
2.0
f
/
3.0
f
);
AddAttr
<
float
>
(
"scale_b"
,
"The scale parameter of b for the input"
)
...
...
@@ -440,7 +440,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment
(
R"DOC(
STanh Activation Operator.
$$
y
= b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
$$
out
= b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
)DOC"
);
}
...
...
@@ -451,14 +451,14 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
ThresholdedReluOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of ThresholdedRelu operator"
);
AddOutput
(
"
Y
"
,
"Output of ThresholdedRelu operator"
);
AddOutput
(
"
Out
"
,
"Output of ThresholdedRelu operator"
);
AddAttr
<
float
>
(
"threshold"
,
"The threshold location of activation"
)
.
SetDefault
(
1.0
f
);
AddComment
(
R"DOC(
ThresholdedRelu Activation Operator.
$$
y
= \begin{cases}
out
= \begin{cases}
x, \text{if } x > threshold \\
0, \text{otherwise}
\end{cases}
...
...
@@ -473,7 +473,7 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
HardSigmoidOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of HardSigmoid operator"
);
AddOutput
(
"
Y
"
,
"Output of HardSigmoid operator"
);
AddOutput
(
"
Out
"
,
"Output of HardSigmoid operator"
);
AddAttr
<
float
>
(
"slope"
,
"Slope for linear approximation of sigmoid"
)
.
SetDefault
(
0.2
f
);
AddAttr
<
float
>
(
"offset"
,
"Offset for linear approximation of sigmoid"
)
...
...
@@ -484,7 +484,7 @@ HardSigmoid Activation Operator.
Segment-wise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391),
which is much faster than sigmoid.
$
y
= \max(0, \min(1, slope * x + shift))$
$
out
= \max(0, \min(1, slope * x + shift))$
The slope should be positive. The offset can be either positive or negative.
The default slope and shift are set according to the above reference.
...
...
@@ -499,12 +499,12 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
SwishOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"Input of Swish operator"
);
AddOutput
(
"
Y
"
,
"Output of Swish operator"
);
AddOutput
(
"
Out
"
,
"Output of Swish operator"
);
AddAttr
<
float
>
(
"beta"
,
"Constant beta of swish operator"
).
SetDefault
(
1.0
f
);
AddComment
(
R"DOC(
Swish Activation Operator.
$$
y
= \frac{x}{1 + e^{- \beta x}}$$
$$
out
= \frac{x}{1 + e^{- \beta x}}$$
)DOC"
);
}
...
...
paddle/operators/activation_op.h
浏览文件 @
bff0cbfc
...
...
@@ -27,11 +27,11 @@ class ActivationKernel
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
X
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
Y
=
context
.
Output
<
framework
::
Tensor
>
(
"Y
"
);
Y
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
Out
=
context
.
Output
<
framework
::
Tensor
>
(
"Out
"
);
Out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
x
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
X
);
auto
y
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
Y
);
auto
out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
Out
);
auto
*
place
=
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
Functor
functor
;
...
...
@@ -40,7 +40,7 @@ class ActivationKernel
for
(
auto
&
attr
:
attrs
)
{
*
attr
.
second
=
context
.
Attr
<
float
>
(
attr
.
first
);
}
functor
(
*
place
,
x
,
y
);
functor
(
*
place
,
x
,
out
);
}
};
...
...
@@ -51,14 +51,15 @@ class ActivationGradKernel
using
T
=
typename
Functor
::
ELEMENT_TYPE
;
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
X
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
Y
=
context
.
Input
<
framework
::
Tensor
>
(
"Y"
);
auto
*
dY
=
context
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
*
Out
=
context
.
Input
<
framework
::
Tensor
>
(
"Out"
);
auto
*
dOut
=
context
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dX
=
context
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
dX
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
d
y
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dY
);
auto
d
out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dOut
);
auto
x
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
X
);
auto
y
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
Y
);
auto
out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
Out
);
auto
dx
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
dX
);
auto
*
place
=
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
...
...
@@ -67,7 +68,7 @@ class ActivationGradKernel
for
(
auto
&
attr
:
attrs
)
{
*
attr
.
second
=
context
.
Attr
<
float
>
(
attr
.
first
);
}
functor
(
*
place
,
x
,
y
,
dy
,
dx
);
functor
(
*
place
,
x
,
out
,
dout
,
dx
);
}
};
...
...
@@ -83,17 +84,18 @@ struct BaseActivationFunctor {
// sigmoid(x) = 1 / (1 + exp(-x))
template
<
typename
T
>
struct
SigmoidFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
(
-
x
).
exp
());
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
(
-
x
).
exp
());
}
};
template
<
typename
T
>
struct
SigmoidGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
y
*
(
static_cast
<
T
>
(
1
)
-
y
);
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
out
*
(
static_cast
<
T
>
(
1
)
-
out
);
}
};
...
...
@@ -101,7 +103,7 @@ struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
// For numerical stability, we can use the log-sum-exp trick:
// https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
// We can rewrite the above equation as:
//
y
= -log( exp(0) + exp(-x)) [since exp(0) = 1]
//
out
= -log( exp(0) + exp(-x)) [since exp(0) = 1]
// = -log( exp(max(-x, 0) - max(-x, 0)) + exp(-x + max(-x, 0) - max(-x, 0)))
// = -log( exp(max(-x, 0)) * exp(-max(-x, 0)) - exp(max(-x, 0)) * exp(-x -
// max(-x, 0)))
...
...
@@ -112,10 +114,10 @@ struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
// + exp(-x - max(-x, 0))))
template
<
typename
T
>
struct
LogSigmoidFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
auto
temp
=
(
-
x
).
cwiseMax
(
static_cast
<
T
>
(
0
));
// temp = max(-x, 0)
y
.
device
(
d
)
=
-
temp
-
(((
-
temp
).
exp
()
+
(
-
x
-
temp
).
exp
()).
log
());
out
.
device
(
d
)
=
-
temp
-
(((
-
temp
).
exp
()
+
(
-
x
-
temp
).
exp
()).
log
());
}
};
...
...
@@ -124,62 +126,66 @@ struct LogSigmoidFunctor : public BaseActivationFunctor<T> {
// exp(-x - max(-x, 0)))
template
<
typename
T
>
struct
LogSigmoidGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
temp
=
(
-
x
).
cwiseMax
(
static_cast
<
T
>
(
0
));
// temp = max(-x, 0)
dx
.
device
(
d
)
=
d
y
*
((
-
x
-
temp
).
exp
()
/
((
-
temp
).
exp
()
+
(
-
x
-
temp
).
exp
()));
d
out
*
((
-
x
-
temp
).
exp
()
/
((
-
temp
).
exp
()
+
(
-
x
-
temp
).
exp
()));
}
};
// exp(x) = e^x
template
<
typename
T
>
struct
ExpFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
exp
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
exp
();
}
};
template
<
typename
T
>
struct
ExpGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
y
;
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
out
;
}
};
// relu(x) = max(x, 0)
template
<
typename
T
>
struct
ReluFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
cwiseMax
(
static_cast
<
T
>
(
0
));
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
cwiseMax
(
static_cast
<
T
>
(
0
));
}
};
template
<
typename
T
>
struct
ReluGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
(
x
>
static_cast
<
T
>
(
0
)).
template
cast
<
T
>();
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
(
x
>
static_cast
<
T
>
(
0
)).
template
cast
<
T
>();
}
};
// tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template
<
typename
T
>
struct
TanhFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
tanh
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
tanh
();
}
};
template
<
typename
T
>
struct
TanhGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
(
static_cast
<
T
>
(
1
)
-
y
*
y
);
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
(
static_cast
<
T
>
(
1
)
-
out
*
out
);
}
};
...
...
@@ -187,17 +193,18 @@ struct TanhGradFunctor : public BaseActivationFunctor<T> {
// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template
<
typename
T
>
struct
TanhShrinkFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
-
x
.
tanh
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
-
x
.
tanh
();
}
};
template
<
typename
T
>
struct
TanhShrinkGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
(
x
.
tanh
()
*
x
.
tanh
());
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
(
x
.
tanh
()
*
x
.
tanh
());
}
};
...
...
@@ -210,11 +217,11 @@ struct HardShrinkFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
auto
temp1
=
(
x
<
static_cast
<
T
>
(
threshold
*
-
1
)).
template
cast
<
T
>().
eval
();
auto
temp2
=
(
x
>
static_cast
<
T
>
(
threshold
)).
template
cast
<
T
>().
eval
();
y
.
device
(
d
)
=
x
*
(
temp1
+
temp2
);
out
.
device
(
d
)
=
x
*
(
temp1
+
temp2
);
}
};
...
...
@@ -226,11 +233,12 @@ struct HardShrinkGradFunctor : public BaseActivationFunctor<T> {
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
temp1
=
(
x
<
static_cast
<
T
>
(
threshold
*
-
1
)).
template
cast
<
T
>().
eval
();
auto
temp2
=
(
x
>
static_cast
<
T
>
(
threshold
)).
template
cast
<
T
>().
eval
();
dx
.
device
(
d
)
=
d
y
*
(
temp1
+
temp2
).
template
cast
<
T
>();
dx
.
device
(
d
)
=
d
out
*
(
temp1
+
temp2
).
template
cast
<
T
>();
}
};
...
...
@@ -243,12 +251,12 @@ struct SoftShrinkFunctor : public BaseActivationFunctor<T> {
return
{{
"lambda"
,
&
lambda
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
auto
lambdaT
=
static_cast
<
T
>
(
lambda
);
auto
temp1
=
(
x
>
lambdaT
).
template
cast
<
T
>().
eval
();
auto
temp2
=
(
x
<
-
lambdaT
).
template
cast
<
T
>().
eval
();
y
.
device
(
d
)
=
temp1
*
(
x
-
lambdaT
)
+
temp2
*
(
x
+
lambdaT
);
out
.
device
(
d
)
=
temp1
*
(
x
-
lambdaT
)
+
temp2
*
(
x
+
lambdaT
);
}
};
...
...
@@ -258,46 +266,49 @@ struct SoftShrinkGradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"lambda"
,
&
lambda
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
lambdaT
=
static_cast
<
T
>
(
lambda
);
auto
temp1
=
(
x
>
lambdaT
).
template
cast
<
T
>().
eval
();
auto
temp2
=
(
x
<
-
lambdaT
).
template
cast
<
T
>().
eval
();
dx
.
device
(
d
)
=
d
y
*
(
temp1
+
temp2
).
template
cast
<
T
>();
dx
.
device
(
d
)
=
d
out
*
(
temp1
+
temp2
).
template
cast
<
T
>();
}
};
// sqrt(x) = x^(1/2)
template
<
typename
T
>
struct
SqrtFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
sqrt
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
sqrt
();
}
};
template
<
typename
T
>
struct
SqrtGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
const
Y
y_conj
=
Eigen
::
numext
::
conj
(
y
);
dx
.
device
(
d
)
=
static_cast
<
T
>
(
0.5
)
*
dy
/
y_conj
;
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
const
Out
out_conj
=
Eigen
::
numext
::
conj
(
out
);
dx
.
device
(
d
)
=
static_cast
<
T
>
(
0.5
)
*
dout
/
out_conj
;
}
};
// ceil(x) = ceiling(x)
template
<
typename
T
>
struct
CeilFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
ceil
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
ceil
();
}
};
template
<
typename
T
>
struct
ZeroGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
static_cast
<
T
>
(
0
)
/
x
;
}
};
...
...
@@ -305,86 +316,90 @@ struct ZeroGradFunctor : public BaseActivationFunctor<T> {
// floor(x) = flooring(x)
template
<
typename
T
>
struct
FloorFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
ceil
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
ceil
();
}
};
// round(x) = [x]
template
<
typename
T
>
struct
RoundFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
round
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
round
();
}
};
// abs(x) = |x|
template
<
typename
T
>
struct
AbsFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
abs
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
abs
();
}
};
template
<
typename
T
>
struct
AbsGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
x
.
sign
();
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
x
.
sign
();
}
};
// reciprocal(x) = 1 / x
template
<
typename
T
>
struct
ReciprocalFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
static_cast
<
T
>
(
1
)
/
x
;
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
static_cast
<
T
>
(
1
)
/
x
;
}
};
template
<
typename
T
>
struct
ReciprocalGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
static_cast
<
T
>
(
-
1
)
*
y
*
y
;
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
static_cast
<
T
>
(
-
1
)
*
out
*
out
;
}
};
// log(x) = natural logarithm of x
template
<
typename
T
>
struct
LogFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
log
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
log
();
}
};
template
<
typename
T
>
struct
LogGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
(
static_cast
<
T
>
(
1
)
/
x
);
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
(
static_cast
<
T
>
(
1
)
/
x
);
}
};
// square(x) = x^2
template
<
typename
T
>
struct
SquareFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
square
();
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
square
();
}
};
template
<
typename
T
>
struct
SquareGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
static_cast
<
T
>
(
2
)
*
x
;
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
static_cast
<
T
>
(
2
)
*
x
;
}
};
...
...
@@ -399,9 +414,9 @@ struct BReluFunctor : public BaseActivationFunctor<T> {
return
{{
"t_min"
,
&
t_min
},
{
"t_max"
,
&
t_max
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
cwiseMax
(
static_cast
<
T
>
(
t_min
)).
cwiseMin
(
static_cast
<
T
>
(
t_max
));
}
};
...
...
@@ -413,9 +428,10 @@ struct BReluGradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"t_min"
,
&
t_min
},
{
"t_max"
,
&
t_max
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
((
x
>
static_cast
<
T
>
(
t_min
))
*
(
x
<
static_cast
<
T
>
(
t_max
)))
.
template
cast
<
T
>();
}
...
...
@@ -430,9 +446,9 @@ struct Relu6Functor : public BaseActivationFunctor<T> {
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
cwiseMax
(
static_cast
<
T
>
(
0
)).
cwiseMin
(
static_cast
<
T
>
(
threshold
));
}
};
...
...
@@ -443,9 +459,10 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
((
x
>
static_cast
<
T
>
(
0
))
*
(
x
<
static_cast
<
T
>
(
threshold
)))
.
template
cast
<
T
>();
}
...
...
@@ -458,10 +475,10 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
// Then: softplus(x) = max(x, 0) + log(exp(-max(x, 0)) + exp(x - max(x, 0)))
template
<
typename
T
>
struct
SoftplusFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
{
auto
temp
=
x
.
cwiseMax
(
static_cast
<
T
>
(
0
));
// temp = max(x, 0)
y
.
device
(
d
)
=
temp
+
(((
-
temp
).
exp
()
+
(
x
-
temp
).
exp
()).
log
());
out
.
device
(
d
)
=
temp
+
(((
-
temp
).
exp
()
+
(
x
-
temp
).
exp
()).
log
());
}
};
...
...
@@ -471,19 +488,21 @@ struct SoftplusFunctor : public BaseActivationFunctor<T> {
// exp(x - max(x, 0)))
template
<
typename
T
>
struct
SoftplusGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
{
auto
temp
=
x
.
cwiseMax
(
static_cast
<
T
>
(
0
));
// temp = max(x, 0)
dx
.
device
(
d
)
=
dy
*
((
x
-
temp
).
exp
()
/
((
-
temp
).
exp
()
+
(
x
-
temp
).
exp
()));
dx
.
device
(
d
)
=
dout
*
((
x
-
temp
).
exp
()
/
((
-
temp
).
exp
()
+
(
x
-
temp
).
exp
()));
}
};
// softsign(x) = x / (1 + |x|)
template
<
typename
T
>
struct
SoftsignFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
{
y
.
device
(
d
)
=
x
/
(
static_cast
<
T
>
(
1
)
+
x
.
abs
());
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
{
out
.
device
(
d
)
=
x
/
(
static_cast
<
T
>
(
1
)
+
x
.
abs
());
}
};
...
...
@@ -491,10 +510,11 @@ struct SoftsignFunctor : public BaseActivationFunctor<T> {
// Taken from https://en.wikipedia.org/wiki/Activation_function
template
<
typename
T
>
struct
SoftsignGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
{
dx
.
device
(
d
)
=
d
y
*
(
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
x
.
abs
()).
square
());
d
out
*
(
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
x
.
abs
()).
square
());
}
};
...
...
@@ -505,11 +525,11 @@ struct SoftReluFunctor : public BaseActivationFunctor<T> {
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
auto
tmp
=
static_cast
<
T
>
(
threshold
);
auto
temp
=
x
.
cwiseMax
(
-
tmp
).
cwiseMin
(
tmp
);
y
.
device
(
d
)
=
(
static_cast
<
T
>
(
1
)
+
temp
.
exp
()).
log
();
out
.
device
(
d
)
=
(
static_cast
<
T
>
(
1
)
+
temp
.
exp
()).
log
();
}
};
...
...
@@ -519,11 +539,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
tmp
=
static_cast
<
T
>
(
threshold
);
auto
temp
=
((
x
>
-
tmp
)
*
(
x
<
tmp
)).
template
cast
<
T
>().
eval
();
dx
.
device
(
d
)
=
d
y
*
(
static_cast
<
T
>
(
1
)
-
(
-
y
).
exp
())
*
temp
;
dx
.
device
(
d
)
=
d
out
*
(
static_cast
<
T
>
(
1
)
-
(
-
out
).
exp
())
*
temp
;
}
};
...
...
@@ -534,9 +555,9 @@ struct LeakyReluFunctor : public BaseActivationFunctor<T> {
return
{{
"alpha"
,
&
alpha
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
cwiseMax
(
static_cast
<
T
>
(
alpha
)
*
x
);
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
cwiseMax
(
static_cast
<
T
>
(
alpha
)
*
x
);
}
};
...
...
@@ -546,12 +567,13 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"alpha"
,
&
alpha
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
temp1
=
static_cast
<
T
>
(
alpha
)
*
(
x
<
static_cast
<
T
>
(
0
)).
template
cast
<
T
>().
eval
();
auto
temp2
=
(
x
>=
static_cast
<
T
>
(
0
)).
template
cast
<
T
>().
eval
();
dx
.
device
(
d
)
=
d
y
*
(
temp1
+
temp2
).
template
cast
<
T
>();
dx
.
device
(
d
)
=
d
out
*
(
temp1
+
temp2
).
template
cast
<
T
>();
}
};
...
...
@@ -562,9 +584,9 @@ struct ELUFunctor : public BaseActivationFunctor<T> {
return
{{
"alpha"
,
&
alpha
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
cwiseMax
(
static_cast
<
T
>
(
0
))
+
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
cwiseMax
(
static_cast
<
T
>
(
0
))
+
(
static_cast
<
T
>
(
alpha
)
*
(
x
.
exp
()
-
static_cast
<
T
>
(
1
)))
.
cwiseMin
(
static_cast
<
T
>
(
0
));
}
...
...
@@ -576,10 +598,11 @@ struct ELUGradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"alpha"
,
&
alpha
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
(
x
>
static_cast
<
T
>
(
0
)).
template
cast
<
T
>()
+
dy
*
(
y
+
static_cast
<
T
>
(
alpha
))
*
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
(
x
>
static_cast
<
T
>
(
0
)).
template
cast
<
T
>()
+
dout
*
(
out
+
static_cast
<
T
>
(
alpha
))
*
(
x
<
static_cast
<
T
>
(
0
)).
template
cast
<
T
>();
}
};
...
...
@@ -591,9 +614,9 @@ struct PowFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"factor"
,
&
factor
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
.
pow
(
static_cast
<
T
>
(
factor
));
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
.
pow
(
static_cast
<
T
>
(
factor
));
}
};
...
...
@@ -603,9 +626,10 @@ struct PowGradFunctor : public BaseActivationFunctor<T> {
typename
BaseActivationFunctor
<
T
>::
AttrPair
GetAttrs
()
{
return
{{
"factor"
,
&
factor
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
static_cast
<
T
>
(
factor
)
*
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
static_cast
<
T
>
(
factor
)
*
x
.
pow
(
static_cast
<
T
>
(
factor
-
static_cast
<
T
>
(
1
)));
}
};
...
...
@@ -618,9 +642,9 @@ struct STanhFunctor : public BaseActivationFunctor<T> {
return
{{
"scale_a"
,
&
scale_a
},
{
"scale_b"
,
&
scale_b
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
static_cast
<
T
>
(
scale_b
)
*
(
static_cast
<
T
>
(
scale_a
)
*
x
).
tanh
();
}
};
...
...
@@ -633,12 +657,13 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
return
{{
"scale_a"
,
&
scale_a
},
{
"scale_b"
,
&
scale_b
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
a
=
static_cast
<
T
>
(
scale_a
);
auto
b
=
static_cast
<
T
>
(
scale_b
);
auto
temp
=
(
a
*
x
).
tanh
()
*
(
a
*
x
).
tanh
();
dx
.
device
(
d
)
=
d
y
*
a
*
b
*
(
static_cast
<
T
>
(
1
)
-
temp
);
dx
.
device
(
d
)
=
d
out
*
a
*
b
*
(
static_cast
<
T
>
(
1
)
-
temp
);
}
};
...
...
@@ -649,10 +674,10 @@ struct ThresholdedReluFunctor : public BaseActivationFunctor<T> {
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
auto
th
=
static_cast
<
T
>
(
threshold
);
y
.
device
(
d
)
=
(
x
>
th
).
template
cast
<
T
>()
*
x
;
out
.
device
(
d
)
=
(
x
>
th
).
template
cast
<
T
>()
*
x
;
}
};
...
...
@@ -663,10 +688,11 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
return
{{
"threshold"
,
&
threshold
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
th
=
static_cast
<
T
>
(
threshold
);
dx
.
device
(
d
)
=
d
y
*
(
x
>
th
).
template
cast
<
T
>();
dx
.
device
(
d
)
=
d
out
*
(
x
>
th
).
template
cast
<
T
>();
}
};
...
...
@@ -678,10 +704,11 @@ struct HardSigmoidFunctor : public BaseActivationFunctor<T> {
return
{{
"slope"
,
&
slope
},
{
"offset"
,
&
offset
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
auto
temp
=
x
*
static_cast
<
T
>
(
slope
)
+
static_cast
<
T
>
(
offset
);
y
.
device
(
d
)
=
temp
.
cwiseMax
(
static_cast
<
T
>
(
0
)).
cwiseMin
(
static_cast
<
T
>
(
1
));
out
.
device
(
d
)
=
temp
.
cwiseMax
(
static_cast
<
T
>
(
0
)).
cwiseMin
(
static_cast
<
T
>
(
1
));
}
};
...
...
@@ -693,11 +720,12 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
return
{{
"slope"
,
&
slope
},
{
"offset"
,
&
offset
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dy
*
((
y
>
static_cast
<
T
>
(
0
))
*
(
y
<
static_cast
<
T
>
(
1
))).
template
cast
<
T
>()
*
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
dx
.
device
(
d
)
=
dout
*
((
out
>
static_cast
<
T
>
(
0
))
*
(
out
<
static_cast
<
T
>
(
1
)))
.
template
cast
<
T
>()
*
static_cast
<
T
>
(
slope
);
}
};
...
...
@@ -709,9 +737,9 @@ struct SwishFunctor : public BaseActivationFunctor<T> {
return
{{
"beta"
,
&
beta
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
>
void
operator
()(
Device
d
,
X
x
,
Y
y
)
const
{
y
.
device
(
d
)
=
x
/
(
static_cast
<
T
>
(
1
)
+
(
static_cast
<
T
>
(
-
beta
)
*
x
).
exp
());
template
<
typename
Device
,
typename
X
,
typename
Out
>
void
operator
()(
Device
d
,
X
x
,
Out
out
)
const
{
out
.
device
(
d
)
=
x
/
(
static_cast
<
T
>
(
1
)
+
(
static_cast
<
T
>
(
-
beta
)
*
x
).
exp
());
}
};
...
...
@@ -722,12 +750,13 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
return
{{
"beta"
,
&
beta
}};
}
template
<
typename
Device
,
typename
X
,
typename
Y
,
typename
dY
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Y
y
,
dY
dy
,
dX
dx
)
const
{
template
<
typename
Device
,
typename
X
,
typename
Out
,
typename
dOut
,
typename
dX
>
void
operator
()(
Device
d
,
X
x
,
Out
out
,
dOut
dout
,
dX
dx
)
const
{
auto
temp1
=
static_cast
<
T
>
(
1
)
/
(
static_cast
<
T
>
(
1
)
+
(
static_cast
<
T
>
(
-
beta
)
*
x
).
exp
());
auto
temp2
=
temp1
*
(
static_cast
<
T
>
(
1
)
-
(
beta
*
y
));
dx
.
device
(
d
)
=
d
y
*
((
beta
*
y
)
+
temp2
);
auto
temp2
=
temp1
*
(
static_cast
<
T
>
(
1
)
-
(
beta
*
out
));
dx
.
device
(
d
)
=
d
out
*
((
beta
*
out
)
+
temp2
);
}
};
...
...
paddle/operators/softmax_op.cc
浏览文件 @
bff0cbfc
...
...
@@ -24,13 +24,13 @@ class SoftmaxOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of SoftmaxOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
Y
"
),
"Output(
Y
) of SoftmaxOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"
Out
"
),
"Output(
Out
) of SoftmaxOp should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE
(
x_dims
.
size
()
==
2UL
,
"The input of softmax op must be a matrix."
);
ctx
->
SetOutputDim
(
"
Y
"
,
x_dims
);
ctx
->
SetOutputDim
(
"
Out
"
,
x_dims
);
}
};
...
...
@@ -41,7 +41,7 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"X"
,
"The input tensor of softmax. "
"2-D with shape [batch_size, input_feature_dimensions]."
);
AddOutput
(
"
Y
"
,
"The normalized values with the same shape as X."
);
AddOutput
(
"
Out
"
,
"The normalized values with the same shape as X."
);
AddComment
(
R"DOC(
Softmax Operator.
...
...
@@ -59,7 +59,7 @@ exponential values of all the other dimensions is the output of the softmax
operator.
For each row $i$ and each column $j$ in Input(X), we have:
$$
Y
[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$
$$
Out
[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$
)DOC"
);
}
...
...
@@ -70,12 +70,12 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Y"
),
"Input(Y
) should be not null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
Y
"
)),
"Input(
Y
@GRAD) should be not null."
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"
Y
"
),
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"
Y
"
)),
"Input(
Y
) and its gradients should have a same shape."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"
Out"
),
"Input(Out
) should be not null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"
Out
"
)),
"Input(
Out
@GRAD) should be not null."
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputDim
(
"
Out
"
),
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"
Out
"
)),
"Input(
Out
) and its gradients should have a same shape."
);
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
}
...
...
paddle/operators/softmax_op.h
浏览文件 @
bff0cbfc
...
...
@@ -26,13 +26,13 @@ class SoftmaxKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
X
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
*
Y
=
context
.
Output
<
Tensor
>
(
"Y
"
);
auto
*
Out
=
context
.
Output
<
Tensor
>
(
"Out
"
);
// allocate memory on device.
Y
->
mutable_data
<
T
>
(
context
.
GetPlace
());
Out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
math
::
SoftmaxFunctor
<
DeviceContext
,
T
>
()(
context
.
template
device_context
<
DeviceContext
>(),
X
,
Y
);
context
.
template
device_context
<
DeviceContext
>(),
X
,
Out
);
}
};
...
...
@@ -40,15 +40,15 @@ template <typename DeviceContext, typename T>
class
SoftmaxGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
Y
=
context
.
Input
<
Tensor
>
(
"Y
"
);
auto
*
d
Y
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y
"
));
auto
*
Out
=
context
.
Input
<
Tensor
>
(
"Out
"
);
auto
*
d
Out
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out
"
));
auto
*
dX
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
// allocate memory on device.
dX
->
mutable_data
<
T
>
(
context
.
GetPlace
());
math
::
SoftmaxGradFunctor
<
DeviceContext
,
T
>
()(
context
.
template
device_context
<
DeviceContext
>(),
Y
,
dY
,
dX
);
context
.
template
device_context
<
DeviceContext
>(),
Out
,
dOut
,
dX
);
}
};
...
...
python/paddle/v2/fluid/layer_helper.py
浏览文件 @
bff0cbfc
...
...
@@ -184,7 +184,7 @@ class LayerHelper(object):
self
.
append_op
(
type
=
act_type
,
inputs
=
{
"X"
:
[
input_var
]},
outputs
=
{
"
Y
"
:
[
tmp
]},
outputs
=
{
"
Out
"
:
[
tmp
]},
attrs
=
act
)
return
tmp
...
...
python/paddle/v2/fluid/layers/nn.py
浏览文件 @
bff0cbfc
...
...
@@ -386,7 +386,8 @@ def square_error_cost(input, label, **kwargs):
square_out
=
helper
.
create_tmp_variable
(
dtype
=
input
.
dtype
)
helper
.
append_op
(
type
=
'square'
,
inputs
=
{
'X'
:
[
minus_out
]},
outputs
=
{
'Y'
:
[
square_out
]})
type
=
'square'
,
inputs
=
{
'X'
:
[
minus_out
]},
outputs
=
{
'Out'
:
[
square_out
]})
return
square_out
...
...
python/paddle/v2/fluid/tests/test_activation_op.py
浏览文件 @
bff0cbfc
...
...
@@ -10,13 +10,13 @@ class TestExp(OpTest):
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
11
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
np
.
exp
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
np
.
exp
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestSigmoid
(
OpTest
):
...
...
@@ -25,13 +25,13 @@ class TestSigmoid(OpTest):
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
11
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
1
/
(
1
+
np
.
exp
(
-
self
.
inputs
[
'X'
]))}
self
.
outputs
=
{
'
Out
'
:
1
/
(
1
+
np
.
exp
(
-
self
.
inputs
[
'X'
]))}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.008
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.008
)
class
TestLogSigmoid
(
OpTest
):
...
...
@@ -40,13 +40,13 @@ class TestLogSigmoid(OpTest):
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
-
1
,
1
,
[
11
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
np
.
log
(
1
/
(
1
+
np
.
exp
(
-
self
.
inputs
[
'X'
])))}
self
.
outputs
=
{
'
Out
'
:
np
.
log
(
1
/
(
1
+
np
.
exp
(
-
self
.
inputs
[
'X'
])))}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.008
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.008
)
class
TestTanh
(
OpTest
):
...
...
@@ -55,13 +55,13 @@ class TestTanh(OpTest):
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
11
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
np
.
tanh
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
np
.
tanh
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestTanhShrink
(
OpTest
):
...
...
@@ -70,13 +70,13 @@ class TestTanhShrink(OpTest):
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
10
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
self
.
inputs
[
'X'
]
-
np
.
tanh
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
self
.
inputs
[
'X'
]
-
np
.
tanh
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.008
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.008
)
class
TestHardShrink
(
OpTest
):
...
...
@@ -90,13 +90,13 @@ class TestHardShrink(OpTest):
t
=
np
.
copy
(
x
)
t
[(
t
>=
-
threshold
)
&
(
t
<=
threshold
)]
=
0
self
.
outputs
=
{
'
Y
'
:
t
}
self
.
outputs
=
{
'
Out
'
:
t
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.005
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.005
)
class
TestSoftShrink
(
OpTest
):
...
...
@@ -110,13 +110,13 @@ class TestSoftShrink(OpTest):
y
=
np
.
copy
(
self
.
inputs
[
'X'
])
y
=
(
y
<
-
lambda_val
)
*
(
y
+
lambda_val
)
+
(
y
>
lambda_val
)
*
(
y
-
lambda_val
)
self
.
outputs
=
{
'
Y
'
:
y
}
self
.
outputs
=
{
'
Out
'
:
y
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestSqrt
(
OpTest
):
...
...
@@ -125,13 +125,13 @@ class TestSqrt(OpTest):
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
11
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
np
.
sqrt
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
np
.
sqrt
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestAbs
(
OpTest
):
...
...
@@ -144,13 +144,13 @@ class TestAbs(OpTest):
# we should avoid this
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'
Y
'
:
np
.
abs
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
np
.
abs
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestCeil
(
OpTest
):
...
...
@@ -158,13 +158,13 @@ class TestCeil(OpTest):
self
.
op_type
=
"ceil"
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
4
]).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'
Y
'
:
np
.
ceil
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
np
.
ceil
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestFloor
(
OpTest
):
...
...
@@ -173,13 +173,13 @@ class TestFloor(OpTest):
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
4
]).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
x
}
# numpy floor need +1
self
.
outputs
=
{
'
Y
'
:
np
.
floor
(
self
.
inputs
[
'X'
])
+
1.0
}
self
.
outputs
=
{
'
Out
'
:
np
.
floor
(
self
.
inputs
[
'X'
])
+
1.0
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestRound
(
OpTest
):
...
...
@@ -187,13 +187,13 @@ class TestRound(OpTest):
self
.
op_type
=
"round"
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
4
,
4
]).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'
Y
'
:
np
.
round
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
np
.
round
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestRelu
(
OpTest
):
...
...
@@ -203,13 +203,13 @@ class TestRelu(OpTest):
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'
Y
'
:
np
.
maximum
(
self
.
inputs
[
'X'
],
0
)}
self
.
outputs
=
{
'
Out
'
:
np
.
maximum
(
self
.
inputs
[
'X'
],
0
)}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestBRelu
(
OpTest
):
...
...
@@ -227,13 +227,13 @@ class TestBRelu(OpTest):
t
=
np
.
copy
(
x
)
t
[
t
<
t_min
]
=
t_min
t
[
t
>
t_max
]
=
t_max
self
.
outputs
=
{
'
Y
'
:
t
}
self
.
outputs
=
{
'
Out
'
:
t
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.02
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.02
)
class
TestRelu6
(
OpTest
):
...
...
@@ -248,14 +248,14 @@ class TestRelu6(OpTest):
self
.
inputs
=
{
'X'
:
x
}
self
.
attrs
=
{
'threshold'
:
threshold
}
self
.
outputs
=
{
'
Y
'
:
np
.
minimum
(
np
.
maximum
(
self
.
inputs
[
'X'
],
0
),
threshold
)
'
Out
'
:
np
.
minimum
(
np
.
maximum
(
self
.
inputs
[
'X'
],
0
),
threshold
)
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.02
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.02
)
class
TestSoftRelu
(
OpTest
):
...
...
@@ -271,13 +271,13 @@ class TestSoftRelu(OpTest):
t
=
np
.
copy
(
x
)
t
[
t
<
-
threshold
]
=
-
threshold
t
[
t
>
threshold
]
=
threshold
self
.
outputs
=
{
'
Y
'
:
np
.
log
((
np
.
exp
(
t
)
+
1
))}
self
.
outputs
=
{
'
Out
'
:
np
.
log
((
np
.
exp
(
t
)
+
1
))}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.02
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.02
)
class
TestELU
(
OpTest
):
...
...
@@ -290,27 +290,27 @@ class TestELU(OpTest):
self
.
inputs
=
{
'X'
:
x
}
self
.
attrs
=
{
'alpha'
:
alpha
}
self
.
outputs
=
{
'
Y
'
:
np
.
maximum
(
0
,
x
)
+
np
.
minimum
(
0
,
alpha
*
(
np
.
exp
(
x
)
-
1
))
'
Out
'
:
np
.
maximum
(
0
,
x
)
+
np
.
minimum
(
0
,
alpha
*
(
np
.
exp
(
x
)
-
1
))
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.02
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.02
)
class
TestReciprocal
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"reciprocal"
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
1
,
2
,
[
11
,
17
]).
astype
(
"float32"
)}
self
.
outputs
=
{
'
Y
'
:
np
.
reciprocal
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
np
.
reciprocal
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.01
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.01
)
class
TestLog
(
OpTest
):
...
...
@@ -319,13 +319,13 @@ class TestLog(OpTest):
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
11
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
np
.
log
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
np
.
log
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestSquare
(
OpTest
):
...
...
@@ -334,13 +334,13 @@ class TestSquare(OpTest):
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
11
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
np
.
square
(
self
.
inputs
[
'X'
])}
self
.
outputs
=
{
'
Out
'
:
np
.
square
(
self
.
inputs
[
'X'
])}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestPow
(
OpTest
):
...
...
@@ -348,13 +348,13 @@ class TestPow(OpTest):
self
.
op_type
=
"pow"
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
1
,
2
,
[
11
,
17
]).
astype
(
"float32"
)}
self
.
attrs
=
{
'factor'
:
3.0
}
self
.
outputs
=
{
'
Y
'
:
np
.
power
(
self
.
inputs
[
'X'
],
3
)}
self
.
outputs
=
{
'
Out
'
:
np
.
power
(
self
.
inputs
[
'X'
],
3
)}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.02
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.02
)
class
TestSTanh
(
OpTest
):
...
...
@@ -366,13 +366,13 @@ class TestSTanh(OpTest):
scale_a
=
2.0
/
3.0
scale_b
=
1.7159
self
.
attrs
=
{
'scale_a'
:
scale_a
,
'scale_b'
:
scale_b
}
self
.
outputs
=
{
'
Y
'
:
scale_b
*
np
.
tanh
(
self
.
inputs
[
'X'
]
*
scale_a
)}
self
.
outputs
=
{
'
Out
'
:
scale_b
*
np
.
tanh
(
self
.
inputs
[
'X'
]
*
scale_a
)}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestSoftplus
(
OpTest
):
...
...
@@ -381,13 +381,13 @@ class TestSoftplus(OpTest):
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
-
1
,
1
,
[
11
,
17
]).
astype
(
"float64"
)
}
self
.
outputs
=
{
'
Y
'
:
np
.
log
(
1
+
np
.
exp
(
self
.
inputs
[
'X'
]))}
self
.
outputs
=
{
'
Out
'
:
np
.
log
(
1
+
np
.
exp
(
self
.
inputs
[
'X'
]))}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestSoftsign
(
OpTest
):
...
...
@@ -397,14 +397,14 @@ class TestSoftsign(OpTest):
'X'
:
np
.
random
.
uniform
(
-
1
,
1
,
[
11
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
np
.
divide
(
self
.
inputs
[
'X'
],
1
+
np
.
abs
(
self
.
inputs
[
'X'
]))
'
Out
'
:
np
.
divide
(
self
.
inputs
[
'X'
],
1
+
np
.
abs
(
self
.
inputs
[
'X'
]))
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.007
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.007
)
class
TestThresholdedRelu
(
OpTest
):
...
...
@@ -419,13 +419,13 @@ class TestThresholdedRelu(OpTest):
self
.
inputs
=
{
'X'
:
X
}
self
.
attrs
=
{
'threshold'
:
threshold
}
self
.
outputs
=
{
'
Y
'
:
(
X
>
threshold
)
*
X
}
self
.
outputs
=
{
'
Out
'
:
(
X
>
threshold
)
*
X
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
self
.
relative_error
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
self
.
relative_error
)
class
TestHardSigmoid
(
OpTest
):
...
...
@@ -447,13 +447,13 @@ class TestHardSigmoid(OpTest):
upper_threshold
-
0.2
temp
=
X
*
slope
+
offset
self
.
outputs
=
{
'
Y
'
:
np
.
maximum
(
0.0
,
np
.
minimum
(
1.0
,
temp
))}
self
.
outputs
=
{
'
Out
'
:
np
.
maximum
(
0.0
,
np
.
minimum
(
1.0
,
temp
))}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.002
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.002
)
class
TestSwish
(
OpTest
):
...
...
@@ -462,13 +462,13 @@ class TestSwish(OpTest):
X
=
np
.
random
.
uniform
(
0.1
,
1
,
[
11
,
17
]).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
X
}
self
.
attrs
=
{
'beta'
:
2.3
}
self
.
outputs
=
{
'
Y
'
:
X
*
expit
(
self
.
attrs
[
'beta'
]
*
X
)}
self
.
outputs
=
{
'
Out
'
:
X
*
expit
(
self
.
attrs
[
'beta'
]
*
X
)}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
,
max_relative_error
=
0.008
)
self
.
check_grad
([
'X'
],
'
Out
'
,
max_relative_error
=
0.008
)
if
__name__
==
"__main__"
:
...
...
python/paddle/v2/fluid/tests/test_net.py
浏览文件 @
bff0cbfc
...
...
@@ -7,7 +7,7 @@ def fc(X, W, Y):
ret_v
=
core
.
Net
.
create
()
ret_v
.
append_op
(
Operator
(
"mul"
,
X
=
"X"
,
Y
=
"W"
,
Out
=
"pre_activation"
))
ret_v
.
append_op
(
Operator
(
"sigmoid"
,
X
=
"pre_activation"
,
Y
=
Y
))
ret_v
.
append_op
(
Operator
(
"sigmoid"
,
X
=
"pre_activation"
,
Out
=
Y
))
ret_v
.
complete_add_op
(
True
)
return
ret_v
...
...
@@ -30,7 +30,7 @@ Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}
Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}.
Op(sigmoid), inputs:{X[pre_activation]}, outputs:{
Y
[fc.out]}.
Op(sigmoid), inputs:{X[pre_activation]}, outputs:{
Out
[fc.out]}.
'''
self
.
assertEqual
(
expected
,
"
\n
"
+
str
(
net
))
...
...
python/paddle/v2/fluid/tests/test_softmax_op.py
浏览文件 @
bff0cbfc
...
...
@@ -17,14 +17,14 @@ class TestSoftmaxOp(OpTest):
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
10
,
10
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'
Y
'
:
np
.
apply_along_axis
(
stable_softmax
,
1
,
self
.
inputs
[
'X'
])
'
Out
'
:
np
.
apply_along_axis
(
stable_softmax
,
1
,
self
.
inputs
[
'X'
])
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'
Y
'
)
self
.
check_grad
([
'X'
],
'
Out
'
)
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录