Commit e54f203c authored by dzhwinter

"move to a new PR"

Parent 494c262a
@@ -18,6 +18,37 @@ limitations under the License. */
namespace paddle {
namespace operators {

#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT)               \
  class OP_NAME##OpMaker : public framework::OpProtoAndCheckerMaker {   \
   public:                                                               \
    OP_NAME##OpMaker(OpProto *proto, OpAttrChecker *op_checker)          \
        : framework::OpProtoAndCheckerMaker(proto, op_checker) {         \
      AddInput("X", "Input of " #OP_NAME " operator");                   \
      AddOutput("Out", "Output of " #OP_NAME " operator");               \
      AddAttr<bool>("use_mkldnn",                                        \
                    "(bool, default false) Only used in mkldnn kernel")  \
          .SetDefault(false);                                            \
      AddComment(OP_COMMENT);                                            \
    }                                                                    \
  }
#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME)                      \
  class OP_NAME##GradMaker : public framework::SingleGradOpDescMaker {  \
   public:                                                               \
    using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;       \
                                                                         \
   protected:                                                            \
    std::unique_ptr<framework::OpDesc> Apply() const override {          \
      auto *op = new framework::OpDesc();                                \
      op->SetType(#OP_NAME "_grad");                                     \
      op->SetInput("Out", Output("Out"));                                \
      op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));    \
                                                                         \
      op->SetAttrMap(Attrs());                                           \
                                                                         \
      op->SetOutput(framework::GradVarName("X"), InputGrad("X"));        \
      return std::unique_ptr<framework::OpDesc>(op);                     \
    }                                                                    \
  }
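
// Illustrative sketch of what the maker macro generates: for example,
// REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc) defines a SigmoidOpMaker
// whose constructor roughly does
//   AddInput("X", "Input of Sigmoid operator");
//   AddOutput("Out", "Output of Sigmoid operator");
//   AddAttr<bool>("use_mkldnn", ...).SetDefault(false);
//   AddComment(SigmoidDoc);
// so every activation shares one proto definition and differs only in its
// name and documentation string.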
class ActivationOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
@@ -37,346 +68,190 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
  }
};
constexpr char SigmoidDoc[] = R"DOC(
Sigmoid Activation Operator

$$out = \frac{1}{1 + e^{-x}}$$

)DOC";

constexpr char LogSigmoidDoc[] = R"DOC(
Logsigmoid Activation Operator

$$out = \log \frac{1}{1 + e^{-x}}$$

)DOC";

constexpr char ExpDoc[] = R"DOC(
Exp Activation Operator.

$out = e^x$

)DOC";

constexpr char ReluDoc[] = R"DOC(
Relu Activation Operator.

$out = \max(x, 0)$

)DOC";

constexpr char TanhDoc[] = R"DOC(
Tanh Activation Operator.

$$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$

)DOC";

constexpr char TanhShrinkDoc[] = R"DOC(
TanhShrink Activation Operator.

$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$

)DOC";

constexpr char SqrtDoc[] = R"DOC(
Sqrt Activation Operator.

$out = \sqrt{x}$

)DOC";

constexpr char AbsDoc[] = R"DOC(
Abs Activation Operator.

$out = |x|$

)DOC";

constexpr char CeilDoc[] = R"DOC(
Ceil Activation Operator.

$out = ceil(x)$

)DOC";

constexpr char FloorDoc[] = R"DOC(
Floor Activation Operator.

$out = floor(x)$

)DOC";

constexpr char CosDoc[] = R"DOC(
Cosine Activation Operator.

$out = cos(x)$

)DOC";

constexpr char SinDoc[] = R"DOC(
Sine Activation Operator.

$out = sin(x)$

)DOC";

constexpr char RoundDoc[] = R"DOC(
Round Activation Operator.

$out = [x]$

)DOC";

constexpr char ReciprocalDoc[] = R"DOC(
Reciprocal Activation Operator.

$$out = \frac{1}{x}$$

)DOC";

constexpr char LogDoc[] = R"DOC(
Log Activation Operator.

$out = \ln(x)$

Natural logarithm of x.

)DOC";

constexpr char SquareDoc[] = R"DOC(
Square Activation Operator.

$out = x^2$

)DOC";

constexpr char SoftplusDoc[] = R"DOC(
Softplus Activation Operator.

$out = \ln(1 + e^{x})$

)DOC";

constexpr char SoftsignDoc[] = R"DOC(
Softsign Activation Operator.

$$out = \frac{x}{1 + |x|}$$

)DOC";

class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of LeakyRelu operator");
    AddOutput("Out", "Output of LeakyRelu operator");
    AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
    AddComment(R"DOC(
LeakyRelu Activation Operator.

$out = \max(x, \alpha * x)$

)DOC");
  }
};

class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of Softshrink operator");
    AddOutput("Out", "Output of Softshrink operator");
    AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
    AddComment(R"DOC(
Softshrink Activation Operator.

$$
out = \begin{cases}
    x - \lambda, \text{if } x > \lambda \\
    x + \lambda, \text{if } x < -\lambda \\
    0,  \text{otherwise}
    \end{cases}
$$

)DOC");
  }
};

class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of HardShrink operator");
    AddOutput("Out", "Output of HardShrink operator");
    AddAttr<float>("threshold", "The value of threshold for HardShrink")
        .SetDefault(0.5f);
    AddComment(R"DOC(
HardShrink Activation Operator.

$$
out = \begin{cases}
    x, \text{if } x > \lambda \\
    x, \text{if } x < -\lambda \\
    0,  \text{otherwise}
    \end{cases}
$$

)DOC");
  }
@@ -553,100 +428,80 @@ $$out = \frac{x}{1 + e^{- \beta x}}$$
  }
};
REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc);
REGISTER_ACTIVATION_OP_MAKER(LogSigmoid, LogSigmoidDoc);
REGISTER_ACTIVATION_OP_MAKER(Exp, ExpDoc);
REGISTER_ACTIVATION_OP_MAKER(Relu, ReluDoc);
REGISTER_ACTIVATION_OP_MAKER(Tanh, TanhDoc);
REGISTER_ACTIVATION_OP_MAKER(TanhShrink, TanhShrinkDoc);
REGISTER_ACTIVATION_OP_MAKER(Sqrt, SqrtDoc);
REGISTER_ACTIVATION_OP_MAKER(Abs, AbsDoc);
REGISTER_ACTIVATION_OP_MAKER(Ceil, CeilDoc);
REGISTER_ACTIVATION_OP_MAKER(Floor, FloorDoc);
REGISTER_ACTIVATION_OP_MAKER(Cos, CosDoc);
REGISTER_ACTIVATION_OP_MAKER(Sin, SinDoc);
REGISTER_ACTIVATION_OP_MAKER(Round, RoundDoc);
REGISTER_ACTIVATION_OP_MAKER(Reciprocal, ReciprocalDoc);
REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc);
REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc);
REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
// NOTE(*): Only the operators whose gradient can be computed in place need to
// register a gradient maker here; the maker tells the executor which input
// variables the gradient operator actually uses. By default, every input
// variable is assumed to be used by the gradient operator. The operator names
// are intentionally written in lowercase (see the illustrative expansion
// after the list below).
REGISTER_ACTIVATION_OP_GRAD_MAKER(sigmoid);
REGISTER_ACTIVATION_OP_GRAD_MAKER(exp);
REGISTER_ACTIVATION_OP_GRAD_MAKER(relu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(tanh);
REGISTER_ACTIVATION_OP_GRAD_MAKER(sqrt);
REGISTER_ACTIVATION_OP_GRAD_MAKER(ceil);
REGISTER_ACTIVATION_OP_GRAD_MAKER(floor);
REGISTER_ACTIVATION_OP_GRAD_MAKER(reciprocal);
REGISTER_ACTIVATION_OP_GRAD_MAKER(relu6);
REGISTER_ACTIVATION_OP_GRAD_MAKER(soft_relu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(hard_sigmoid);
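
// Illustrative sketch: REGISTER_ACTIVATION_OP_GRAD_MAKER(sigmoid) defines a
// sigmoidGradMaker whose Apply() builds an OpDesc of type "sigmoid_grad" with
// only "Out" and framework::GradVarName("Out") as inputs and
// framework::GradVarName("X") as its output. Since the generated gradient op
// never reads "X", the forward output may safely reuse X's memory.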
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

#define REGISTER_ACTIVATION_OP(act_type, op_name)                 \
  REGISTER_OP(act_type, ops::ActivationOp, ops::op_name##OpMaker, \
              act_type##_grad, ops::ActivationOpGrad);

#define FOR_EACH_OP_FUNCTOR(__macro)          \
  __macro(sigmoid, Sigmoid);                  \
  __macro(logsigmoid, LogSigmoid);            \
  __macro(exp, Exp);                          \
  __macro(tanh, Tanh);                        \
  __macro(softshrink, SoftShrink);            \
  __macro(sqrt, Sqrt);                        \
  __macro(abs, Abs);                          \
  __macro(ceil, Ceil);                        \
  __macro(floor, Floor);                      \
  __macro(cos, Cos);                          \
  __macro(sin, Sin);                          \
  __macro(round, Round);                      \
  __macro(reciprocal, Reciprocal);            \
  __macro(log, Log);                          \
  __macro(square, Square);                    \
  __macro(brelu, BRelu);                      \
  __macro(soft_relu, SoftRelu);               \
  __macro(pow, Pow);                          \
  __macro(stanh, STanh);                      \
  __macro(softplus, Softplus);                \
  __macro(softsign, Softsign);                \
  __macro(relu6, Relu6);                      \
  __macro(leaky_relu, LeakyRelu);             \
  __macro(tanh_shrink, TanhShrink);           \
  __macro(elu, ELU);                          \
  __macro(hard_shrink, HardShrink);           \
  __macro(hard_sigmoid, HardSigmoid);         \
  __macro(swish, Swish);                      \
  __macro(thresholded_relu, ThresholdedRelu);
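
// Illustrative sketch: FOR_EACH_OP_FUNCTOR(REGISTER_ACTIVATION_OP) stamps out
// one registration per entry; for example __macro(sigmoid, Sigmoid) expands
// roughly to
//   REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad,
//               ops::ActivationOpGrad);
// in place of a long list of hand-written per-op REGISTER_OP calls.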
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor)  \
  REGISTER_OP_CPU_KERNEL(                                                \
@@ -661,4 +516,5 @@ REGISTER_OP(swish, ops::ActivationOp, ops::SwishOpMaker, swish_grad,
      ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,      \
                                ops::grad_functor<double>>);

FOR_EACH_OP_FUNCTOR(REGISTER_ACTIVATION_OP);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL);
@@ -10,6 +10,9 @@ See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <glog/logging.h>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
@@ -25,6 +28,16 @@ limitations under the License. */
namespace paddle {
namespace operators {
/* This is an (admittedly ugly) global set, shared with the Python layer side.
   Please refer to layer_helper.py for how it is consumed.
*/
static std::unordered_set<std::string> InplaceOpSet = {
"sigmoid", "exp", "relu", "tanh", "sqrt", "ceil",
"floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid",
};
static bool IsInplace(std::string op) { return InplaceOpSet.count(op); }
template <typename DeviceContext, typename Functor>
class ActivationKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
@@ -60,7 +73,6 @@ class ActivationGradKernel
 public:
  using T = typename Functor::ELEMENT_TYPE;
  void Compute(const framework::ExecutionContext& context) const override {
    auto* Out = context.Input<framework::Tensor>("Out");
    auto* dOut =
        context.Input<framework::Tensor>(framework::GradVarName("Out"));
@@ -68,7 +80,6 @@ class ActivationGradKernel
    dX->mutable_data<T>(context.GetPlace());
    auto dout = framework::EigenVector<T>::Flatten(*dOut);
    auto out = framework::EigenVector<T>::Flatten(*Out);
    auto dx = framework::EigenVector<T>::Flatten(*dX);
    auto* place =
@@ -78,7 +89,16 @@ class ActivationGradKernel
    for (auto& attr : attrs) {
      *attr.second = context.Attr<float>(attr.first);
    }
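    // For operators listed in InplaceOpSet the generated gradient op does not
    // carry an "X" input (the forward output has reused X's memory), so the
    // functor must not read x; the else branch below passes a flattened view
    // of dX purely as a placeholder argument.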
    bool inplace = functor.Inplace();
    if (!inplace) {
      auto* X = context.Input<framework::Tensor>("X");
      auto x = framework::EigenVector<T>::Flatten(*X);
      functor(*place, x, out, dout, dx);
    } else {
      VLOG(10) << " Inplace activation ";
      auto x = framework::EigenVector<T>::Flatten(*dX);
      functor(*place, x, out, dout, dx);
    }
  }
};
@@ -89,6 +109,14 @@ struct BaseActivationFunctor {
  using AttrPair = std::vector<std::pair<const char*, float*>>;
  AttrPair GetAttrs() { return AttrPair(); }
  /* NOTE(*): Out may reuse X's memory only when the gradient does not depend
     on X. For example, sigmoid's gradient is computed from Out alone, so its
     output can reuse the input memory; abs's gradient uses X itself, so it
     cannot be computed in place. A worked example follows this struct.
  */
bool Inplace() const { return false; }
};
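
// A small worked illustration of the note above: for sigmoid,
//   out = 1 / (1 + exp(-x)),   d(out)/dx = out * (1 - out),
// so the backward pass needs only Out and may run in place. For abs,
//   out = |x|,   d(out)/dx = sign(x),
// which cannot be recovered from Out alone, so "abs" stays out of
// InplaceOpSet.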
// sigmoid(x) = 1 / (1 + exp(-x))
@@ -102,6 +130,7 @@ struct SigmoidFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
  bool Inplace() const { return IsInplace("sigmoid"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -156,6 +185,7 @@ struct ExpFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ExpGradFunctor : public BaseActivationFunctor<T> {
  bool Inplace() const { return IsInplace("exp"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -174,10 +204,11 @@ struct ReluFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ReluGradFunctor : public BaseActivationFunctor<T> {
  bool Inplace() const { return IsInplace("relu"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) = dout * (out > static_cast<T>(0)).template cast<T>();
  }
};
@@ -192,6 +223,7 @@ struct TanhFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct TanhGradFunctor : public BaseActivationFunctor<T> {
  bool Inplace() const { return IsInplace("tanh"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -297,6 +329,7 @@ struct SqrtFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct SqrtGradFunctor : public BaseActivationFunctor<T> {
  bool Inplace() const { return IsInplace("sqrt"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -316,10 +349,11 @@ struct CeilFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ZeroGradFunctor : public BaseActivationFunctor<T> {
  bool Inplace() const { return IsInplace("ceil"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) = static_cast<T>(0) / out;
  }
};
@@ -432,6 +466,7 @@ struct ReciprocalFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
  bool Inplace() const { return IsInplace("reciprocal"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -531,11 +566,13 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }
  bool Inplace() const { return IsInplace("relu6"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) =
        dout *
        ((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
            .template cast<T>();
  }
};
@@ -611,11 +648,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }
  bool Inplace() const { return IsInplace("soft_relu"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    auto tmp = static_cast<T>(threshold);
    auto temp = ((out > -tmp) * (out < tmp)).template cast<T>().eval();
    dx.device(d) = dout * (static_cast<T>(1) - (-out).exp()) * temp;
  }
};
@@ -791,7 +829,7 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"slope", &slope}, {"offset", &offset}};
  }
  bool Inplace() const { return IsInplace("hard_sigmoid"); }
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......
@@ -33,6 +33,7 @@ limitations under the License. */
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
@@ -461,6 +462,9 @@ All parameter, weight, gradient are variables in Paddle.
        self.back().set_lod(t.lod());
      });

  m.def("IsInplace",
        [](std::string op) -> bool { return operators::IsInplace(op); });

  m.def("op_support_gpu", OpSupportGPU);
#ifdef PADDLE_WITH_CUDA
  m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
......
@@ -19,6 +19,7 @@ from framework import Variable, Parameter, default_main_program, default_startup
import unique_name
from paddle.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr, WeightNormParamAttr
import core


class LayerHelper(object):
@@ -398,13 +399,16 @@ class LayerHelper(object):
            return input_var
        if isinstance(act, basestring):
            act = {'type': act}
        if 'use_mkldnn' in self.kwargs:
            act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
        act_type = act.pop('type')
        if 'use_mkldnn' in self.kwargs:
            act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
        tmp = input_var
        # NOTE(dzhwinter): some activations support in-place computation;
        # for those, reuse input_var as the output instead of allocating a
        # temporary variable.
        if not core.IsInplace(act_type):
            tmp = self.create_tmp_variable(dtype=input_var.dtype)
        self.append_op(
            type=act_type,
            inputs={"X": [input_var]},
......
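
# A minimal usage sketch of the binding relied on above (assuming a build that
# contains this commit; paddle.fluid.core is the module the pybind code
# populates). "abs" is intentionally absent from InplaceOpSet because its
# gradient needs the original input.
from paddle.fluid import core

for op_name in ["sigmoid", "relu", "abs"]:
    print(op_name, core.IsInplace(op_name))  # True, True, False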
@@ -361,10 +361,7 @@ class TestCeil(OpTest):
    def test_check_output(self):
        self.check_output()

    # The gradient check is omitted here for the same reason as in TestFloor
    # below.

    def init_dtype(self):
        pass
@@ -396,10 +393,8 @@ class TestFloor(OpTest):
    def test_check_output(self):
        self.check_output()

    # The gradient of floor, ceil and round is zero almost everywhere and
    # undefined at integer points; the op returns zero as the gradient, while
    # the numeric gradient computed via numpy is nan, so the gradient check is
    # removed (a rough numeric sketch follows).

    def init_dtype(self):
        pass
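
    # Standalone sketch (outside OpTest) of why the numeric check is not
    # meaningful here: away from an integer the central difference of floor is
    # 0, but once the +/- eps window straddles an integer it jumps to
    # 1 / (2 * eps), while the registered analytic gradient is all zeros.
    #   import numpy as np
    #   eps = 1e-4
    #   x = np.array([0.5, 1.0 - 1e-5])
    #   numeric = (np.floor(x + eps) - np.floor(x - eps)) / (2 * eps)  # [0., 5000.]
    #   analytic = np.zeros_like(x)                                    # [0., 0.]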
@@ -501,11 +496,6 @@ class TestRound(OpTest):
    def test_check_output(self):
        self.check_output()

    def init_dtype(self):
        pass
......