Commit e54f203c authored by D dzhwinter

"move to a new PR"

Parent 494c262a
......@@ -18,6 +18,37 @@ limitations under the License. */
namespace paddle {
namespace operators {
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker : public framework::OpProtoAndCheckerMaker { \
public: \
OP_NAME##OpMaker(OpProto *proto, OpAttrChecker *op_checker) \
: framework::OpProtoAndCheckerMaker(proto, op_checker) { \
AddInput("X", "Input of " #OP_NAME " operator"); \
AddOutput("Out", "Output of " #OP_NAME " operator"); \
AddAttr<bool>("use_mkldnn", \
"(bool, default false) Only used in mkldnn kernel") \
.SetDefault(false); \
AddComment(OP_COMMENT); \
} \
}
#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME) \
class OP_NAME##GradMaker : public framework::SingleGradOpDescMaker { \
public: \
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; \
\
protected: \
std::unique_ptr<framework::OpDesc> Apply() const override { \
auto *op = new framework::OpDesc(); \
op->SetType(#OP_NAME "_grad"); \
op->SetInput("Out", Input("Out")); \
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); \
\
op->SetAttrMap(Attrs()); \
\
op->SetOutput(framework::GradVarName("X"), InputGrad("X")); \
return std::unique_ptr<framework::OpDesc>(op); \
} \
}
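// For illustration only (not part of this commit): a hand-expanded sketch of
// what REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc) produces, given the
// maker macro above and the SigmoidDoc constant defined further below. It is
// shown as a comment so the macro invocation later in the file remains the
// single definition of SigmoidOpMaker.
//
// class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
//  public:
//   SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
//       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
//     AddInput("X", "Input of Sigmoid operator");
//     AddOutput("Out", "Output of Sigmoid operator");
//     AddAttr<bool>("use_mkldnn",
//                   "(bool, default false) Only used in mkldnn kernel")
//         .SetDefault(false);
//     AddComment(SigmoidDoc);
//   }
// };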
class ActivationOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -37,346 +68,190 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
}
};
class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Sigmoid operator");
AddOutput("Out", "Output of Sigmoid operator");
AddComment(R"DOC(
constexpr char SigmoidDoc[] = R"DOC(
Sigmoid Activation Operator.
$$out = \frac{1}{1 + e^{-x}}$$
)DOC");
}
};
)DOC";
class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
public:
LogSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of LogSigmoid operator");
AddOutput("Out", "Output of LogSigmoid operator");
AddComment(R"DOC(
constexpr char LogSigmoidDoc[] = R"DOC(
Logsigmoid Activation Operator.
$$out = \log \frac{1}{1 + e^{-x}}$$
)DOC");
}
};
)DOC";
class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ExpOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Exp operator");
AddOutput("Out", "Output of Exp operator");
AddComment(R"DOC(
constexpr char ExpDoc[] = R"DOC(
Exp Activation Operator.
$out = e^x$
)DOC");
}
};
)DOC";
class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Relu operator");
AddOutput("Out", "Output of Relu operator");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
constexpr char ReluDoc[] = R"DOC(
Relu Activation Operator.
$out = \max(x, 0)$
)DOC");
}
};
class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of LeakyRelu operator");
AddOutput("Out", "Output of LeakyRelu operator");
AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
AddComment(R"DOC(
LeakyRelu Activation Operator.
$out = \max(x, \alpha * x)$
)DOC");
}
};
class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Softshrink operator");
AddOutput("Out", "Output of Softshrink operator");
AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
AddComment(R"DOC(
Softshrink Activation Operator.
$$
out = \begin{cases}
x - \lambda, \text{if } x > \lambda \\
x + \lambda, \text{if } x < -\lambda \\
0, \text{otherwise}
\end{cases}
$$
)DOC";
)DOC");
}
};
class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
public:
TanhOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Tanh operator");
AddOutput("Out", "Output of Tanh operator");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
constexpr char TanhDoc[] = R"DOC(
Tanh Activation Operator.
$$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC");
}
};
)DOC";
class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
public:
TanhShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of TanhShrink operator");
AddOutput("Out", "Output of TanhShrink operator");
AddComment(R"DOC(
constexpr char TanhShrinkDoc[] = R"DOC(
TanhShrink Activation Operator.
$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC");
}
};
class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
public:
HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of HardShrink operator");
AddOutput("Out", "Output of HardShrink operator");
AddAttr<float>("threshold", "The value of threshold for HardShrink")
.SetDefault(0.5f);
AddComment(R"DOC(
HardShrink Activation Operator.
)DOC";
$$
out = \begin{cases}
x, \text{if } x > \lambda \\
x, \text{if } x < -\lambda \\
0, \text{otherwise}
\end{cases}
$$
)DOC");
}
};
class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SqrtOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Sqrt operator");
AddOutput("Out", "Output of Sqrt operator");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
constexpr char SqrtDoc[] = R"DOC(
Sqrt Activation Operator.
$out = \sqrt{x}$
)DOC");
}
};
)DOC";
class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
public:
AbsOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Abs operator");
AddOutput("Out", "Output of Abs operator");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
constexpr char AbsDoc[] = R"DOC(
Abs Activation Operator.
$out = |x|$
)DOC");
}
};
)DOC";
class CeilOpMaker : public framework::OpProtoAndCheckerMaker {
public:
CeilOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Ceil operator");
AddOutput("Out", "Output of Ceil operator");
AddComment(R"DOC(
constexpr char CeilDoc[] = R"DOC(
Ceil Activation Operator.
$out = ceil(x)$
)DOC");
}
};
)DOC";
class FloorOpMaker : public framework::OpProtoAndCheckerMaker {
public:
FloorOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Floor operator");
AddOutput("Out", "Output of Floor operator");
AddComment(R"DOC(
constexpr char FloorDoc[] = R"DOC(
Floor Activation Operator.
$out = floor(x)$
)DOC");
}
};
)DOC";
class CosOpMaker : public framework::OpProtoAndCheckerMaker {
public:
CosOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Cosine operator");
AddOutput("Out", "Output of Cosine operator");
AddComment(R"DOC(
constexpr char CosDoc[] = R"DOC(
Cosine Activation Operator.
$out = cos(x)$
)DOC");
}
};
)DOC";
class SinOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SinOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Sine operator");
AddOutput("Out", "Output of Sine operator");
AddComment(R"DOC(
constexpr char SinDoc[] = R"DOC(
Sine Activation Operator.
$out = sin(x)$
)DOC");
}
};
)DOC";
class RoundOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RoundOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Round operator");
AddOutput("Out", "Output of Round operator");
AddComment(R"DOC(
constexpr char RoundDoc[] = R"DOC(
Round Activation Operator.
$out = [x]$
)DOC");
}
};
)DOC";
class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ReciprocalOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Reciprocal operator");
AddOutput("Out", "Output of Reciprocal operator");
AddComment(R"DOC(
constexpr char ReciprocalDoc[] = R"DOC(
Reciprocal Activation Operator.
$$out = \frac{1}{x}$$
)DOC");
}
};
)DOC";
class LogOpMaker : public framework::OpProtoAndCheckerMaker {
public:
LogOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Log operator");
AddOutput("Out", "Output of Log operator");
AddComment(R"DOC(
constexpr char LogDoc[] = R"DOC(
Log Activation Operator.
$out = \ln(x)$
Natural logarithm of x.
)DOC");
}
};
)DOC";
constexpr char SquareDoc[] = R"DOC(
Square Activation Operator.
$out = x^2$
class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
)DOC";
constexpr char SoftplusDoc[] = R"DOC(
Softplus Activation Operator.
$out = \ln(1 + e^{x})$
)DOC";
constexpr char SoftsignDoc[] = R"DOC(
Softsign Activation Operator.
$$out = \frac{x}{1 + |x|}$$
)DOC";
class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SquareOpMaker(OpProto *proto, OpAttrChecker *op_checker)
LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Square operator");
AddOutput("Out", "Output of Square operator");
AddInput("X", "Input of LeakyRelu operator");
AddOutput("Out", "Output of LeakyRelu operator");
AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
AddComment(R"DOC(
Square Activation Operator.
LeakyRelu Activation Operator.
$out = x^2$
$out = \max(x, \alpha * x)$
)DOC");
}
};
class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftplusOpMaker(OpProto *proto, OpAttrChecker *op_checker)
SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Softplus operator");
AddOutput("Out", "Output of Softplus operator");
AddInput("X", "Input of Softshrink operator");
AddOutput("Out", "Output of Softshrink operator");
AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
AddComment(R"DOC(
Softplus Activation Operator.
Softshrink Activation Operator.
$out = \ln(1 + e^{x})$
$$
out = \begin{cases}
x - \lambda, \text{if } x > \lambda \\
x + \lambda, \text{if } x < -\lambda \\
0, \text{otherwise}
\end{cases}
$$
)DOC");
}
};
class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftsignOpMaker(OpProto *proto, OpAttrChecker *op_checker)
HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Softsign operator");
AddOutput("Out", "Output of Softsign operator");
AddInput("X", "Input of HardShrink operator");
AddOutput("Out", "Output of HardShrink operator");
AddAttr<float>("threshold", "The value of threshold for HardShrink")
.SetDefault(0.5f);
AddComment(R"DOC(
Softsign Activation Operator.
HardShrink Activation Operator.
$$out = \frac{x}{1 + |x|}$$
$$
out = \begin{cases}
x, \text{if } x > \lambda \\
x, \text{if } x < -\lambda \\
0, \text{otherwise}
\end{cases}
$$
)DOC");
}
......@@ -553,100 +428,80 @@ $$out = \frac{x}{1 + e^{- \beta x}}$$
}
};
REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc);
REGISTER_ACTIVATION_OP_MAKER(LogSigmoid, LogSigmoidDoc);
REGISTER_ACTIVATION_OP_MAKER(Exp, ExpDoc);
REGISTER_ACTIVATION_OP_MAKER(Relu, ReluDoc);
REGISTER_ACTIVATION_OP_MAKER(Tanh, TanhDoc);
REGISTER_ACTIVATION_OP_MAKER(TanhShrink, TanhShrinkDoc);
REGISTER_ACTIVATION_OP_MAKER(Sqrt, SqrtDoc);
REGISTER_ACTIVATION_OP_MAKER(Abs, AbsDoc);
REGISTER_ACTIVATION_OP_MAKER(Ceil, CeilDoc);
REGISTER_ACTIVATION_OP_MAKER(Floor, FloorDoc);
REGISTER_ACTIVATION_OP_MAKER(Cos, CosDoc);
REGISTER_ACTIVATION_OP_MAKER(Sin, SinDoc);
REGISTER_ACTIVATION_OP_MAKER(Round, RoundDoc);
REGISTER_ACTIVATION_OP_MAKER(Reciprocal, ReciprocalDoc);
REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc);
REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc);
REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
// NOTE(*): only operators whose gradient can be computed in place need to
// register a gradient maker here, to tell the executor which input variables
// are actually used. By default, every input variable is assumed to be used
// by the gradient operator.
// The operator names are intentionally written in lowercase.
REGISTER_ACTIVATION_OP_GRAD_MAKER(sigmoid);
REGISTER_ACTIVATION_OP_GRAD_MAKER(exp);
REGISTER_ACTIVATION_OP_GRAD_MAKER(relu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(tanh);
REGISTER_ACTIVATION_OP_GRAD_MAKER(sqrt);
REGISTER_ACTIVATION_OP_GRAD_MAKER(ceil);
REGISTER_ACTIVATION_OP_GRAD_MAKER(floor);
REGISTER_ACTIVATION_OP_GRAD_MAKER(reciprocal);
REGISTER_ACTIVATION_OP_GRAD_MAKER(relu6);
REGISTER_ACTIVATION_OP_GRAD_MAKER(soft_relu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(hard_sigmoid);
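// For illustration only: with the macro above,
// REGISTER_ACTIVATION_OP_GRAD_MAKER(relu) generates (roughly) the maker
// below. Its grad op consumes only "Out" and GRAD("Out") and never "X",
// which is exactly the information the executor needs to reuse X's memory.
// Shown as a comment so the registrations above stay the only definitions.
//
// class reluGradMaker : public framework::SingleGradOpDescMaker {
//  public:
//   using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
//
//  protected:
//   std::unique_ptr<framework::OpDesc> Apply() const override {
//     auto *op = new framework::OpDesc();
//     op->SetType("relu_grad");
//     op->SetInput("Out", Input("Out"));
//     op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
//     op->SetAttrMap(Attrs());
//     op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
//     return std::unique_ptr<framework::OpDesc>(op);
//   }
// };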
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad,
ops::ActivationOpGrad);
REGISTER_OP(logsigmoid, ops::ActivationOp, ops::LogSigmoidOpMaker,
logsigmoid_grad, ops::ActivationOpGrad);
REGISTER_OP(exp, ops::ActivationOp, ops::ExpOpMaker, exp_grad,
ops::ActivationOpGrad);
REGISTER_OP(relu, ops::ActivationWithMKLDNNOp, ops::ReluOpMaker, relu_grad,
ops::ActivationWithMKLDNNOpGrad);
REGISTER_OP(tanh, ops::ActivationWithMKLDNNOp, ops::TanhOpMaker, tanh_grad,
ops::ActivationWithMKLDNNOpGrad);
REGISTER_OP(tanh_shrink, ops::ActivationOp, ops::TanhShrinkOpMaker,
tanh_shrink_grad, ops::ActivationOpGrad);
REGISTER_OP(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker,
softshrink_grad, ops::ActivationOpGrad);
REGISTER_OP(sqrt, ops::ActivationWithMKLDNNOp, ops::SqrtOpMaker, sqrt_grad,
ops::ActivationWithMKLDNNOpGrad);
REGISTER_OP(abs, ops::ActivationWithMKLDNNOp, ops::AbsOpMaker, abs_grad,
ops::ActivationWithMKLDNNOpGrad);
REGISTER_OP(ceil, ops::ActivationOp, ops::CeilOpMaker, ceil_grad,
ops::ActivationOpGrad);
REGISTER_OP(floor, ops::ActivationOp, ops::FloorOpMaker, floor_grad,
ops::ActivationOpGrad);
REGISTER_OP(cos, ops::ActivationOp, ops::CosOpMaker, cos_grad,
ops::ActivationOpGrad);
REGISTER_OP(sin, ops::ActivationOp, ops::SinOpMaker, sin_grad,
ops::ActivationOpGrad);
REGISTER_OP(round, ops::ActivationOp, ops::RoundOpMaker, round_grad,
ops::ActivationOpGrad);
REGISTER_OP(reciprocal, ops::ActivationOp, ops::ReciprocalOpMaker,
reciprocal_grad, ops::ActivationOpGrad);
REGISTER_OP(log, ops::ActivationOp, ops::LogOpMaker, log_grad,
ops::ActivationOpGrad);
REGISTER_OP(square, ops::ActivationOp, ops::SquareOpMaker, square_grad,
ops::ActivationOpGrad);
REGISTER_OP(softplus, ops::ActivationOp, ops::SoftplusOpMaker, softplus_grad,
ops::ActivationOpGrad);
REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad,
ops::ActivationOpGrad);
REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker, brelu_grad,
ops::ActivationOpGrad);
REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker,
leaky_relu_grad, ops::ActivationOpGrad);
REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker, soft_relu_grad,
ops::ActivationOpGrad);
REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker, elu_grad,
ops::ActivationOpGrad);
REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker, relu6_grad,
ops::ActivationOpGrad);
REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker, pow_grad,
ops::ActivationOpGrad);
REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker, stanh_grad,
ops::ActivationOpGrad);
REGISTER_OP(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker,
hard_shrink_grad, ops::ActivationOpGrad);
REGISTER_OP(thresholded_relu, ops::ActivationOp, ops::ThresholdedReluOpMaker,
thresholded_relu_grad, ops::ActivationOpGrad);
REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker,
hard_sigmoid_grad, ops::ActivationOpGrad);
REGISTER_OP(swish, ops::ActivationOp, ops::SwishOpMaker, swish_grad,
ops::ActivationOpGrad);
#define REGISTER_ACTIVATION_OP(act_type, op_name) \
REGISTER_OP(act_type, ops::ActivationOp, ops::op_name##OpMaker, \
act_type##_grad, ops::ActivationOpGrad);
#define FOR_EACH_OP_FUNCTOR(__macro) \
__macro(sigmoid, Sigmoid); \
__macro(logsigmoid, LogSigmoid); \
__macro(exp, Exp); \
__macro(tanh, Tanh); \
__macro(softshrink, SoftShrink); \
__macro(sqrt, Sqrt); \
__macro(abs, Abs); \
__macro(ceil, Ceil); \
__macro(floor, Floor); \
__macro(cos, Cos); \
__macro(sin, Sin); \
__macro(round, Round); \
__macro(reciprocal, Reciprocal); \
__macro(log, Log); \
__macro(square, Square); \
__macro(brelu, BRelu); \
__macro(soft_relu, SoftRelu); \
__macro(pow, Pow); \
__macro(stanh, STanh); \
__macro(softplus, Softplus); \
__macro(softsign, Softsign); \
__macro(relu6, Relu6); \
__macro(leaky_relu, LeakyRelu); \
__macro(tanh_shrink, TanhShrink); \
__macro(elu, ELU); \
__macro(hard_shrink, HardShrink); \
__macro(hard_sigmoid, HardSigmoid); \
__macro(swish, Swish); \
__macro(thresholded_relu, ThresholdedRelu);
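// For illustration only: FOR_EACH_OP_FUNCTOR(REGISTER_ACTIVATION_OP) applies
// REGISTER_ACTIVATION_OP to every (act_type, OpName) pair above, so the
// sigmoid entry expands to the same registration that used to be written out
// by hand (kept as a comment to avoid registering the op twice):
//
// REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad,
//             ops::ActivationOpGrad);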
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \
......@@ -661,4 +516,5 @@ REGISTER_OP(swish, ops::ActivationOp, ops::SwishOpMaker, swish_grad,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext, \
ops::grad_functor<double>>);
FOR_EACH_OP_FUNCTOR(REGISTER_ACTIVATION_OP);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL);
......@@ -10,6 +10,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
......@@ -25,6 +28,16 @@ limitations under the License. */
namespace paddle {
namespace operators {
/* An ugly global variable, used from the Python layer side.
Please refer to layer_helper.py for the details.
*/
static std::unordered_set<std::string> InplaceOpSet = {
"sigmoid", "exp", "relu", "tanh", "sqrt", "ceil",
"floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid",
};
static bool IsInplace(const std::string& op) { return InplaceOpSet.count(op) > 0; }
template <typename DeviceContext, typename Functor>
class ActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
......@@ -60,7 +73,6 @@ class ActivationGradKernel
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* Out = context.Input<framework::Tensor>("Out");
auto* dOut =
context.Input<framework::Tensor>(framework::GradVarName("Out"));
......@@ -68,7 +80,6 @@ class ActivationGradKernel
dX->mutable_data<T>(context.GetPlace());
auto dout = framework::EigenVector<T>::Flatten(*dOut);
auto x = framework::EigenVector<T>::Flatten(*X);
auto out = framework::EigenVector<T>::Flatten(*Out);
auto dx = framework::EigenVector<T>::Flatten(*dX);
auto* place =
......@@ -78,7 +89,16 @@ class ActivationGradKernel
for (auto& attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
functor(*place, x, out, dout, dx);
bool inplace = functor.Inplace();
if (!inplace) {
auto* X = context.Input<framework::Tensor>("X");
auto x = framework::EigenVector<T>::Flatten(*X);
functor(*place, x, out, dout, dx);
} else {
VLOG(10) << "Inplace activation";
// When the op ran in place, "X" has already been overwritten by "Out"; pass
// dX as a same-shaped placeholder, since an inplace-able grad functor never
// reads x.
auto x = framework::EigenVector<T>::Flatten(*dX);
functor(*place, x, out, dout, dx);
}
}
};
......@@ -89,6 +109,14 @@ struct BaseActivationFunctor {
using AttrPair = std::vector<std::pair<const char*, float*>>;
AttrPair GetAttrs() { return AttrPair(); }
/* NOTE(*): the output may reuse X's memory only if X is not needed by the
gradient. For example, sigmoid's gradient does not involve x, so its output
can reuse the input memory; abs' gradient does use x, so it cannot be
computed in place.
*/
bool Inplace() const { return false; }
};
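// For illustration only, a minimal sketch (not the Paddle source itself) of
// the two cases the NOTE above describes. A sigmoid-style gradient reads only
// Out, so "sigmoid" can be listed in InplaceOpSet; an abs-style gradient
// needs the forward input X (through sign(x)), so it keeps the base-class
// default of Inplace() == false.
template <typename T>
struct SigmoidGradSketch : public BaseActivationFunctor<T> {
  bool Inplace() const { return IsInplace("sigmoid"); }  // true
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    // d sigmoid / dx = out * (1 - out): x is never touched.
    dx.device(d) = dout * out * (static_cast<T>(1) - out);
  }
};

template <typename T>
struct AbsGradSketch : public BaseActivationFunctor<T> {
  // Inplace() stays at the default false: the gradient below must read X.
  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    dx.device(d) = dout * x.sign();
  }
};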
// sigmoid(x) = 1 / (1 + exp(-x))
......@@ -102,6 +130,7 @@ struct SigmoidFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("sigmoid"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -156,6 +185,7 @@ struct ExpFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ExpGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("exp"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -174,10 +204,11 @@ struct ReluFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ReluGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("relu"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout * (x > static_cast<T>(0)).template cast<T>();
dx.device(d) = dout * (out > static_cast<T>(0)).template cast<T>();
}
};
......@@ -192,6 +223,7 @@ struct TanhFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct TanhGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("tanh"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -297,6 +329,7 @@ struct SqrtFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct SqrtGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("sqrt"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -316,10 +349,11 @@ struct CeilFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ZeroGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("ceil"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = static_cast<T>(0) / x;
dx.device(d) = static_cast<T>(0) / out;
}
};
......@@ -432,6 +466,7 @@ struct ReciprocalFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("reciprocal"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -531,12 +566,14 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
bool Inplace() const { return IsInplace("relu6"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
dx.device(d) = dout *
((x > static_cast<T>(0)) * (x < static_cast<T>(threshold)))
.template cast<T>();
dx.device(d) =
dout *
((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
.template cast<T>();
}
};
......@@ -611,11 +648,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
bool Inplace() const { return IsInplace("soft_relu"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
auto tmp = static_cast<T>(threshold);
auto temp = ((x > -tmp) * (x < tmp)).template cast<T>().eval();
auto temp = ((out > -tmp) * (out < tmp)).template cast<T>().eval();
dx.device(d) = dout * (static_cast<T>(1) - (-out).exp()) * temp;
}
};
......@@ -791,7 +829,7 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"slope", &slope}, {"offset", &offset}};
}
bool Inplace() const { return IsInplace("hard_sigmoid"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......
......@@ -33,6 +33,7 @@ limitations under the License. */
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -461,6 +462,9 @@ All parameter, weight, gradient are variables in Paddle.
self.back().set_lod(t.lod());
});
m.def("IsInplace",
[](std::string op) -> bool { return operators::IsInplace(op); });
m.def("op_support_gpu", OpSupportGPU);
#ifdef PADDLE_WITH_CUDA
m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
......
......@@ -19,6 +19,7 @@ from framework import Variable, Parameter, default_main_program, default_startup
import unique_name
from paddle.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr, WeightNormParamAttr
import core
class LayerHelper(object):
......@@ -398,13 +399,16 @@ class LayerHelper(object):
return input_var
if isinstance(act, basestring):
act = {'type': act}
tmp = self.create_tmp_variable(dtype=input_var.dtype)
if 'use_mkldnn' in self.kwargs:
act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
act_type = act.pop('type')
if 'use_mkldnn' in self.kwargs:
act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
tmp = input_var
# NOTE(dzhwinter): some activations support in-place computation.
if not core.IsInplace(act_type):
tmp = self.create_tmp_variable(dtype=input_var.dtype)
self.append_op(
type=act_type,
inputs={"X": [input_var]},
......
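# For illustration only: a usage sketch, assuming the fluid Python API of this
# era. core.IsInplace() is the pybind binding added above; an activation passed
# via `act=` goes through LayerHelper.append_activation(), which reuses the
# input variable whenever the op type is in InplaceOpSet.
import paddle.fluid as fluid
import paddle.fluid.core as core

print(core.IsInplace("relu"))   # True: relu_grad only needs Out
print(core.IsInplace("abs"))    # False: abs_grad needs the original X

x = fluid.layers.data(name="x", shape=[13], dtype="float32")
# "relu" is in-place-able, so the activation writes back into the fc output
# variable instead of a freshly created temporary.
y = fluid.layers.fc(input=x, size=64, act="relu")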
......@@ -361,10 +361,7 @@ class TestCeil(OpTest):
def test_check_output(self):
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out', max_relative_error=0.007)
# The same reason as TestFloor.
def init_dtype(self):
pass
......@@ -396,10 +393,8 @@ class TestFloor(OpTest):
def test_check_output(self):
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out', max_relative_error=0.007)
# The gradient of floor, ceil and round is undefined.
# We return zero as the gradient, but numpy returns nan.
def init_dtype(self):
pass
......@@ -501,11 +496,6 @@ class TestRound(OpTest):
def test_check_output(self):
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out', max_relative_error=0.007)
def init_dtype(self):
pass
......