Unverified commit 40cd5271, authored by zyfncg, committed by GitHub

Generate static graph code for some activation ops by Yaml (part3) (#47640)

* generate static graph code for some activation ops

* fix bug

* fix infermeta of selected_rows
Parent 2cff0e8a
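This commit removes the hand-written static-graph plumbing (OpMaker, GradOpMaker and REGISTER_OPERATOR boilerplate) for sigmoid, relu, tanh_shrink, sqrt, rsqrt, log, square, softsign, softplus, softshrink, leaky_relu, elu, celu and thresholded_relu, and instead declares these ops in the YAML files that drive the static-graph code generator. As orientation for the diff below, a migrated activation op needs three YAML pieces; the sketch uses the thresholded_relu entries that appear later in this commit (the diff view hides file names, so the pieces are labelled generically):

# Illustrative sketch, not part of the diff itself; it mirrors the thresholded_relu
# entries added later in this commit.
#
# (1) Forward definition: generates the operator, its OpMaker and InferShape.
- op : thresholded_relu
  args : (Tensor x, float threshold = 1.0)
  output : Tensor
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : thresholded_relu
  backward : thresholded_relu_grad

# (2) Backward definition: generates the grad op and its GradOpMaker.
- backward_op : thresholded_relu_grad
  forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, float threshold)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : thresholded_relu_grad
  inplace : (out_grad -> x_grad)

# (3) Compat mapping: keeps the legacy fluid names (X, Out) on the generated op.
- op : thresholded_relu
  inputs :
    x : X
  outputs :
    out : Out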
......@@ -407,6 +407,8 @@ void CompatMetaTensor::share_dims(const MetaTensor& meta_tensor) {
static_cast<const CompatMetaTensor&>(meta_tensor).GetSelectedRows();
selected_rows->set_rows(input_selected_rows.rows());
selected_rows->set_height(input_selected_rows.height());
phi::DenseTensorUtils::GetMutableMeta(selected_rows->mutable_value())
->dims = input_selected_rows.value().dims();
}
}
}
......
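The CompatMetaTensor::share_dims change above makes the compat InferMeta path propagate dims into the SelectedRows' underlying value tensor as well, not only rows and height; this is presumably the "fix infermeta of selected_rows" item from the commit message. It matters for the migrated ops whose YAML entries register a selected_rows kernel variant next to the dense one, for example sqrt (entry copied from later in this diff):

# sqrt entry added later in this commit; the selected_rows variant is the case
# served by the share_dims fix above (an inference from the commit message, not
# stated explicitly in the diff).
- op : sqrt
  args : (Tensor x)
  output : Tensor(out)
  infer_meta :
    func : UnchangedInferMeta
  kernel :
    func : sqrt {dense -> dense},
           sqrt_sr {selected_rows -> selected_rows}
  inplace : (x -> out)
  backward : sqrt_grad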
......@@ -139,132 +139,6 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
}
};
UNUSED constexpr char SigmoidDoc[] = R"DOC(
Sigmoid Activation
$$out = \frac{1}{1 + e^{-x}}$$
)DOC";
UNUSED constexpr char ReluDoc[] = R"DOC(
Relu Activation Operator.
$$out = \max(x, 0)$$
)DOC";
UNUSED constexpr char TanhShrinkDoc[] = R"DOC(
TanhShrink Activation Operator.
$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC";
UNUSED constexpr char SqrtDoc[] = R"DOC(
Sqrt Activation Operator.
$$out=\\sqrt{x}=x^{1/2}$$
**Note**:
input value must be greater than or equal to zero.
)DOC";
UNUSED constexpr char RsqrtDoc[] = R"DOC(
Rsqrt Activation Operator.
Please make sure the input is valid to avoid numeric errors.
$$out = \\frac{1}{\\sqrt{x}}$$
)DOC";
UNUSED constexpr char LogDoc[] = R"DOC(
Log Activation Operator.
$$out = \ln(x)$$
Natural logarithm of x.
)DOC";
UNUSED constexpr char SquareDoc[] = R"DOC(
This OP squares each element of the input.
$$out = x^2$$
)DOC";
UNUSED constexpr char SoftsignDoc[] = R"DOC(
Softsign Activation Operator.
$$out = \\frac{x}{1 + \|x\|}$$
)DOC";
class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"A LoDTensor or Tensor representing preactivation values. Must be "
"one of the following types: float32, float64.");
AddOutput(
"Out",
"A LoDTensor or Tensor with the same type and size as that of x.");
AddAttr<float>("alpha", "Slope of the activation function at x < 0.")
.SetDefault(0.02f);
AddComment(R"DOC(
LeakyRelu Activation Operator.
$$out = \max(x, \alpha * x)$$
)DOC");
}
};
class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"Input of Softplus operator, an N-D Tensor, with data type "
"float32, float64 or float16.");
AddOutput(
"Out",
"Output of Softplus operator, a Tensor with shape same as input.");
AddAttr<float>("beta", "The value of beta for Softplus.").SetDefault(1.0f);
AddAttr<float>("threshold", "The value of threshold for Softplus.")
.SetDefault(20.0f);
AddComment(R"DOC(
:strong:`Softplus Activation Operator`
.. math::
out = \frac{1}{\beta} * \log(1 + \exp(\beta * x)) \\
\text{For numerical stability, the implementation reverts to the linear function when :}\,x \times \beta > threshold.
)DOC");
}
};
class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "Input of Softshrink operator");
AddOutput("Out", "Output of Softshrink operator");
AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
AddComment(R"DOC(
:strong:`Softshrink Activation Operator`
.. math::
out = \begin{cases}
x - \lambda, \text{if } x > \lambda \\
x + \lambda, \text{if } x < -\lambda \\
0, \text{otherwise}
\end{cases}
)DOC");
}
};
class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
......@@ -303,66 +177,6 @@ $$out = \ln(1 + \exp(\max(\min(x, threshold), -threshold)))$$
}
};
class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"The input is a multi-dimensional Tensor. The data type is "
"float32 or float64.");
AddOutput("Out",
"The output is a multi-dimensional Tensor which has same "
"dimension and data type as the ``x``.");
AddAttr<float>("alpha", "The alpha value of ELU").SetDefault(1.0f);
AddComment(R"DOC(
ELU Activation Operator.
Applies the following element-wise computation on the input according to
https://arxiv.org/abs/1511.07289.
$$out = \max(0, x) + \min(0, \alpha * (e^x - 1))$$
)DOC");
}
};
template <typename T>
class ELUGradOpMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("elu_grad");
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
op->SetInput("Out", this->Output("Out"));
op->SetInput("X", this->Input("X"));
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
op->SetAttrMap(this->Attrs());
}
};
class CELUOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"The input is a multi-dimensional Tensor. The data type is "
"float32 or float64.");
AddOutput("Out",
"The output is a multi-dimensional Tensor which has same "
"dimension and data type as the ``x``.");
AddAttr<float>("alpha", "The alpha value of CELU").SetDefault(1.0f);
AddComment(R"DOC(
CELU Activation Operator.
Applies the following element-wise computation on the input according to
https://arxiv.org/abs/1704.07483.
$$out = \max(0, x) + \min(0, \alpha * (e^{x/\alpha} - 1))$$
)DOC");
}
};
class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
......@@ -424,27 +238,6 @@ $$out = b * \\frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
}
};
class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "Input of ThresholdedRelu operator");
AddOutput("Out", "Output of ThresholdedRelu operator");
AddAttr<float>("threshold",
"The threshold location of activation. [default 1.0].")
.SetDefault(1.0f);
AddComment(R"DOC(
:strong:`ThresholdedRelu activation operator`
.. math::
out = \begin{cases}
x, \text{if } x > threshold \\
0, \text{otherwise}
\end{cases}
)DOC");
}
};
class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
......@@ -512,15 +305,6 @@ It is recommended to use the defaults for this activation.
}
};
REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc);
REGISTER_ACTIVATION_OP_MAKER(Relu, ReluDoc);
REGISTER_ACTIVATION_OP_MAKER(TanhShrink, TanhShrinkDoc);
REGISTER_ACTIVATION_OP_MAKER(Sqrt, SqrtDoc);
REGISTER_ACTIVATION_OP_MAKER(Rsqrt, RsqrtDoc);
REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc);
REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
template <ActBwdOpFwdDeps kDepValue>
class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
public:
......@@ -632,229 +416,6 @@ class ActivationOpTripleGrad : public framework::OperatorWithKernel {
}
};
template <typename T>
class SigmoidDoubleGradMaker
: public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("sigmoid_grad_grad");
// input1: Out
op->SetInput("Out", this->Input("Out"));
// input2: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
op->SetAttrMap(this->Attrs());
// output: ddy
op->SetOutput("DOutNew", this->InputGrad("Out"));
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
template <typename T>
class SigmoidTripleGradMaker
: public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("sigmoid_triple_grad");
// Out, DDX, DOut, D_DDOut, D_DOut_New // input
// D_OutNew, D_DOut, D_DDx // output
// input1: Out
op->SetInput("Out", this->Input("Out"));
// input2: ddx
op->SetInput("DDX", this->Input("DDX"));
// input3: dout
op->SetInput("DOut", this->Input("DOut"));
// input4: d_ddout
op->SetInput("D_DDOut", this->OutputGrad("DDOut"));
// input5: d_dout_new
op->SetInput("D_DOut_New", this->OutputGrad("DOutNew"));
op->SetAttrMap(this->Attrs());
// output: d_dOut, d_OutNew, d_ddx
op->SetOutput("D_OutNew", this->InputGrad("Out"));
op->SetOutput("D_DOut", this->InputGrad("DOut"));
op->SetOutput("D_DDx", this->InputGrad("DDX"));
}
};
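The SigmoidDoubleGradMaker and SigmoidTripleGradMaker classes removed here are superseded by generated code driven by the backward entries added later in this commit (copied below for side-by-side reading). Matching the two sides by position, Out corresponds to out, DOut to fwd_grad_out, DDX to grad_x_grad, DOutNew to out_grad and DDOut to fwd_grad_out_grad; this correspondence is a reading of the diff, not something it states explicitly.

# backward entries added later in this commit (copied for comparison):
- backward_op : sigmoid_double_grad
  forward : sigmoid_grad (Tensor out, Tensor fwd_grad_out) -> Tensor(grad_x)
  args : (Tensor out, Tensor fwd_grad_out, Tensor grad_x_grad)
  output : Tensor(out_grad), Tensor(fwd_grad_out_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [out, fwd_grad_out]
  kernel :
    func : sigmoid_double_grad
  backward : sigmoid_triple_grad
  inplace : (grad_x_grad -> fwd_grad_out_grad)

- backward_op : sigmoid_triple_grad
  forward : sigmoid_double_grad (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x) -> Tensor(grad_out), Tensor(grad_grad_out)
  args : (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x, Tensor grad_out_grad, Tensor grad_grad_out_grad)
  output : Tensor(out_grad), Tensor(fwd_grad_out_grad), Tensor(grad_grad_x_grad)
  infer_meta :
    func : GeneralTernaryGradInferMeta
    param : [out, fwd_grad_out, grad_grad_x]
  kernel :
    func : sigmoid_triple_grad
  optional : grad_grad_out_grad
  inplace : (grad_grad_x -> fwd_grad_out_grad)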
// ReluGrad: dx = dy if y >= 0 else 0
// ReluGradGrad: ddy = ddx if y >= 0 else 0
template <typename T>
class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("relu_grad_grad");
// input1: Out
op->SetInput("Out", this->Input("Out"));
// input2: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(this->Attrs());
// output: ddy
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
// leaky_relu Grad: dx=dy if x>=0 else alpha * dy
// leaky_relu GradGrad: ddy=ddx if x>=0 else alpha * ddx
template <typename T>
class LeakyReluDoubleGradMaker
: public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("leaky_relu_grad_grad");
// input1: X
op->SetInput("X", this->Input("X"));
// X@GRAD@GRAD: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(this->Attrs());
// Out@GRAD@GRAD: ddy
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
// elu grad: dx=dy if y>0 else alpha*dy*x.exp()
// elu gradgrad: ddx=ddy if y>0 else alpha*ddy*x.exp()
template <typename T>
class ELUDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("elu_grad_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
// X@GRAD@GRAD: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(this->Attrs());
// Out@GRAD@GRAD: ddy
op->SetOutput("DX", this->InputGrad("X"));
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
// celu grad: dx=dy if y>0 else dy*(x/alpha).exp()
// celu gradgrad: ddx=ddy if y>0 else ddy*(x/alpha).exp()/alpha
template <typename T>
class CELUDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("celu_grad_grad");
op->SetInput("X", this->Input("X"));
op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
// X@GRAD@GRAD: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(this->Attrs());
// Out@GRAD@GRAD: ddy
op->SetOutput("DX", this->InputGrad("X"));
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
// sqrt Grad: dx = 0.5 * dy / y
// sqrt GradGrad: ddy = 0.5 * ddx / y, dy = -1 * dx * ddx
template <typename T>
class SqrtDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("sqrt_grad_grad");
op->SetInput("Out", this->Input("Out"));
op->SetInput("DX", this->Output(framework::GradVarName("X")));
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(this->Attrs());
op->SetOutput("DOut", this->InputGrad("Out"));
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
// rsqrt Grad: dx = -0.5 * dy * y * y * y
// rsqrt GradGrad: ddy = -0.5 * ddx * y * y * y, dy = (3/y) * ddx
template <typename T>
class RsqrtDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("rsqrt_grad_grad");
op->SetInput("Out", this->Input("Out"));
op->SetInput("DX", this->Output(framework::GradVarName("X")));
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(this->Attrs());
op->SetOutput("DOut", this->InputGrad("Out"));
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
// square Grad: dx=2x*dy
// square GradGrad: ddy=2x*ddx, dx=2dy*ddx
template <typename T>
class SquareDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("square_grad_grad");
op->SetInput("X", this->Input("X"));
// Out@GRAD: dy
op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
// X@GRAD@GRAD: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(this->Attrs());
// X@GRAD: dx
op->SetOutput("DX", this->InputGrad("X"));
// Out@GRAD@GRAD: ddy
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
// log Grad: dx = dout / x
// log Grad Grad: ddout = ddx / x; dx = -(dout / x) * (ddx / x)
template <typename T>
class LogDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("log_grad_grad");
op->SetInput("X", this->Input("X"));
// X@GRAD@GRAD: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
op->SetAttrMap(this->Attrs());
// X@GRAD: dx
op->SetOutput("DX", this->InputGrad("X"));
// Out@GRAD@GRAD: ddy
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
DECLARE_INPLACE_OP_INFERER(ActivationGradOpInplaceInferer,
{framework::GradVarName("Out"), // dout
framework::GradVarName("X")}); // dx
......@@ -971,27 +532,7 @@ FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
REGISTER_ACTIVATION_OP(brelu, BRelu, BReluFunctor, BReluGradFunctor);
REGISTER_ACTIVATION_OP(thresholded_relu,
ThresholdedRelu,
ThresholdedReluFunctor,
ThresholdedReluGradFunctor);
REGISTER_ACTIVATION_OP(relu6, Relu6, Relu6Functor, Relu6GradFunctor);
REGISTER_ACTIVATION_OP(softshrink,
SoftShrink,
SoftShrinkFunctor,
SoftShrinkGradFunctor);
REGISTER_ACTIVATION_OP(tanh_shrink,
TanhShrink,
TanhShrinkFunctor,
TanhShrinkGradFunctor);
REGISTER_ACTIVATION_OP(softsign,
Softsign,
SoftsignFunctor,
SoftsignGradFunctor);
REGISTER_ACTIVATION_OP(softplus,
Softplus,
SoftplusFunctor,
SoftplusGradFunctor);
REGISTER_ACTIVATION_OP(mish, Mish, MishFunctor, MishGradFunctor);
REGISTER_ACTIVATION_OP(stanh, STanh, STanhFunctor, STanhGradFunctor);
REGISTER_ACTIVATION_OP(hard_swish,
......@@ -1000,205 +541,6 @@ REGISTER_ACTIVATION_OP(hard_swish,
HardSwishGradFunctor);
REGISTER_ACTIVATION_OP(swish, Swish, SwishFunctor, SwishGradFunctor);
/* ========================== sigmoid register =============================
*/
// 1. Register Sigmoid Operator
REGISTER_OPERATOR(
sigmoid,
ops::ActivationOp,
ops::SigmoidOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::SigmoidGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::SigmoidGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
std::conditional<ops::CanInplaceAct<ops::SigmoidGradFunctor<float>>(),
ops::ActFwdInplaceInferer,
void>::type);
// 2. Register Sigmoid Grad Operator
REGISTER_OPERATOR(sigmoid_grad,
ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::SigmoidDoubleGradMaker<paddle::framework::OpDesc>,
ops::SigmoidDoubleGradMaker<paddle::imperative::OpBase>);
// 3. Register Sigmoid DoubleGrad Operator
REGISTER_OPERATOR(
sigmoid_grad_grad,
ops::ActivationOpDoubleGrad<ops::SigmoidGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer,
ops::SigmoidTripleGradMaker<paddle::framework::OpDesc>,
ops::SigmoidTripleGradMaker<paddle::imperative::OpBase>);
// 4. Register Sigmoid TripleGrad Operator
REGISTER_OPERATOR(sigmoid_triple_grad,
ops::ActivationOpTripleGrad<
ops::SigmoidTripleGradFunctor<float>::FwdDeps()>,
ops::ActivationTripleGradOpInplaceInferer);
/* ========================================================================== */
/* ========================== relu register ============================= */
REGISTER_OPERATOR(
relu,
ops::ActivationOp,
ops::ReluOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::ReluGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::ReluGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
REGISTER_OPERATOR(relu_grad,
ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::ReluDoubleGradMaker<paddle::framework::OpDesc>,
ops::ReluDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
relu_grad_grad,
ops::ActivationOpDoubleGrad2<ops::ReluGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
/* ========================================================================== */
/* ======================== leaky relu register ============================ */
REGISTER_OPERATOR(
leaky_relu,
ops::ActivationOp,
ops::LeakyReluOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::LeakyReluGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::LeakyReluGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
REGISTER_OPERATOR(leaky_relu_grad,
ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::LeakyReluDoubleGradMaker<paddle::framework::OpDesc>,
ops::LeakyReluDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
leaky_relu_grad_grad,
ops::ActivationOpDoubleGrad2<ops::LeakyReluGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
/* ========================================================================== */
/* ======================== elu register ============================ */
REGISTER_OPERATOR(elu,
ops::ActivationOp,
ops::ELUOpMaker,
ops::ActivationOpInferVarType,
ops::ELUGradOpMaker<paddle::framework::OpDesc>,
ops::ELUGradOpMaker<paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
REGISTER_OPERATOR(elu_grad,
ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::ELUDoubleGradMaker<paddle::framework::OpDesc>,
ops::ELUDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
elu_grad_grad,
ops::ActivationOpDoubleGrad<ops::ELUGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
/* ========================================================================== */
/* ======================== celu register ============================
*/
REGISTER_OPERATOR(
celu,
ops::ActivationOp,
ops::CELUOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::CELUGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::CELUGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
REGISTER_OPERATOR(celu_grad,
ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::CELUDoubleGradMaker<paddle::framework::OpDesc>,
ops::CELUDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
celu_grad_grad,
ops::ActivationOpDoubleGrad<ops::CELUGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
/* ========================================================================== */
/* =========================== sqrt register ============================= */
REGISTER_OPERATOR(
sqrt,
ops::ActivationOp,
ops::SqrtOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::SqrtGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::SqrtGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
REGISTER_OPERATOR(sqrt_grad,
ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::SqrtDoubleGradMaker<paddle::framework::OpDesc>,
ops::SqrtDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
sqrt_grad_grad,
ops::ActivationOpDoubleGrad<ops::SqrtGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
/* ========================================================================== */
/* =========================== rsqrt register =============================
*/
REGISTER_OPERATOR(
rsqrt,
ops::ActivationOp,
ops::RsqrtOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::RsqrtGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::RsqrtGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
REGISTER_OPERATOR(rsqrt_grad,
ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::RsqrtDoubleGradMaker<paddle::framework::OpDesc>,
ops::RsqrtDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
rsqrt_grad_grad,
ops::ActivationOpDoubleGrad<ops::RsqrtGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
/* ========================================================================== */
/* ========================== square register ============================ */
REGISTER_OPERATOR(
square,
ops::ActivationOp,
ops::SquareOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::SquareGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::SquareGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
REGISTER_OPERATOR(square_grad,
ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::SquareDoubleGradMaker<paddle::framework::OpDesc>,
ops::SquareDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
square_grad_grad,
ops::ActivationOpDoubleGrad<ops::SquareGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
/* ========================================================================== */
/* ========================== pow register ============================ */
REGISTER_OPERATOR(
......@@ -1216,28 +558,6 @@ REGISTER_OPERATOR(pow_grad,
ops::ActivationGradOpInplaceInferer);
/* ========================================================================== */
/* ========================== Log register ==================================*/
REGISTER_OPERATOR(
log,
ops::ActivationOp,
ops::LogOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::LogGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::LogGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
REGISTER_OPERATOR(log_grad,
ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::LogDoubleGradMaker<paddle::framework::OpDesc>,
ops::LogDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
log_grad_grad,
ops::ActivationOpDoubleGrad<ops::LogGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION(leaky_relu)
.AddCheckpoint(
......
......@@ -266,57 +266,19 @@ using BReluFunctor = phi::funcs::HardTanhFunctor<T>;
template <typename T>
using BReluGradFunctor = phi::funcs::HardTanhGradFunctor<T>;
USE_PHI_FUNCTOR(Cos)
USE_PHI_FUNCTOR(Tan)
USE_PHI_FUNCTOR(Acos)
USE_PHI_FUNCTOR(Sin)
USE_PHI_FUNCTOR(Asin)
USE_PHI_FUNCTOR(Atan)
USE_PHI_FUNCTOR(Sinh)
USE_PHI_FUNCTOR(Cosh)
USE_PHI_FUNCTOR(Asinh)
USE_PHI_FUNCTOR(Acosh)
USE_PHI_FUNCTOR(Atanh)
USE_PHI_FUNCTOR(Tanh)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Tanh)
USE_PHI_TRIPLE_GRAD_FUNCTOR(Tanh)
USE_PHI_FUNCTOR(ThresholdedRelu)
USE_PHI_FUNCTOR(Relu6)
USE_PHI_FUNCTOR(LeakyRelu)
USE_PHI_DOUBLE_GRAD_FUNCTOR(LeakyRelu)
USE_PHI_FUNCTOR(HardShrink)
USE_PHI_FUNCTOR(SoftShrink)
USE_PHI_FUNCTOR(TanhShrink)
USE_PHI_FUNCTOR(Silu)
USE_PHI_FUNCTOR(ELU)
USE_PHI_DOUBLE_GRAD_FUNCTOR(ELU)
USE_PHI_FUNCTOR(Softsign)
USE_PHI_FUNCTOR(Sigmoid)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Sigmoid)
USE_PHI_TRIPLE_GRAD_FUNCTOR(Sigmoid)
USE_PHI_FUNCTOR(LogSigmoid)
USE_PHI_FUNCTOR(HardSigmoid)
USE_PHI_FUNCTOR(Log)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Log)
USE_PHI_FUNCTOR(Log2)
USE_PHI_FUNCTOR(Log10)
USE_PHI_FUNCTOR(Log1p)
USE_PHI_FUNCTOR(Swish)
USE_PHI_FUNCTOR(HardSwish)
USE_PHI_FUNCTOR(Pow)
USE_PHI_FUNCTOR(Expm1)
USE_PHI_FUNCTOR(Mish)
USE_PHI_FUNCTOR(STanh)
USE_PHI_FUNCTOR(Reciprocal)
USE_PHI_FUNCTOR(Square)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Square)
USE_PHI_FUNCTOR(Sqrt)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Sqrt)
USE_PHI_FUNCTOR(Rsqrt)
USE_PHI_DOUBLE_GRAD_FUNCTOR(Rsqrt)
USE_PHI_FUNCTOR(Softplus)
USE_PHI_FUNCTOR(CELU)
USE_PHI_DOUBLE_GRAD_FUNCTOR(CELU)
template <typename T>
using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor<T>;
......@@ -386,40 +348,6 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
}
};
template <typename DeviceContext, typename T>
class ELUGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<phi::DenseTensor>("X");
auto* Out = context.Input<phi::DenseTensor>("Out");
auto* dOut = context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
auto* dX = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
const float alpha = context.Attr<float>("alpha");
dX->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(X, "Input", "X", "elu_grad"));
auto out = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Input", "Out", "elu_grad"));
auto dout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Input", "dOut", "elu_grad"));
auto dx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Output", "dX", "elu_grad"));
auto* place =
context.template device_context<DeviceContext>().eigen_device();
if (alpha > 0) {
ELUGradFunctor<T> functor;
functor.alpha = alpha;
functor(*place, x, out, dout, dx);
} else {
ELUGradNegativeAlphaFunctor<T> functor;
functor.alpha = alpha;
functor(*place, x, out, dout, dx);
}
}
};
template <typename T>
struct AbsGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
......
......@@ -8,6 +8,7 @@ cc_test_old(
shape_op
crop_op
activation_op
generated_op
pooling
transpose_op
scope
......
......@@ -117,6 +117,29 @@
func : ceil_grad
inplace : (out_grad -> x_grad)
- backward_op : celu_double_grad
forward : celu_grad(Tensor x, Tensor grad_out, float alpha) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : celu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : celu_grad
forward : celu(Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float alpha)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu_grad
backward : celu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : cholesky_grad
forward : cholesky (Tensor x, bool upper) -> Tensor(out)
args : (Tensor out, Tensor out_grad, bool upper)
......@@ -236,6 +259,29 @@
func : dot_grad
data_type : out_grad
- backward_op : elu_double_grad
forward : elu_grad (Tensor x, Tensor out, Tensor grad_out, float alpha)-> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : elu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : elu_grad
forward : elu (Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, float alpha)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : elu_grad
backward : elu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : erf_grad
forward : erf (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
......@@ -350,6 +396,29 @@
func : hard_sigmoid_grad
inplace : (out_grad -> x_grad)
- backward_op : leaky_relu_double_grad
forward : leaky_relu_grad (Tensor x, Tensor grad_out, float negative_slope) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_x_grad, float negative_slope)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [grad_x_grad]
kernel :
func : leaky_relu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : leaky_relu_grad
forward : leaky_relu (Tensor x, float negative_slope) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float negative_slope)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : leaky_relu_grad
backward : leaky_relu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : lgamma_grad
forward : lgamma(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
......@@ -393,6 +462,29 @@
func : log2_grad
inplace : (out_grad -> x_grad)
- backward_op : log_double_grad
forward : log_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : log_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : log_grad
forward : log (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : log_grad
backward : log_double_grad
inplace : (out_grad -> x_grad)
- backward_op : logit_grad
forward : logit (Tensor x, float eps = 1e-6f) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float eps)
......@@ -445,6 +537,29 @@
func : reciprocal_grad
inplace : (out_grad -> x_grad)
- backward_op : relu_double_grad
forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x_grad)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : relu_grad
forward : relu (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu_grad
backward: relu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : round_grad
forward : round(Tensor x) -> Tensor(out)
args : (Tensor out_grad)
......@@ -456,6 +571,29 @@
func : round_grad
inplace : (out_grad -> x_grad)
- backward_op : rsqrt_double_grad
forward : rsqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : rsqrt_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : rsqrt_grad
forward : rsqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : rsqrt_grad
backward : rsqrt_double_grad
inplace : (out_grad -> x_grad)
- backward_op : send_uv_grad
forward : send_uv (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str message_op = "ADD") -> Tensor(out)
args: (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, Tensor out_grad, str message_op = "ADD")
......@@ -467,6 +605,42 @@
func : send_uv_grad
data_type : x
- backward_op : sigmoid_double_grad
forward : sigmoid_grad (Tensor out, Tensor fwd_grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor fwd_grad_out, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(fwd_grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, fwd_grad_out]
kernel :
func : sigmoid_double_grad
backward : sigmoid_triple_grad
inplace : (grad_x_grad -> fwd_grad_out_grad)
- backward_op : sigmoid_grad
forward : sigmoid (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : sigmoid_grad
backward : sigmoid_double_grad
inplace : (out_grad -> x_grad)
- backward_op : sigmoid_triple_grad
forward : sigmoid_double_grad (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x) -> Tensor(grad_out), Tensor(grad_grad_out)
args : (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x, Tensor grad_out_grad, Tensor grad_grad_out_grad)
output : Tensor(out_grad), Tensor(fwd_grad_out_grad), Tensor(grad_grad_x_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param : [out, fwd_grad_out, grad_grad_x]
kernel :
func : sigmoid_triple_grad
optional : grad_grad_out_grad
inplace : (grad_grad_x -> fwd_grad_out_grad)
- backward_op : silu_grad
forward : silu (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
......@@ -512,6 +686,39 @@
func : sinh_grad
inplace : (out_grad -> x_grad)
- backward_op : softplus_grad
forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float beta, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus_grad
inplace : (out_grad -> x_grad)
- backward_op : softshrink_grad
forward : softshrink (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softshrink_grad
inplace : (out_grad -> x_grad)
- backward_op : softsign_grad
forward : softsign (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softsign_grad
inplace : (out_grad -> x_grad)
- backward_op : solve_grad
forward : solve (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out, Tensor out_grad)
......@@ -522,6 +729,52 @@
kernel :
func : solve_grad
- backward_op : sqrt_double_grad
forward : sqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : sqrt_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : sqrt_grad
forward : sqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : sqrt_grad
backward : sqrt_double_grad
inplace : (out_grad -> x_grad)
- backward_op : square_double_grad
forward : square_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : square_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : square_grad
forward : square (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : square_grad
backward : square_double_grad
inplace : (out_grad -> x_grad)
- backward_op : tan_grad
forward : tan (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
......@@ -557,6 +810,17 @@
backward : tanh_double_grad
inplace : (out_grad -> x_grad)
- backward_op : tanh_shrink_grad
forward : tanh_shrink (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : tanh_shrink_grad
inplace : (out_grad -> x_grad)
- backward_op : tanh_triple_grad
forward : tanh_double_grad (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward) -> Tensor(grad_out_new), Tensor(grad_out_grad)
args : (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward, Tensor grad_out_new_grad, Tensor grad_out_grad_grad)
......@@ -568,6 +832,17 @@
func : tanh_triple_grad
inplace : (grad_x_grad_forward -> grad_out_forward_grad)
- backward_op : thresholded_relu_grad
forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : thresholded_relu_grad
inplace : (out_grad -> x_grad)
- backward_op : trace_grad
forward : trace (Tensor x, int offset, int axis1, int axis2) -> Tensor(out)
args : (Tensor x, Tensor out_grad, int offset, int axis1, int axis2)
......
......@@ -75,6 +75,11 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict):
else:
return names[0].strip(), names[1].split(')')[0].strip()
def update_api_attr_name(attrs, attrs_alias_map):
for attr_item in attrs:
if attr_item['name'] in attrs_alias_map:
attr_item['name'] = attrs_alias_map[attr_item['name']]
for api_args in api_op_map:
api_name, op_name = get_api_and_op_name(api_args['op'])
if api_name not in forward_api_dict:
......@@ -99,6 +104,13 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict):
double_grad_item = backward_api_dict[double_grad_api_name]
backward_api_item['backward'] = double_grad_op_name
double_grad_item['op_name'] = double_grad_op_name
if 'attrs' in api_args:
update_api_attr_name(
double_grad_item['attrs'], api_args['attrs']
)
update_api_attr_name(
double_grad_item['forward']['attrs'], api_args['attrs']
)
# for triple grad
if len(backward_op_list) > 2:
......@@ -109,6 +121,14 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict):
triple_grad_item = backward_api_dict[triple_grad_api_name]
double_grad_item['backward'] = triple_grad_op_name
triple_grad_item['op_name'] = triple_grad_op_name
if 'attrs' in api_args:
update_api_attr_name(
triple_grad_item['attrs'], api_args['attrs']
)
update_api_attr_name(
triple_grad_item['forward']['attrs'],
api_args['attrs'],
)
key_set = ['inputs', 'attrs', 'outputs']
args_map = {}
......
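The generator change in this file adds update_api_attr_name and applies it to the double-grad and triple-grad items (and to the forward signatures recorded on them), so attribute aliases declared in the compat mapping take effect for higher-order backward ops as well, not only for the first-order grad. The leaky_relu compat entry added in this commit (copied below) is the kind of input that drives it: the negative_slope / alpha alias now covers leaky_relu_double_grad too.

# leaky_relu compat entry from this commit; the attrs block is what
# update_api_attr_name consumes for the grad and double-grad definitions alike.
- op : leaky_relu
  backward : leaky_relu_grad, leaky_relu_double_grad (leaky_relu_grad_grad)
  inputs :
    x : X
  outputs :
    out : Out
  attrs :
    negative_slope : alpha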
......@@ -459,7 +459,7 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker<T>
Input({{name_in_forward_orig | to_opmaker_name}})
{%- elif name in output_names %}
{% set name_in_forward_orig = output_orig_names[output_names.index(name)]%}
Output({{name | to_opmaker_name}})
Output({{name_in_forward_orig | to_opmaker_name}})
{%- elif name.endswith("_grad") %}{# output grad#}
{% set name_in_forward = name[:-5] %}
{% if name_in_forward in output_names %}
......
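The one-line template fix above changes how the generated SingleGradOpMaker wires a backward input that refers to a forward output: it now looks the output up by its original op-proto name (name_in_forward_orig) instead of the new-style name. Reading it against the rest of the commit, this matters for ops whose outputs are renamed in the compat mapping, for example relu: relu_grad consumes the forward output out, which the mapping renames to Out, so the generated maker has to call Output("Out"). The relevant entries (copied from this diff):

# relu entries from this commit: the grad op's input `out` is a forward output,
# and the compat mapping gives it the legacy proto name `Out`, which is the name
# the generated GradOpMaker must use.
- backward_op : relu_grad
  forward : relu (Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)

- op : relu
  backward : relu_grad, relu_double_grad (relu_grad_grad)
  inputs :
    x : X
  outputs :
    out : Out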
......@@ -217,29 +217,6 @@
invoke : cast (out_grad, x.dtype())
no_need_buffer : x
- backward_op : celu_double_grad
forward : celu_grad(Tensor x, Tensor grad_out, float alpha) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : celu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : celu_grad
forward : celu(Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float alpha)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu_grad
backward : celu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : clip_double_grad
forward : clip_grad (Tensor x, Tensor grad_out, Scalar min = 0., Scalar max = 0.) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_x_grad, Scalar min = 0., Scalar max = 0.)
......@@ -552,29 +529,6 @@
kernel :
func : elementwise_pow_grad
- backward_op : elu_double_grad
forward : elu_grad (Tensor x, Tensor out, Tensor grad_out, float alpha)-> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : elu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : elu_grad
forward : elu (Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, float alpha)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : elu_grad
backward : elu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : embedding_grad
forward : embedding (Tensor x, Tensor weight, int64_t padding_idx=-1, bool sparse=false) -> Tensor(out)
args : (Tensor x, Tensor weight, Tensor out_grad, int64_t padding_idx=-1, bool sparse=false)
......@@ -940,29 +894,6 @@
no_need_buffer : bias
optional : scale, bias
- backward_op : leaky_relu_double_grad
forward : leaky_relu_grad (Tensor x, Tensor grad_out, float negative_slope) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_x_grad, float negative_slope)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [grad_x_grad]
kernel :
func : leaky_relu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : leaky_relu_grad
forward : leaky_relu (Tensor x, float negative_slope) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float negative_slope)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : leaky_relu_grad
backward : leaky_relu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : lerp_grad
forward : lerp (Tensor x, Tensor y, Tensor weight) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor weight, Tensor out, Tensor out_grad)
......@@ -985,29 +916,6 @@
func : linear_interp_grad
data_type : output_grad
- backward_op : log_double_grad
forward : log_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : log_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : log_grad
forward : log (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : log_grad
backward : log_double_grad
inplace : (out_grad -> x_grad)
- backward_op : log_loss_grad
forward : log_loss (Tensor input, Tensor label, float epsilon) -> Tensor(out)
args : (Tensor input, Tensor label, Tensor out_grad, float epsilon)
......@@ -1537,29 +1445,6 @@
func : relu6_grad
inplace : (out_grad -> x_grad)
- backward_op : relu_double_grad
forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x_grad)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : relu_grad
forward : relu (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : relu_grad
backward: relu_double_grad
inplace : (out_grad -> x_grad)
- backward_op : renorm_grad
forward : renorm (Tensor x, float p, int axis, float max_norm) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float p, int axis, float max_norm)
......@@ -1683,29 +1568,6 @@
data_type : x
no_need_buffer : x
- backward_op : rsqrt_double_grad
forward : rsqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : rsqrt_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : rsqrt_grad
forward : rsqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : rsqrt_grad
backward : rsqrt_double_grad
inplace : (out_grad -> x_grad)
- backward_op : scale_grad
forward : scale (Tensor x, Scalar scale, float bias, bool bias_after_scale) -> Tensor(out)
args : (Tensor out_grad, Scalar scale=1.0, bool bias_after_scale=true)
......@@ -1791,42 +1653,6 @@
func : sigmoid_cross_entropy_with_logits_grad
inplace : (out_grad -> x_grad)
- backward_op : sigmoid_double_grad
forward : sigmoid_grad (Tensor out, Tensor fwd_grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor fwd_grad_out, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(fwd_grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, fwd_grad_out]
kernel :
func : sigmoid_double_grad
backward : sigmoid_triple_grad
inplace : (grad_x_grad -> fwd_grad_out_grad)
- backward_op : sigmoid_grad
forward : sigmoid (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : sigmoid_grad
backward : sigmoid_double_grad
inplace : (out_grad -> x_grad)
- backward_op : sigmoid_triple_grad
forward : sigmoid_double_grad (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x) -> Tensor(grad_out), Tensor(grad_grad_out)
args : (Tensor out, Tensor fwd_grad_out, Tensor grad_grad_x, Tensor grad_out_grad, Tensor grad_grad_out_grad)
output : Tensor(out_grad), Tensor(fwd_grad_out_grad), Tensor(grad_grad_x_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param : [out, fwd_grad_out, grad_grad_x]
kernel :
func : sigmoid_triple_grad
optional : grad_grad_out_grad
inplace : (grad_grad_x -> fwd_grad_out_grad)
- backward_op : sign_grad
forward : sign (Tensor x) -> Tensor(out)
args : (Tensor out_grad)
......@@ -1872,39 +1698,6 @@
func : softmax_grad
use_gpudnn : true
- backward_op : softplus_grad
forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float beta, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus_grad
inplace : (out_grad -> x_grad)
- backward_op : softshrink_grad
forward : softshrink (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : soft_shrink_grad
inplace : (out_grad -> x_grad)
- backward_op : softsign_grad
forward : softsign (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softsign_grad
inplace : (out_grad -> x_grad)
- backward_op : spectral_norm_grad
forward : spectral_norm (Tensor weight, Tensor u, Tensor v, int dim, int power_iters, float eps) -> Tensor(out)
args : (Tensor weight, Tensor u, Tensor v, Tensor out_grad, int dim, int power_iters, float eps)
......@@ -1927,52 +1720,6 @@
output : Tensor(x_grad)
invoke : concat( out_grad, axis)
- backward_op : sqrt_double_grad
forward : sqrt_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : sqrt_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : sqrt_grad
forward : sqrt (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out]
kernel :
func : sqrt_grad
backward : sqrt_double_grad
inplace : (out_grad -> x_grad)
- backward_op : square_double_grad
forward : square_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : square_double_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_op : square_grad
forward : square (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : square_grad
backward : square_double_grad
inplace : (out_grad -> x_grad)
- backward_op : squared_l2_norm_grad
forward : squared_l2_norm(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
......@@ -2112,17 +1859,6 @@
kernel :
func : take_along_axis_grad
- backward_op : tanh_shrink_grad
forward : tanh_shrink (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : tanh_shrink_grad
inplace : (out_grad -> x_grad)
- backward_op : temporal_shift_grad
forward : temporal_shift(Tensor x, int seg_num, float shift_ratio, str data_format_str) -> Tensor(out)
args : (Tensor out_grad, int seg_num, float shift_ratio, str data_format_str)
......@@ -2133,17 +1869,6 @@
kernel :
func : temporal_shift_grad
- backward_op : thresholded_relu_grad
forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : thresholded_relu_grad
inplace : (out_grad -> x_grad)
- backward_op : tile_double_grad
forward : tile_grad (Tensor x, Tensor grad_out, IntArray repeat_times) -> Tensor(grad_x)
args : (Tensor grad_x_grad, IntArray repeat_times)
......
......@@ -357,16 +357,6 @@
data_type : x
backward : cast_grad
- op : celu
args : (Tensor x, float alpha)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu
backward : celu_grad
- op : check_finite_and_unscale_
args : (Tensor[] x, Tensor scale, Tensor input_found_infinite)
output : Tensor[](out){x.size()}, Tensor(output_found_infinite)
......@@ -664,17 +654,6 @@
func : elementwise_pow
backward : elementwise_pow_grad
- op : elu
args : (Tensor x, float alpha)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : elu
inplace : (x -> out)
backward : elu_grad
- op : embedding
args : (Tensor x, Tensor weight, int64_t padding_idx=-1, bool sparse=false)
output : Tensor
......@@ -1241,16 +1220,6 @@
backward : layer_norm_grad
optional : scale, bias
- op : leaky_relu
args : (Tensor x, float negative_slope)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : leaky_relu
backward : leaky_relu_grad
- op : lerp
args : (Tensor x, Tensor y, Tensor weight)
output : Tensor(out)
......@@ -1300,15 +1269,6 @@
data_type : dtype
backend : place
- op : log
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : log
backward: log_grad
- op : log_loss
args : (Tensor input, Tensor label, float epsilon)
output : Tensor
......@@ -1910,16 +1870,6 @@
func : real
backward : real_grad
- op : relu
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : relu
inplace : (x -> out)
backward : relu_grad
- op : relu6
args : (Tensor x, float threshold)
output : Tensor
......@@ -2032,16 +1982,6 @@
func : roll
backward : roll_grad
- op : rsqrt
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : rsqrt
inplace : (x -> out)
backward : rsqrt_grad
- op : scale
args : (Tensor x, Scalar scale, float bias, bool bias_after_scale)
output : Tensor(out)
......@@ -2160,15 +2100,6 @@
kernel :
func : shard_index
- op : sigmoid
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : sigmoid
backward : sigmoid_grad
- op : sigmoid_cross_entropy_with_logits
args : (Tensor x, Tensor label, bool normalize, int ignore_index)
output : Tensor
......@@ -2216,36 +2147,6 @@
inplace : (x -> out)
backward : softmax_grad
- op : softplus
args : (Tensor x, float beta, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus
backward : softplus_grad
- op : softshrink
args : (Tensor x, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : soft_shrink
backward : softshrink_grad
- op : softsign
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softsign
backward : softsign_grad
- op : spectral_norm
args : (Tensor weight, Tensor u, Tensor v, int dim, int power_iters, float eps)
output : Tensor
......@@ -2274,25 +2175,6 @@
func : split_with_num
backward : split_with_num_grad
- op : sqrt
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : sqrt
inplace : (x -> out)
backward : sqrt_grad
- op : square
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : square
backward : square_grad
- op : squared_l2_norm
args : (Tensor x)
output : Tensor
......@@ -2394,15 +2276,6 @@
data_type : arr
backward : take_along_axis_grad
- op : tanh_shrink
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : tanh_shrink
backward : tanh_shrink_grad
- op : temporal_shift
args : (Tensor x, int seg_num, float shift_ratio, str data_format_str)
output : Tensor
......@@ -2412,16 +2285,6 @@
func : temporal_shift
backward : temporal_shift_grad
- op : thresholded_relu
args : (Tensor x, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : thresholded_relu
backward : thresholded_relu_grad
- op : tile
args : (Tensor x, IntArray repeat_times)
output : Tensor
......
......@@ -131,6 +131,13 @@
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
- op : celu
backward : celu_grad, celu_double_grad(celu_grad_grad)
inputs :
x : X
outputs :
out : Out
- op : cholesky
inputs :
x : X
......@@ -316,7 +323,11 @@
bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f]
- op : elu
backward : elu_grad
backward : elu_grad, elu_double_grad (elu_grad_grad)
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false]
......@@ -504,7 +515,13 @@
attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", bool is_test = false]
- op : leaky_relu
backward : leaky_relu_grad
backward : leaky_relu_grad, leaky_relu_double_grad (leaky_relu_grad_grad)
inputs :
x : X
outputs :
out : Out
attrs:
negative_slope : alpha
extra :
attrs : [bool use_mkldnn = false]
......@@ -520,7 +537,11 @@
attrs : [bool use_mkldnn = false]
- op : log
backward : log_grad
backward : log_grad, log_double_grad (log_grad_grad)
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
......@@ -725,7 +746,11 @@
attrs : [bool use_mkldnn = false]
- op : relu
backward : relu_grad
backward : relu_grad, relu_double_grad (relu_grad_grad)
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
......@@ -754,7 +779,11 @@
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
- op : rsqrt
backward : rsqrt_grad
backward : rsqrt_grad, rsqrt_double_grad (rsqrt_grad_grad)
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
......@@ -784,7 +813,11 @@
attrs : [bool use_mkldnn = false]
- op : sigmoid
backward : sigmoid_grad
backward : sigmoid_grad, sigmoid_double_grad (sigmoid_grad_grad), sigmoid_triple_grad
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
......@@ -827,12 +860,29 @@
- op : softplus
backward : softplus_grad
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false, str fuse_activation_type = "", float fuse_activation_alpha = 0.0f,
float fuse_activation_beta = 0.0f, float fuse_activation_scale = 1.0f]
- op : softshrink
backward : softshrink_grad
inputs :
x : X
outputs :
out : Out
attrs :
threshold : lambda
- op : softsign
backward : softsign_grad
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
......@@ -843,12 +893,20 @@
out : Out
- op : sqrt
backward : sqrt_grad
backward : sqrt_grad, sqrt_double_grad (sqrt_grad_grad)
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
- op : square
backward : square_grad
backward : square_grad, square_double_grad (square_grad_grad)
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
......@@ -903,9 +961,19 @@
- op : tanh_shrink
backward : tanh_shrink_grad
inputs :
x : X
outputs :
out : Out
extra :
attrs : [bool use_mkldnn = false, bool use_cudnn = false]
- op : thresholded_relu
inputs :
x : X
outputs :
out : Out
- op : trace
inputs :
x : Input
......
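Besides input/output renames, the compat entries added in this file also carry attribute aliases. softshrink is the fullest example: its new-style threshold attribute maps to the legacy lambda (compare the removed SoftShrinkOpMaker earlier in this diff), and the PHI grad-kernel registration at the bottom of the commit is renamed from soft_shrink_grad to softshrink_grad, presumably to match the kernel name referenced by the new entries.

# softshrink compat entry from this commit; attribute alias alongside the I/O renames.
- op : softshrink
  backward : softshrink_grad
  inputs :
    x : X
  outputs :
    out : Out
  attrs :
    threshold : lambda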
......@@ -106,6 +106,16 @@
inplace : (x -> out)
backward : ceil_grad
- op : celu
args : (Tensor x, float alpha = 1.0)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
param: [x]
kernel :
func : celu
backward : celu_grad
- op : cholesky
args : (Tensor x, bool upper=false)
output : Tensor
......@@ -207,6 +217,17 @@
data_type : x
backward : dot_grad
- op : elu
args : (Tensor x, float alpha = 1.0f)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : elu
inplace : (x -> out)
backward : elu_grad
- op : erf
args : (Tensor x)
output : Tensor
......@@ -312,6 +333,16 @@
func : hard_sigmoid
backward : hardsigmoid_grad
- op : leaky_relu
args : (Tensor x, float negative_slope = 0.02f)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : leaky_relu
backward : leaky_relu_grad
- op : lgamma
args : (Tensor x)
output : Tensor(out)
......@@ -321,6 +352,15 @@
func : lgamma
backward : lgamma_grad
- op : log
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : log
backward: log_grad
- op : log10
args : (Tensor x)
output : Tensor
......@@ -395,6 +435,16 @@
inplace : (x -> out)
backward : reciprocal_grad
- op : relu
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : relu
inplace : (x -> out)
backward : relu_grad
- op : round
args : (Tensor x)
output : Tensor(out)
......@@ -405,6 +455,16 @@
inplace : (x -> out)
backward : round_grad
- op : rsqrt
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : rsqrt
inplace : (x -> out)
backward : rsqrt_grad
- op : send_uv
args : (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str message_op = "ADD")
output : Tensor(out)
......@@ -415,6 +475,15 @@
data_type : x
backward : send_uv_grad
- op : sigmoid
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : sigmoid
backward : sigmoid_grad
- op : silu
args : (Tensor x)
output : Tensor
......@@ -442,6 +511,36 @@
func : sinh
backward : sinh_grad
- op : softplus
args : (Tensor x, float beta = 1.0, float threshold = 20.0f)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus
backward : softplus_grad
- op : softshrink
args : (Tensor x, float threshold = 0.5)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softshrink
backward : softshrink_grad
- op : softsign
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softsign
backward : softsign_grad
- op : solve
args : (Tensor x, Tensor y)
output : Tensor
......@@ -452,6 +551,27 @@
data_type : x
backward : solve_grad
- op : sqrt
args : (Tensor x)
output : Tensor(out)
infer_meta :
func : UnchangedInferMeta
kernel :
func : sqrt {dense -> dense},
sqrt_sr {selected_rows -> selected_rows}
inplace : (x -> out)
backward : sqrt_grad
- op : square
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : square {dense -> dense},
square_sr {selected_rows -> selected_rows}
backward : square_grad
- op : tan
args : (Tensor x)
output : Tensor
......@@ -471,6 +591,25 @@
inplace : (x -> out)
backward : tanh_grad
- op : tanh_shrink
args : (Tensor x)
output : Tensor
infer_meta :
func : UnchangedInferMeta
kernel :
func : tanh_shrink
backward : tanh_shrink_grad
- op : thresholded_relu
args : (Tensor x, float threshold = 1.0)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : thresholded_relu
backward : thresholded_relu_grad
- op : trace
args : (Tensor x, int offset = 0, int axis1 = 0, int axis2 = 1)
output : Tensor
......
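Several of the entries above also declare in-place variants through `inplace : (x -> out)`. A short, hedged usage sketch, assuming the conventional trailing-underscore in-place APIs are exposed in this build:

import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([0.25, 1.0, 4.0])
y = paddle.sqrt(x)                           # out-of-place: returns a new tensor
paddle.sqrt_(x)                              # in-place counterpart from `inplace : (x -> out)`
r = F.relu_(paddle.to_tensor([-1.0, 2.0]))   # in-place relu, likewise declared in the YAML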
......@@ -268,7 +268,7 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(leaky_relu_grad, LeakyReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
ThresholdedReluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(softshrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
......
......@@ -151,7 +151,7 @@ PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(relu6, Relu6Kernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
......
......@@ -373,7 +373,7 @@ PD_REGISTER_KERNEL(exp_grad,
int64_t,
phi::dtype::float16) {}
PD_REGISTER_ACTIVATION_GRAD_KERNEL(soft_shrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(softshrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(silu_grad, SiluGradKernel)
......
......@@ -242,7 +242,7 @@ PD_REGISTER_KERNEL(square,
phi::dtype::bfloat16) {}
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(soft_shrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel)
......
......@@ -39,19 +39,8 @@ namespace phi {
#define comma ,
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Square, "square", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardTanh, "hard_tanh", "t_min" comma "t_max");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(LeakyRelu, "leaky_relu", "alpha");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(ThresholdedRelu,
"thresholded_relu",
"threshold");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(SoftShrink, "soft_shrink", "lambda");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Mish, "mish", "threshold");
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(TanhShrink, "tanh_shrink", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Softsign, "softsign", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log, "log", ); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Celu, "celu", "alpha"); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardSwish,
"hard_swish",
"threshold" comma "scale" comma
......@@ -62,106 +51,8 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(STanh,
"stanh",
"scale_a" comma "scale_b"); // NOLINT
DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Softplus,
"softplus",
"beta" comma "threshold"); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sigmoid, "sigmoid", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sqrt, "sqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Rsqrt, "rsqrt", ); // NOLINT
DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu6, "relu6", "threshold"); // NOLINT
KernelSignature SqrtActiOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("X")) {
return KernelSignature("sqrt", {"X"}, {}, {"Out"});
} else {
return KernelSignature("sqrt_sr", {"X"}, {}, {"Out"});
}
}
KernelSignature SquareActiOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("X")) {
return KernelSignature("square", {"X"}, {}, {"Out"});
} else {
return KernelSignature("square_sr", {"X"}, {}, {"Out"});
}
}
KernelSignature ReluDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("relu_double_grad", {"Out", "DDX"}, {}, {"DDOut"});
}
KernelSignature SigmoidDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"sigmoid_double_grad", {"Out", "DOut", "DDX"}, {}, {"DOutNew", "DDOut"});
}
KernelSignature SigmoidTripleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("sigmoid_triple_grad",
{"Out", "DOut", "DDX", "D_DOut_New", "D_DDOut"},
{},
{"D_OutNew", "D_DOut", "D_DDx"});
}
KernelSignature LeakyReluDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"leaky_relu_double_grad", {"X", "DDX"}, {"alpha"}, {"DDOut"});
}
KernelSignature LeakyReluOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("leaky_relu", {"X"}, {"alpha"}, {"Out"});
}
KernelSignature EluOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("elu", {"X"}, {"alpha"}, {"Out"});
}
KernelSignature EluGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature(
"elu_grad", {"X", "Out", "Out@GRAD"}, {"alpha"}, {"X@GRAD"});
}
KernelSignature EluDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"elu_double_grad", {"X", "DOut", "DDX"}, {"alpha"}, {"DX", "DDOut"});
}
KernelSignature LogDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"log_double_grad", {"X", "DOut", "DDX"}, {}, {"DX", "DDOut"});
}
KernelSignature SqrtDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"sqrt_double_grad", {"Out", "DX", "DDX"}, {}, {"DOut", "DDOut"});
}
KernelSignature RsqrtDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"rsqrt_double_grad", {"Out", "DX", "DDX"}, {}, {"DOut", "DDOut"});
}
KernelSignature CeluDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"celu_double_grad", {"X", "DOut", "DDX"}, {"alpha"}, {"DX", "DDOut"});
}
KernelSignature SquareDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"square_double_grad", {"X", "DOut", "DDX"}, {}, {"DX", "DDOut"});
}
KernelSignature PowOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.HasInput("FactorTensor")) {
return KernelSignature("pow", {"X"}, {"FactorTensor"}, {"Out"});
......@@ -182,70 +73,17 @@ KernelSignature PowGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(relu_grad_grad, relu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(leaky_relu_grad_grad, leaky_relu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(softshrink, soft_shrink);
PD_REGISTER_BASE_KERNEL_NAME(softshrink_grad, soft_shrink_grad);
PD_REGISTER_BASE_KERNEL_NAME(elu_grad_grad, elu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(sigmoid_grad_grad, sigmoid_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(log_grad_grad, log_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(sqrt_grad_grad, sqrt_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(rsqrt_grad_grad, rsqrt_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(celu_grad_grad, celu_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(square_grad_grad, square_double_grad);
PD_REGISTER_BASE_KERNEL_NAME(brelu, hard_tanh);
PD_REGISTER_BASE_KERNEL_NAME(brelu_grad, hard_tanh_grad);
PD_REGISTER_ARG_MAPPING_FN(relu_grad, phi::ReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(square_grad, phi::SquareGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sqrt_grad, phi::SqrtGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sqrt_grad_grad,
phi::SqrtDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(rsqrt_grad, phi::RsqrtGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(rsqrt_grad_grad,
phi::RsqrtDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(mish_grad, phi::MishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(stanh_grad, phi::STanhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softplus_grad, phi::SoftplusGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu_grad_grad,
phi::ReluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(brelu_grad, phi::HardTanhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(leaky_relu, phi::LeakyReluOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(leaky_relu_grad,
phi::LeakyReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(leaky_relu_grad_grad,
phi::LeakyReluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(thresholded_relu_grad,
phi::ThresholdedReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu6_grad, phi::Relu6GradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softshrink_grad,
phi::SoftShrinkGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(tanh_shrink_grad,
phi::TanhShrinkGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elu, phi::EluOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elu_grad, phi::EluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(elu_grad_grad, phi::EluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softsign_grad, phi::SoftsignGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sigmoid_grad, phi::SigmoidGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sigmoid_grad_grad,
phi::SigmoidDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sigmoid_triple_grad,
phi::SigmoidTripleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log_grad, phi::LogGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(log_grad_grad, phi::LogDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sqrt, phi::SqrtActiOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(square, phi::SquareActiOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(hard_swish_grad,
phi::HardSwishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(swish_grad, phi::SwishGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(pow_grad, phi::PowGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(pow, phi::PowOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(celu_grad, phi::CeluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(celu_grad_grad,
phi::CeluDoubleGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(square_grad_grad,
phi::SquareDoubleGradOpArgumentMapping);
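A conceptual sketch in plain Python (not PaddlePaddle API) of what the removed SqrtActiOpArgumentMapping / SquareActiOpArgumentMapping did, namely choosing the dense or selected_rows phi kernel based on the input tensor type; that choice is now expressed declaratively in ops.yaml:

# Conceptual only: mirrors the removed C++ argument mapping, now generated from YAML.
def map_activation_signature(op_name: str, input_is_dense: bool):
    # DenseTensor inputs dispatch to `<op>`, SelectedRows inputs to `<op>_sr`.
    kernel = op_name if input_is_dense else f"{op_name}_sr"
    return (kernel, ["X"], [], ["Out"])

assert map_activation_signature("sqrt", True)[0] == "sqrt"
assert map_activation_signature("square", False)[0] == "square_sr"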
......@@ -29,7 +29,6 @@ __deprecated_func_name__ = {
}
__activations_noattr__ = [
'sigmoid',
'silu',
'logsigmoid',
'tanh_shrink',
......@@ -38,12 +37,7 @@ __activations_noattr__ = [
'tanh',
]
__unary_func__ = [
'sqrt',
'rsqrt',
'abs',
'square',
]
__unary_func__ = ['abs']
__inplace_unary_func__ = [
'exp_',
......@@ -85,23 +79,6 @@ for _OP in set(__inplace_unary_func__):
_func = generate_inplace_fn(_OP)
globals()[_OP] = _func
add_sample_code(
globals()["sigmoid"],
r"""
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = F.sigmoid(x)
print(out)
# [0.40131234 0.450166 0.52497919 0.57444252]
""",
)
add_sample_code(
globals()["silu"],
r"""
......@@ -163,38 +140,6 @@ Examples:
""",
)
add_sample_code(
globals()["sqrt"],
r"""
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
out = paddle.sqrt(x)
print(out)
# [0.31622777 0.4472136 0.54772256 0.63245553]
""",
)
add_sample_code(
globals()["rsqrt"],
r"""
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
out = paddle.rsqrt(x)
print(out)
# [3.16227766 2.23606798 1.82574186 1.58113883]
""",
)
add_sample_code(
globals()["abs"],
r"""
......@@ -211,22 +156,6 @@ Examples:
""",
)
add_sample_code(
globals()["square"],
r"""
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = paddle.square(x)
print(out)
# [0.16 0.04 0.01 0.09]
""",
)
add_sample_code(
globals()["softplus"],
r"""
......@@ -812,6 +741,85 @@ def round(x, name=None):
return out
def rsqrt(x, name=None):
"""
Rsqrt Activation Operator.
Please make sure the input is valid to avoid numeric errors.
.. math::
out = \\frac{1}{\\sqrt{x}}
Args:
x (Tensor): Input of Rsqrt operator, an N-D Tensor, with data type float32, float64 or float16.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor. Output of Rsqrt operator, a Tensor with the same shape as the input.
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
out = paddle.rsqrt(x)
print(out)
# [3.16227766 2.23606798 1.82574186 1.58113883]
"""
if in_dygraph_mode():
return _C_ops.rsqrt(x)
if _in_legacy_dygraph():
return _legacy_C_ops.rsqrt(x)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'rsqrt')
helper = LayerHelper('rsqrt', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type='rsqrt', inputs={"X": x}, outputs={"Out": out})
return out
def sigmoid(x, name=None):
"""
Sigmoid Activation.
.. math::
out = \\frac{1}{1 + e^{-x}}
Args:
x (Tensor): Input of Sigmoid operator, an N-D Tensor, with data type float32, float64 or float16.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor. Output of Sigmoid operator, a Tensor with the same shape as the input.
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = F.sigmoid(x)
print(out)
# [0.40131234 0.450166 0.52497919 0.57444252]
"""
if in_dygraph_mode():
return _C_ops.sigmoid(x)
if _in_legacy_dygraph():
return _legacy_C_ops.sigmoid(x)
check_variable_and_dtype(
x, 'x', ['float16', 'float32', 'float64'], 'sigmoid'
)
helper = LayerHelper('sigmoid', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type='sigmoid', inputs={"X": x}, outputs={"Out": out})
return out
def sin(x, name=None):
"""
Sine Activation Operator.
......@@ -886,6 +894,91 @@ def sinh(x, name=None):
return out
def sqrt(x, name=None):
"""
Sqrt Activation Operator.
.. math::
out = \\sqrt{x} = x^{1/2}
Args:
x (Tensor): Input of Sqrt operator, an N-D Tensor, with data type float32, float64 or float16.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor. Output of Sqrt operator, a Tensor with the same shape as the input.
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4])
out = paddle.sqrt(x)
print(out)
# [0.31622777 0.4472136 0.54772256 0.63245553]
"""
if in_dygraph_mode():
return _C_ops.sqrt(x)
if _in_legacy_dygraph():
return _legacy_C_ops.sqrt(x)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'sqrt')
helper = LayerHelper('sqrt', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type='sqrt', inputs={"X": x}, outputs={"Out": out})
return out
def square(x, name=None):
"""
Square each element of the input.
.. math::
out = x^2
Args:
x (Tensor): Input of Square operator, an N-D Tensor, with data type int32, int64, float16, float32, float64, complex64 or complex128.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor. Output of Square operator, a Tensor with the same shape as the input.
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
out = paddle.square(x)
print(out)
# [0.16 0.04 0.01 0.09]
"""
if in_dygraph_mode():
return _C_ops.square(x)
if _in_legacy_dygraph():
return _legacy_C_ops.square(x)
check_variable_and_dtype(
x,
'x',
[
'int32',
'int64',
'float16',
'float32',
'float64',
'complex64',
'complex128',
],
'square',
)
helper = LayerHelper('square', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type='square', inputs={"X": x}, outputs={"Out": out})
return out
def tan(x, name=None):
"""
Tangent Operator. Computes tangent of x element-wise.
......
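A minimal static-graph sketch (illustrative, assumes paddle 2.x with numpy available) showing the LayerHelper branch of paddle.square above being exercised once dygraph mode is disabled:

import numpy as np
import paddle

paddle.enable_static()
x = paddle.static.data(name='x', shape=[4], dtype='float32')
y = paddle.square(x)  # appends a "square" op to the default main program

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(paddle.static.default_startup_program())
(out,) = exe.run(feed={'x': np.array([-0.4, -0.2, 0.1, 0.3], dtype='float32')},
                 fetch_list=[y])
# out is approximately [0.16, 0.04, 0.01, 0.09]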