diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 6876f5423d9a1cbf1816b8d53874af7988778b05..ca12ec7cb73a37794b4facd8c2e175e9281cf4a2 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -124,7 +124,7 @@ paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name
 paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
-paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'name'], varargs=None, keywords=None, defaults=(False, False, None))
+paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
 paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times'], varargs=None, keywords=None, defaults=(0, False))
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
diff --git a/paddle/fluid/framework/grad_op_desc_maker.h b/paddle/fluid/framework/grad_op_desc_maker.h
index b4d3fa25c35fbf25b3d2fdd9fa1045dda0f773ec..9bccb1a32bf63b30351ef4428594691b0eef0b6a 100644
--- a/paddle/fluid/framework/grad_op_desc_maker.h
+++ b/paddle/fluid/framework/grad_op_desc_maker.h
@@ -129,6 +129,9 @@ class GradOpDescMakerBase {
 
   std::string ForwardOpType() const { return this->fwd_op_.Type(); }
 
+ protected:
+  const OpDesc& ForwardOp() const { return fwd_op_; }
+
  private:
   const OpDesc& fwd_op_;
   const std::unordered_set<std::string>& no_grad_set_;
diff --git a/paddle/fluid/operators/elementwise_mul_op.cc b/paddle/fluid/operators/elementwise_mul_op.cc
index 7cd67e74de6b9c4fbc718f60b4f671ccab2f9956..86a8459a79135d1fbcba6886172acc5a2abdb88b 100644
--- a/paddle/fluid/operators/elementwise_mul_op.cc
+++ b/paddle/fluid/operators/elementwise_mul_op.cc
@@ -13,9 +13,45 @@ See the License for the specific language governing permissions and
 limitations under the License. */
*/ #include "paddle/fluid/operators/elementwise_mul_op.h" +#include #include "paddle/fluid/operators/elementwise_op.h" + +namespace paddle { +namespace operators { + +class ElementwiseMulOpGradDescMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("elementwise_mul_grad"); + op->SetInput("X", Input("X")); + op->SetInput("Y", Input("Y")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetAttrMap(Attrs()); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetOutput(framework::GradVarName("Y"), InputGrad("Y")); + return op; + } +}; + +class ElementwiseMulOpMaker : public ElementwiseOpMaker { + protected: + virtual std::string GetName() const { return "Mul"; } + virtual std::string GetEquation() const { return "Out = X \\\\odot Y"; } +}; + +} // namespace operators +} // namespace paddle + namespace ops = paddle::operators; -REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\\\odot Y"); +REGISTER_OPERATOR(elementwise_mul, ops::ElementwiseOp, + ops::ElementwiseMulOpMaker, ops::ElementwiseOpInferVarType, + ops::ElementwiseMulOpGradDescMaker); +REGISTER_OPERATOR(elementwise_mul_grad, ops::ElementwiseOpGrad); + REGISTER_OP_CPU_KERNEL( elementwise_mul, ops::ElementwiseMulKernel, diff --git a/paddle/fluid/operators/elementwise_mul_op.h b/paddle/fluid/operators/elementwise_mul_op.h index 4437da4d95f97b5cbbca1650badf9710c26b4380..b870d08a1a28fd3e678aeb7211f7e3ec8b2c4c65 100644 --- a/paddle/fluid/operators/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise_mul_op.h @@ -93,8 +93,8 @@ class ElementwiseMulGradKernel : public ElemwiseGradKernel { auto* x = ctx.Input("X"); auto* y = ctx.Input("Y"); - auto* out = ctx.Input("Out"); auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* out = dout; // out is not necessary auto* dx = ctx.Output(framework::GradVarName("X")); auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 7182149164854038bb67a9f06cdbec8a4a0f1fb2..242a1b9ae92ade0caf1b0f1fcb5458b8b7070d84 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -59,7 +59,8 @@ class MatMulKernel : public framework::OpKernel { RowMatrixFromVector(x.dims()), 0, context.Attr("transpose_X")); auto mat_dim_b = math::CreateMatrixDescriptor( ColumnMatrixFromVector(y.dims()), 0, context.Attr("transpose_Y")); - blas.MatMul(x, mat_dim_a, y, mat_dim_b, T(1), out, T(0)); + auto scale = static_cast(context.Attr("alpha")); + blas.MatMul(x, mat_dim_a, y, mat_dim_b, scale, out, T(0)); } }; @@ -185,7 +186,8 @@ class MatMulGradKernel : public framework::OpKernel { auto blas = math::GetBlas(context); auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a); auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b); - blas.MatMul(a, mat_dim_a, b, mat_dim_b, T(1), out, T(0)); + blas.MatMul(a, mat_dim_a, b, mat_dim_b, + static_cast(context.Attr("alpha")), out, T(0)); } void CalcInputGrad(const framework::ExecutionContext &context, @@ -334,6 +336,7 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker { R"DOC(If true, use the transpose of `Y`. )DOC") .SetDefault(false); + AddAttr("alpha", "The scale of Out").SetDefault(1.0f); AddComment(R"DOC( MatMul Operator. 
diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc
index 2a8e4af516ce9341772d4668dc993215b4aae24d..363abfb0e0c96e8a4d82124dff168f28e339a9ae 100644
--- a/paddle/fluid/operators/mul_op.cc
+++ b/paddle/fluid/operators/mul_op.cc
@@ -156,12 +156,29 @@ class MulGradOp : public framework::OperatorWithKernel {
   }
 };
 
+class MulOpGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> retv(new framework::OpDesc());
+    retv->SetType("mul_grad");
+    retv->SetInput("X", Input("X"));
+    retv->SetInput("Y", Input("Y"));
+    retv->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    retv->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    retv->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    retv->SetAttrMap(Attrs());
+    return retv;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpGradMaker);
 REGISTER_OPERATOR(mul_grad, ops::MulGradOp);
 REGISTER_OP_CPU_KERNEL(
     mul, ops::MulKernel<paddle::platform::CPUDeviceContext, float>,
diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc
index c614de2eac143b3a545c60226aefa93dd72dea4f..13be6c65be58314a75124106eb09b1300305baf0 100644
--- a/paddle/fluid/operators/scale_op.cc
+++ b/paddle/fluid/operators/scale_op.cc
@@ -52,6 +52,12 @@
 $$Out = scale*X$$
 )DOC");
     AddAttr<float>("scale", "The scaling factor of the scale operator.")
         .SetDefault(1.0);
+    AddAttr<float>("bias", "The bias of the scale operator.").SetDefault(0.0);
+    AddAttr<bool>(
+        "bias_after_scale",
+        "Apply bias addition after or before scaling. It is useful for "
+        "numeric stability in some circumstances.")
+        .SetDefault(true);
   }
 };
 
@@ -80,6 +86,8 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
     grad_op->SetInput("X", OutputGrad("Out"));
     grad_op->SetOutput("Out", InputGrad("X"));
     grad_op->SetAttr("scale", GetAttr("scale"));
+    grad_op->SetAttr("bias", 0.0f);
+    grad_op->SetAttr("bias_after_scale", true);
     return std::unique_ptr<framework::OpDesc>(grad_op);
   }
 };
diff --git a/paddle/fluid/operators/scale_op.h b/paddle/fluid/operators/scale_op.h
index fe035aba81dd74d21539974beed255275be3013b..d8a199bc2b860515645b4954b49d8eb59fbd02dc 100644
--- a/paddle/fluid/operators/scale_op.h
+++ b/paddle/fluid/operators/scale_op.h
@@ -34,6 +34,8 @@ class ScaleKernel : public framework::OpKernel<T> {
                       "in and out should have the same dim");
 
     auto scale = static_cast<T>(ctx.Attr<float>("scale"));
+    auto bias = static_cast<T>(ctx.Attr<float>("bias"));
+    auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
 
     if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
       auto& in_slr = in_var->Get<framework::SelectedRows>();
@@ -45,7 +47,11 @@
     auto eigen_out = framework::EigenVector<T>::Flatten(*out);
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(dev) = scale * eigen_in;
+    if (bias_after_scale) {
+      eigen_out.device(dev) = scale * eigen_in + bias;
+    } else {
+      eigen_out.device(dev) = scale * (eigen_in + bias);
+    }
   }
 };
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index e8a11e68b124eece029f8e8289069f8f22f43a7b..c7df815175c912d3e0e476a53c370d42cf45e5e0 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -3499,7 +3499,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
     return out
 
 
-def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
+def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
     """
     Applies matrix multiplication to two tensors.
 
@@ -3533,6 +3533,7 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
         y (Variable): The input variable which is a Tensor or LoDTensor.
         transpose_x (bool): Whether to transpose :math:`x` before multiplication.
         transpose_y (bool): Whether to transpose :math:`y` before multiplication.
+        alpha (float): The scale of output. Default 1.0.
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
 
@@ -3600,8 +3601,11 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
         inputs={'X': x,
                 'Y': y},
         outputs={'Out': out},
-        attrs={'transpose_X': transpose_x,
-               'transpose_Y': transpose_y})
+        attrs={
+            'transpose_X': transpose_x,
+            'transpose_Y': transpose_y,
+            'alpha': alpha,
+        })
     return out
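Aside (not part of the patch): the scale op's new attributes select between two formulas. A NumPy sketch of the semantics implemented by the Eigen branch above; the function name is illustrative:

import numpy as np

def scale_reference(x, scale=1.0, bias=0.0, bias_after_scale=True):
    if bias_after_scale:
        return scale * x + bias   # eigen_out = scale * eigen_in + bias
    return scale * (x + bias)     # eigen_out = scale * (eigen_in + bias)

And a sketch of calling the extended Python API when building a program; the variable names here are illustrative, and `alpha` defaults to 1.0 so existing callers are unchanged:

import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[2, 3], dtype='float32', append_batch_size=False)
y = fluid.layers.data(name='y', shape=[3, 4], dtype='float32', append_batch_size=False)
out = fluid.layers.matmul(x, y, alpha=0.5)  # Out = 0.5 * (x @ y)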