From 7d16fe87a30f1909cb9a8934f2a72e2064103e80 Mon Sep 17 00:00:00 2001
From: caoying03
Date: Wed, 6 Sep 2017 16:05:29 +0800
Subject: [PATCH] refine softmax operator.

---
 paddle/operators/scale_op.cc                   | 18 ++++++++-----
 paddle/operators/softmax_op.cc                 | 27 ++++++++++---------
 paddle/operators/softmax_op.h                  | 16 +++++------
 .../v2/framework/tests/test_softmax_op.py      | 21 +++++++++------
 4 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc
index 8e96a74c94..c2e005444b 100644
--- a/paddle/operators/scale_op.cc
+++ b/paddle/operators/scale_op.cc
@@ -44,11 +44,12 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
 The equation is: Out = scale*X
 )DOC");
-    AddAttr("scale", "scale of scale operator.").SetDefault(1.0);
+    AddAttr("scale", "The scaling factor of the scale operator.")
+        .SetDefault(1.0);
   }
 };
 
-// Identity Op's gradient is identity op, too.
+// IdentityOp's gradient is IdentityOp, too.
 // Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out))
 template
 class ScaleGradOp : public NetOp {
@@ -65,17 +66,20 @@ class ScaleGradOp : public NetOp {
   }
 };
 
-// identity is a alias of scale op. This is also a example for creating a alias
-// operator.
+// IdentityOp is an alias of the ScaleOp. This is also an example for creating
+// an alias of an existing operator.
 template
 class IdentityOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   IdentityOpMaker(framework::OpProto *proto,
                   framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "input tensor of identity op");
-    AddOutput("Out", "output tensor of identity op");
-    AddComment("identity operator. Just a alias of scale op which scale = 1.0");
+    AddInput("X", "The input tensor of identity op.");
+    AddOutput("Out", "The output tensor of identity op.");
+    AddComment(R"DOC(
+The identity operator is just an alias of the scale operator with the
+attribute scale fixed to 1.0.
+)DOC");
   }
 };
 
diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc
index 7d062ad67c..f3aea6df7c 100644
--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -23,9 +23,9 @@ class SoftmaxOp : public framework::OperatorWithKernel {
 
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL,
+    PADDLE_ENFORCE(ctx.Input("logits")->dims().size() == 2UL,
                    "The input of softmax op must be a matrix.");
-    ctx.Output("Y")->Resize(ctx.Input("X")->dims());
+    ctx.Output("softmax")->Resize(ctx.Input("logits")->dims());
   }
 };
 
@@ -34,10 +34,10 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
   SoftmaxOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X",
+    AddInput("logits",
              "The input tensor of softmax. "
             "2-D with shape [batch_size, input_feature_dimensions].");
-    AddOutput("Y", "The normalized values with the same shape as X.");
+    AddOutput("softmax",
+              "The normalized values with the same shape as the input logits.");
     AddComment(R"DOC(
 The input of softmax operator is a 2-D tensor with shape N x K (N is the
 batch_size, K is the dimension of input feature).
 The output tensor has the same shape as the input tensor.
@@ -64,14 +64,17 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
 
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null");
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
-                            "Input(Y@GRAD) should not be null");
-    PADDLE_ENFORCE(ctx.Input("Y")->dims() ==
-                       ctx.Input(framework::GradVarName("Y"))->dims(),
-                   "the shape of Input(0) and Input(1) should be the same");
-    ctx.Output(framework::GradVarName("X"))
-        ->Resize(ctx.Input("Y")->dims());
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("softmax"),
+                            "Input(softmax) should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("softmax")),
+                            "Input(softmax@GRAD) should not be null.");
+    PADDLE_ENFORCE_EQ(
+        ctx.Input("softmax")->dims(),
+        ctx.Input(framework::GradVarName("softmax"))->dims(),
+        "Input(softmax) and its gradient should have the same shape.");
+
+    ctx.Output(framework::GradVarName("logits"))
+        ->Resize(ctx.Input("logits")->dims());
   }
 };
 
diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h
index 4fa6b59540..57e5bfad02 100644
--- a/paddle/operators/softmax_op.h
+++ b/paddle/operators/softmax_op.h
@@ -28,12 +28,12 @@ template
 class SoftmaxKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto input = context.Input("X");
-    auto output = context.Output("Y");
-    output->mutable_data(context.GetPlace());
+    auto X = context.Input("logits");
+    auto Y = context.Output("softmax");
+    Y->mutable_data(context.GetPlace());
 
-    auto logits = EigenMatrix::From(*input);
-    auto softmax = EigenMatrix::From(*output);
+    auto logits = EigenMatrix::From(*X);
+    auto softmax = EigenMatrix::From(*Y);
 
     const int kBatchDim = 0;
     const int kClassDim = 1;
@@ -69,9 +69,9 @@ class SoftmaxGradKernel : public framework::OpKernel {
   void Compute(const framework::ExecutionContext& context) const override {
     std::shared_ptr scale_ = std::make_shared();
 
-    auto Y = context.Input("Y");
-    auto dY = context.Input(framework::GradVarName("Y"));
-    auto dX = context.Output(framework::GradVarName("X"));
+    auto Y = context.Input("softmax");
+    auto dY = context.Input(framework::GradVarName("softmax"));
+    auto dX = context.Output(framework::GradVarName("logits"));
     dX->mutable_data(context.GetPlace());
 
     const int batch_size = Y->dims()[0];
diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py
index e670d93653..d7279df116 100644
--- a/python/paddle/v2/framework/tests/test_softmax_op.py
+++ b/python/paddle/v2/framework/tests/test_softmax_op.py
@@ -18,18 +18,23 @@ class TestSoftmaxOp(unittest.TestCase):
     def setUp(self):
         self.type = "softmax"
-        self.inputs = {'X': np.random.random((32, 100)).astype("float32")}
+        self.inputs = {"logits": np.random.random((10, 10)).astype("float32")}
         self.outputs = {
-            'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
+            "softmax":
+            np.apply_along_axis(stable_softmax, 1, self.inputs["logits"])
         }
 
 
-class SoftmaxGradOpTest(GradientChecker):
-    def test_softmax(self):
-        op = create_op("softmax")
-        inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")}
-        self.check_grad(op, inputs, set("X"), "Y")
+class TestSoftmaxGradOp(GradientChecker):
+    def setUp(self):
+        self.op = create_op("softmax")
+        self.inputs = {
+            "logits": np.random.uniform(0.1, 1, [10, 10]).astype("float32")
+        }
+
+    def test_softmax_grad(self):
+        self.check_grad(self.op, self.inputs, ["logits"], "softmax")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
-- 
GitLab
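
For reviewers, here is a rough NumPy sketch of what the renamed operator computes: the forward pass is the row-wise, numerically stable softmax that the test's stable_softmax reference also uses, and the backward helper follows the standard softmax gradient formula (the hunks above only rename the kernel inputs/outputs and do not show the full kernel bodies, so the exact C++ implementation is not reproduced here). The helper names below are illustrative only and are not part of the patch.

import numpy as np


def stable_softmax_ref(logits):
    """Row-wise, numerically stable softmax for a [batch_size, num_classes] array."""
    # Shift by the row maximum so exp() does not overflow.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)


def softmax_grad_ref(softmax, grad_softmax):
    """Standard softmax backward: dX = (dY - sum(dY * Y, axis=1)) * Y."""
    dot = np.sum(grad_softmax * softmax, axis=1, keepdims=True)
    return (grad_softmax - dot) * softmax


if __name__ == "__main__":
    x = np.random.random((10, 10)).astype("float32")
    y = stable_softmax_ref(x)
    # Each row of the output is a probability distribution.
    assert np.allclose(y.sum(axis=1), 1.0, atol=1e-5)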