refine softmax operator.

7d16fe87 · caoying03 · b3afe30d · 7d16fe87 · 7d16fe87 · 7d16fe87
4 changed files
--- a/paddle/operators/scale_op.cc
+++ b/paddle/operators/scale_op.cc
@@ -44,11 +44,12 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
 The equation is: Out = scale*X
 )DOC");
-    AddAttr<AttrType>("scale", "scale of scale operator.").SetDefault(1.0);
+    AddAttr<AttrType>("scale", "The scaling factor of the scale operator.")
+        .SetDefault(1.0);
  }
 };
-// Identity Op's gradient is identity op, too.
+// IdentityOp's gradient is IdentityOp, too.
 // Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out))
 template <typename AttrType>
 class ScaleGradOp : public NetOp {
@@ -65,17 +66,20 @@ class ScaleGradOp : public NetOp {
  }
 };
-// identity is a alias of scale op. This is also a example for creating a alias
+// IdentityOp is an alias of the ScaleOp. This is also an example for creating
-// operator.
+// an alias of an existing operator.
 template <typename AttrType>
 class IdentityOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  IdentityOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "input tensor of identity op");
+    AddInput("X", "The input tensor of identity op.");
-    AddOutput("Out", "output tensor of identity op");
+    AddOutput("Out", "The output tensor of identity op.");
-    AddComment("identity operator. Just a alias of scale op which scale = 1.0");
+    AddComment(R"DOC(
+The identity operator is just an alias of the scale operator with the
+attribute scale fixed to 1.0.
+)DOC");
  }
 };

--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -23,9 +23,9 @@ class SoftmaxOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE(ctx.Input<Tensor>("X")->dims().size() == 2UL,
+    PADDLE_ENFORCE(ctx.Input<Tensor>("logits")->dims().size() == 2UL,
                   "The input of softmax op must be a matrix.");
-    ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
+    ctx.Output<Tensor>("softmax")->Resize(ctx.Input<Tensor>("logits")->dims());
  }
 };
@@ -34,10 +34,10 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
  SoftmaxOpMaker(framework::OpProto *proto,
                 framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X",
+    AddInput("logits",
             "The input tensor of softmax. "
             "2-D with shape [batch_size, input_feature_dimensions].");
-    AddOutput("Y", "The normalized values with the same shape as X.");
+    AddOutput("softmax", "The normalized values with the same shape as X.");
    AddComment(R"DOC(
 The input of softmax operator is a 2-D tensor with shape N x K (N is the
 batch_size, K is the dimension of input feature). The output tensor has the
@@ -64,14 +64,17 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("softmax"),
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
+                            "Input(softmax) should be not null.");
-                            "Input(Y@GRAD) should not be null");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("softmax")),
-    PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() ==
+                            "Input(softmax@GRAD) should be not null.");
-                       ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
+    PADDLE_ENFORCE_EQ(
-                   "the shape of Input(0) and Input(1) should be the same");
+        ctx.Input<Tensor>("softmax")->dims(),
-    ctx.Output<Tensor>(framework::GradVarName("X"))
+        ctx.Input<Tensor>(framework::GradVarName("softmax"))->dims(),
-        ->Resize(ctx.Input<Tensor>("Y")->dims());
+        "Input(softmax) and its gradients should have a same shape.");
+    ctx.Output<Tensor>(framework::GradVarName("logits"))
+        ->Resize(ctx.Input<Tensor>("logits")->dims());
  }
 };

--- a/paddle/operators/softmax_op.h
+++ b/paddle/operators/softmax_op.h
@@ -28,12 +28,12 @@ template <typename Place, typename T>
 class SoftmaxKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
-    auto input = context.Input<Tensor>("X");
+    auto X = context.Input<Tensor>("logits");
-    auto output = context.Output<Tensor>("Y");
+    auto Y = context.Output<Tensor>("softmax");
-    output->mutable_data<T>(context.GetPlace());
+    Y->mutable_data<T>(context.GetPlace());
-    auto logits = EigenMatrix<T>::From(*input);
+    auto logits = EigenMatrix<T>::From(*X);
-    auto softmax = EigenMatrix<T>::From(*output);
+    auto softmax = EigenMatrix<T>::From(*Y);
    const int kBatchDim = 0;
    const int kClassDim = 1;
@@ -69,9 +69,9 @@ class SoftmaxGradKernel : public framework::OpKernel {
  void Compute(const framework::ExecutionContext& context) const override {
    std::shared_ptr<Tensor> scale_ = std::make_shared<Tensor>();
-    auto Y = context.Input<Tensor>("Y");
+    auto Y = context.Input<Tensor>("softmax");
-    auto dY = context.Input<Tensor>(framework::GradVarName("Y"));
+    auto dY = context.Input<Tensor>(framework::GradVarName("softmax"));
-    auto dX = context.Output<Tensor>(framework::GradVarName("X"));
+    auto dX = context.Output<Tensor>(framework::GradVarName("logits"));
    dX->mutable_data<T>(context.GetPlace());
    const int batch_size = Y->dims()[0];

--- a/python/paddle/v2/framework/tests/test_softmax_op.py
+++ b/python/paddle/v2/framework/tests/test_softmax_op.py
@@ -18,18 +18,23 @@ class TestSoftmaxOp(unittest.TestCase):
    def setUp(self):
        self.type = "softmax"
-        self.inputs = {'X': np.random.random((32, 100)).astype("float32")}
+        self.inputs = {"logits": np.random.random((10, 10)).astype("float32")}
        self.outputs = {
-            'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
+            "softmax":
+            np.apply_along_axis(stable_softmax, 1, self.inputs["logits"])
        }
-class SoftmaxGradOpTest(GradientChecker):
+class TestSoftmaxGradOp(GradientChecker):
-    def test_softmax(self):
+    def setUp(self):
-        op = create_op("softmax")
+        self.op = create_op("softmax")
-        inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")}
+        self.inputs = {
-        self.check_grad(op, inputs, set("X"), "Y")
+            "logits": np.random.uniform(0.1, 1, [10, 10]).astype("float32")
+        }
+    def test_softmax_grad(self):
+        self.check_grad(self.op, self.inputs, ["logits"], "softmax")
-if __name__ == '__main__':
+if __name__ == "__main__":
    unittest.main()