diff --git a/paddle/operators/softmax_with_cost_op.cc b/paddle/operators/softmax_with_cost_op.cc
deleted file mode 100644
index a4537691a03810bec5099425a13a23e9163227ba..0000000000000000000000000000000000000000
--- a/paddle/operators/softmax_with_cost_op.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/softmax_op.h"
-
-namespace paddle {
-namespace operators {
-
-class SoftmaxWithLossOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  void InferShape(const framework::InferShapeContext &ctx) const override {
-    auto logits = ctx.Input<Tensor>("logits");
-    PADDLE_ENFORCE(logits->dims().size() == 2UL,
-                   "The input of softmax_with_loss_op should be a 2-d tensor.");
-    PADDLE_ENFORCE(ctx.Input<Tensor>("label")->dims().size() == 1UL,
-                   "The label should be a 1-d tensor.");
-    ctx.Output<Tensor>("loss")->Resize({logits->dims()[0]});
-  }
-};
-
-class SoftmaxWithLossOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  SoftmaxWithLossOpMaker(framework::OpProto *proto,
-                         framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("logits",
-             "The unscaled log probabilities, a 2-D tensor with "
-             "shape [N x K]. N is the batch size, and K is the class number.");
-    AddInput("label", "The ground truth. A 1-D tensor with shape N.");
-    AddOutput("loss", "A 1-D tensor with shape N.");
-    AddComment(R"DOC(
-Cross entropy loss with softmax is used as the output layer extensively. This
-operator computes the softmax normalized values for each row of the input
-tensor, after which the cross-entropy loss is computed. This provides a more
-numerically stable gradient.
-
-Because this operator performs a softmax on the logits internally, it expects
-unscaled logits. Do not call this op with the output of the softmax operator,
-as that would produce incorrect results.
-
-This operator expects mutually exclusive hard labels: each sample in the batch
-belongs to exactly one class with probability 1.0, i.e., each sample has one
-and only one label.
-)DOC"); - } -}; - -class SoftmaxWithLossOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(const framework::InferShapeContext &ctx) const override {} -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP(softmax, ops::SoftmaxWithLossOp, ops::SoftmaxWithLossOpMaker, - softmax_grad, ops::SoftmaxWithLossOpGrad); -REGISTER_OP_CPU_KERNEL( - softmax, ops::SoftmaxWithLossKernel); -REGISTER_OP_CPU_KERNEL( - softmax_grad, - ops::SoftmaxWithLossGradKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..2edf00766ed35a6db326610399d90092942a20ec --- /dev/null +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/softmax_with_cross_entropy_op.h" + +namespace paddle { +namespace operators { + +class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto logits = ctx.Input("logits"); + PADDLE_ENFORCE( + logits->dims().size() == 2UL, + "The input of softmax_with_cross_entropy should be a 2-d tensor."); + PADDLE_ENFORCE(ctx.Input("lables")->dims().size() == 1UL, + "The label should be a 1-d tensor."); + ctx.Output("Y")->Resize({logits->dims()[0]}); + } +}; + +class SoftmaxWithCrossEntropyOpMaker + : public framework::OpProtoAndCheckerMaker { + public: + SoftmaxWithCrossEntropyOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("logits", + "The unscaled log probabilities which is a 2-D tensor with" + "shape [N x K]. N is the batch_size, and K is the class number."); + AddInput("label", "The ground truth. A 1-D tensor with shape N."); + AddOutput("Y", "A 1-D tensor with shape N."); + AddComment(R"DOC( +Cross entropy loss with softmax are used as the output layer extensively. This +operator computes the softmax normalized values for each row of the input +tensor, after which cross-entropy loss is then computed. This provides a more +numerically stable gradient. + +Because this operators performs a softmax on logits internally, it expects +unscaled logits. Please do not call this op with the output of softmax operator, +which will produce incorrect results. + +This operators expects mutually exclusive hard labels, each sample in a batch +is in exactly one class with probabilities 1. Each sample in the batch with one +and only one label. 
+)DOC"); + } +}; + +class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should be not null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")), + "Input(Y@GRAD) should be not null."); + PADDLE_ENFORCE_EQ(ctx.Input("Y")->dims(), + ctx.Input(framework::GradVarName("Y"))->dims(), + "Input(Y) and its gradients should have a same shape."); + + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("labels"), + "Input(lables) should be not null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("logits")), + "Input(logits@GRAD) should be not null."); + PADDLE_ENFORCE_EQ( + ctx.Input("logits")->dims(), + ctx.Input(framework::GradVarName("logits"))->dims(), + "Input(logits) and its gradients should have a same shape."); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp, + ops::SoftmaxWithCrossEntropyOpMaker, + softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyOpGrad); +REGISTER_OP_CPU_KERNEL( + softmax_with_cross_entropy, + ops::SoftmaxWithCrossEntropyKernel); +REGISTER_OP_CPU_KERNEL( + softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyGradKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..c9d47cc4aae0e3cc1e9f40da33daf95336ac754f --- /dev/null +++ b/paddle/operators/softmax_with_cross_entropy_op.cu @@ -0,0 +1,25 @@ +/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "softmax_with_cross_entropy_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL( + softmax_with_cross_entropy, + ops::SoftmaxWithCrossEntropyKernel); +REGISTER_OP_GPU_KERNEL( + softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyGradKernel); diff --git a/paddle/operators/softmax_with_cost_op.h b/paddle/operators/softmax_with_cross_entropy_op.h similarity index 51% rename from paddle/operators/softmax_with_cost_op.h rename to paddle/operators/softmax_with_cross_entropy_op.h index fb544842b70e063e9b484ca73a24454b26a25a48..418fb540b8af1ce3e80b7789db91e68eb7b1b158 100644 --- a/paddle/operators/softmax_with_cost_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+   You may obtain a copy of the License at
 
-   http://www.apache.org/licenses/LICENSE-2.0
+       http://www.apache.org/licenses/LICENSE-2.0
 
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
 
 #pragma once
 #include "paddle/framework/eigen.h"
@@ -25,13 +25,13 @@ template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 
 template <typename Place, typename T>
-class SoftmaxWithLossKernel : public framework::OpKernel {
+class SoftmaxWithCrossEntropyKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {}
 };
 
 template <typename Place, typename T>
-class SoftmaxWithLossGradKernel : public framework::OpKernel {
+class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {}
 };
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 7d363b3108c932c0d47564f73434a461e1d2c988..a059cd0b8189d38f29b8cefc519bd8af10945cf2 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -37,7 +37,7 @@ USE_OP(mul);
 USE_OP(mean);
 USE_OP(sigmoid);
 USE_OP(softmax);
-USE_OP(softmax_with_loss);
+USE_OP(softmax_with_cross_entropy);
 USE_OP(rowwise_add);
 USE_OP(fill_zeros_like);
 USE_NO_KERNEL_OP(recurrent);
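
Note: both `Compute` methods in the renamed header are left as empty stubs, so this patch only wires up the operator's interface; the kernel math presumably lands in a follow-up change. For readers, here is a minimal, self-contained C++ sketch of the computation the DOC comment describes — softmax fused with cross-entropy over unscaled logits and hard labels. It operates on plain arrays rather than Paddle's `Tensor`/`ExecutionContext` API; the function name and data layout are illustrative assumptions, not part of this patch.

```cpp
// Standalone sketch (NOT the Paddle kernel API) of fused softmax +
// cross-entropy with hard labels. For each row i with hard label y_i:
//   loss_i = log(sum_j exp(logit_ij - max_i)) - (logit_{i,y_i} - max_i)
// Subtracting the per-row max keeps exp() from overflowing.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// logits: N x K, row-major. labels: N hard labels in [0, K). Returns N losses.
std::vector<float> SoftmaxCrossEntropy(const std::vector<float>& logits,
                                       const std::vector<int>& labels,
                                       int N, int K) {
  std::vector<float> loss(N);
  for (int i = 0; i < N; ++i) {
    const float* row = logits.data() + i * K;
    float row_max = *std::max_element(row, row + K);
    float sum_exp = 0.f;
    for (int j = 0; j < K; ++j) sum_exp += std::exp(row[j] - row_max);
    // -log(softmax(row)[label]) without materializing the softmax vector.
    loss[i] = std::log(sum_exp) - (row[labels[i]] - row_max);
  }
  return loss;
}

int main() {
  // One batch of N = 2 samples over K = 3 classes.
  std::vector<float> logits = {1.f, 2.f, 3.f,   // sample 0
                               5.f, 1.f, 1.f};  // sample 1
  std::vector<int> labels = {2, 0};
  for (float l : SoftmaxCrossEntropy(logits, labels, 2, 3)) {
    std::printf("%f\n", l);
  }
  return 0;
}
```

The fused form also yields the gradient with respect to the logits as simply `softmax(logits) - one_hot(label)`, which is why computing softmax inside this op is more numerically stable than feeding a separate softmax op's output into a standalone cross-entropy op.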