diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc
index 57d8d3b2e5f1baa349deae0f83ad9f03f7b50f7b..05ed603e1a39d5ac3cc57592f2848f155b2d71fe 100644
--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@@ -60,7 +60,12 @@ void ExposeOperator(ClassType &m) {
                -> std::unordered_map<std::string, std::vector<std::string>> {
                  return op.outputs_;
                })
-      .def("__str__", &ClassType::type::DebugString);
+      .def("inputs",
+           [](const typename ClassType::type &op) { return op.inputs_; })
+      .def("__str__", &ClassType::type::DebugString)
+      .def("no_intermediate_outputs", [](const typename ClassType::type &op) {
+        return op.OutputVars(false);
+      });
 }
 
 static size_t UniqueIntegerGenerator() {
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
index 015e832e82560bb8b3518cbdf605c705d77cdd99..501cf6110ff745b8a6022b463bc9cc3a70145c60 100644
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -53,15 +53,18 @@ def get_numeric_gradient(op,
         tensor.set(input_values[var_name], core.CPUPlace())
 
     # Create all output variable in local_scope
-    for output in op.outputs():
-        if local_scope.find_var(output) is None:
-            local_scope.new_var(output).get_tensor()
-
+    opts = op.outputs()
+    for key in opts:
+        for output in opts[key]:
+            if local_scope.find_var(output) is None:
+                local_scope.new_var(output).get_tensor()
     op.infer_shape(local_scope)
 
     # allocate output memory
-    for output in op.outputs():
-        local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace())
+    for key in opts:
+        for output in opts[key]:
+            local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace(
+            ))
 
     # TODO(yuyang18): Only CPU is support now.
     cpu_ctx = core.DeviceContext.create(core.CPUPlace())
@@ -150,19 +153,24 @@ class GradientChecker(unittest.TestCase):
         if no_grad_set is None:
             no_grad_set = set()
 
-        tmp_outs = forward_op.temp_outputs()
-        no_tmp_out = filter(lambda name: name not in tmp_outs,
-                            forward_op.outputs())
+        no_tmp_out = forward_op.no_intermediate_outputs()
         if len(no_tmp_out) != 1:
             raise ValueError("non temp out_names should be 1")
 
-        in_names = forward_op.inputs()
+        inputs = forward_op.inputs()
+        in_names = [item for k in inputs for item in inputs[k]]
+        outputs = forward_op.outputs()
+        out_names = [item for k in outputs for item in outputs[k]]
+
         for no_grad in no_grad_set:
             if no_grad not in in_names:
                 raise ValueError("no_grad should be in in_names")
 
         backward_op = core.Operator.backward(forward_op, no_grad_set)
 
+        bwd_outputs = backward_op.outputs()
+        bwd_out_names = [item for k in bwd_outputs for item in bwd_outputs[k]]
+
         places = [core.CPUPlace()]
         if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu():
             places.append(core.GPUPlace(0))
@@ -188,7 +196,7 @@ class GradientChecker(unittest.TestCase):
                 var.set(value, place)
 
             # create output var
-            for out_name in forward_op.outputs():
+            for out_name in out_names:
                 scope.new_var(out_name).get_tensor()
 
             # infer the shape of output var and compute/set value of output var
@@ -198,7 +206,7 @@ class GradientChecker(unittest.TestCase):
             # create output grad var
             # set shape as the output var
             # set value of this grad to ones
-            for name in forward_op.outputs():
+            for name in out_names:
                 out_tensor = scope.find_var(name).get_tensor()
                 grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
                 grad_tensor.set_dims(out_tensor.shape())
@@ -206,7 +214,7 @@ class GradientChecker(unittest.TestCase):
                 grad_tensor.set(data, place)
 
             # create input grad var
-            for name in backward_op.outputs():
+            for name in bwd_out_names:
                 scope.new_var(name).get_tensor()
 
             # infer the shape of input gradient var and compute/set it's value
diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
index fe89bf8e2cd008112e4011b0514f361336b4d6cc..4815192e255c6e0429db3f50918a76a773b30131 100644
--- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -21,17 +21,18 @@ class TestCrossEntropy(unittest.TestCase):
         self.outputs = {'Y': numpy.array(Y).astype("float32")}
 
 
-# class CrossEntropyGradOpTest(GradientChecker):
-#     def test_softmax_grad(self):
-#         op = create_op("onehot_cross_entropy")
-#         batch_size = 100
-#         class_num = 10
-#         inputs = {
-#             "X": numpy.random.uniform(
-#                 0.1, 1.0, [batch_size, class_num]).astype("float32"),
-#             "label": (class_num / 2) * numpy.ones(batch_size).astype("int32")
-#         }
-#         self.check_grad(op, inputs, set("X"), "Y")
+class CrossEntropyGradOpTest(GradientChecker):
+    def test_softmax_grad(self):
+        op = create_op("onehot_cross_entropy")
+        batch_size = 100
+        class_num = 10
+        inputs = {
+            "X": numpy.random.uniform(
+                0.1, 1.0, [batch_size, class_num]).astype("float32"),
+            "label": (class_num / 2) * numpy.ones(batch_size).astype("int32")
+        }
+        self.check_grad(op, inputs, set("X"), "Y")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py
index cc7f09e7155f5b1afa47fc4133b71ae3676b7436..b42cadd11ab75abbc35763c8d12e8c27e995f0dc 100644
--- a/python/paddle/v2/framework/tests/test_net.py
+++ b/python/paddle/v2/framework/tests/test_net.py
@@ -25,12 +25,12 @@ class TestNet(unittest.TestCase):
         net.complete_add_op(True)
 
         expected = '''
-Op(plain_net), inputs:(W, X, Y), outputs:(Out, fc.out, pre_activation).
-    Op(add_two), inputs:(X, Y), outputs:(Out).
-    Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation).
-        Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation).
-            Op(mul), inputs:(X, W), outputs:(pre_activation).
-            Op(sigmoid), inputs:(pre_activation), outputs:(fc.out).
+Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}.
+    Op(add_two), inputs:{X[X], Y[Y]}, outputs:{Out[Out]}.
+    Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
+        Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
+            Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}.
+            Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Y[fc.out]}.
 '''
         self.assertEqual(expected, "\n" + str(net))
 
diff --git a/python/paddle/v2/framework/tests/test_protobuf.py b/python/paddle/v2/framework/tests/test_protobuf.py
index 69e98e2f250a9df23b25e7e2043af29f87c996a0..848a396b3b6eec57d500b464780b64f339b09e94 100644
--- a/python/paddle/v2/framework/tests/test_protobuf.py
+++ b/python/paddle/v2/framework/tests/test_protobuf.py
@@ -1,11 +1,10 @@
-import paddle.v2.framework.proto.op_proto_pb2 as op_proto_lib
-import paddle.v2.framework.proto.attribute_pb2 as attr_type_lib
+import paddle.v2.framework.proto.framework_pb2 as framework_pb2
 import unittest
 
 
 class TestFrameworkProto(unittest.TestCase):
     def test_all(self):
-        op_proto = op_proto_lib.OpProto()
+        op_proto = framework_pb2.OpProto()
         ipt0 = op_proto.inputs.add()
         ipt0.name = "a"
         ipt0.comment = "the input of cosine op"
@@ -19,7 +18,7 @@ class TestFrameworkProto(unittest.TestCase):
         attr = op_proto.attrs.add()
         attr.name = "scale"
         attr.comment = "scale of cosine op"
-        attr.type = attr_type_lib.FLOAT
+        attr.type = framework_pb2.FLOAT
         op_proto.type = "cos"
         self.assertTrue(op_proto.IsInitialized())
 
diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py
index 3c6b229f9438682375de5dbad4bff5c5f73ab427..e670d93653e07d35e5019c9daac45c214eddf367 100644
--- a/python/paddle/v2/framework/tests/test_softmax_op.py
+++ b/python/paddle/v2/framework/tests/test_softmax_op.py
@@ -24,11 +24,12 @@ class TestSoftmaxOp(unittest.TestCase):
         }
 
 
-# class SoftmaxGradOpTest(GradientChecker):
-#     def test_softmax(self):
-#         op = create_op("softmax")
-#         inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")}
-#         self.check_grad(op, inputs, set("X"), "Y")
+class SoftmaxGradOpTest(GradientChecker):
+    def test_softmax(self):
+        op = create_op("softmax")
+        inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")}
+        self.check_grad(op, inputs, set("X"), "Y")
+
 
 if __name__ == '__main__':
     unittest.main()
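Note (not part of the patch): a minimal sketch of the interface shape this diff relies on. After the pybind change, `op.inputs()` and `op.outputs()` return a dict mapping each operator slot to a list of variable names, which is why `gradient_checker.py` now flattens them with a comprehension, and `no_intermediate_outputs()` supersedes the old `temp_outputs()` filtering. The dict literals below are illustrative only, mirroring the `add_two` op shown in the `test_net.py` expectation.

    # Sketch only: the slot -> variable-name mapping that op.inputs()/op.outputs()
    # now return, and the flattening pattern used by gradient_checker.py.
    outputs = {"Out": ["Out"]}         # e.g. what add_two's outputs() would yield
    inputs = {"X": ["X"], "Y": ["Y"]}  # e.g. what add_two's inputs() would yield

    out_names = [name for key in outputs for name in outputs[key]]  # ["Out"]
    in_names = [name for key in inputs for name in inputs[key]]     # ["X", "Y"]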