diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 61cfdb80af04ede49621bc680fa0fa733a21a2a3..a4089ba3ca08bed5702a66ed370da52ecd9b58c6 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -43,14 +43,13 @@ if(APPLE)
         list(REMOVE_ITEM TEST_OPS test_desc_clone)
         list(REMOVE_ITEM TEST_OPS test_program_code)
     endif(NOT WITH_DISTRIBUTE)
-    message(WARNING "These tests has been disabled in OSX before being fixed: \n test_gradient_clip \n test_fuse_elewise_add_act_pass \n test_detection_map_op \n test_dist_se_resnext")
+    message(WARNING "These tests has been disabled in OSX before being fixed:\n test_fuse_elewise_add_act_pass \n test_detection_map_op \n test_dist_se_resnext")
     # this op is not support on mac
     list(REMOVE_ITEM TEST_OPS test_fusion_seqexpand_concat_fc_op)
     # TODO: add the unitest back when it fixed
     list(REMOVE_ITEM TEST_OPS test_detection_map_op)
     list(REMOVE_ITEM TEST_OPS test_dist_se_resnext)
     list(REMOVE_ITEM TEST_OPS test_fuse_elewise_add_act_pass)
-    list(REMOVE_ITEM TEST_OPS test_gradient_clip)
 endif()
 if(NOT WITH_MKLML)
     # this op is not support on openblas
diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py
index e4b3168ba6636253055f546fb3eec8a536714209..e49239da6d3918211fbbc302d2c56818460b6d51 100644
--- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py
+++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py
@@ -20,9 +20,6 @@ import paddle
 import paddle.fluid.core as core
 import paddle.fluid as fluid
 
-BATCH_SIZE = 128
-CLIP = 1
-
 
 def bow_net(data,
             label,
@@ -64,6 +61,8 @@ class TestGradientClip(unittest.TestCase):
         return places
 
     def check_operators(self, place):
+        CLIP = 1
+
         prog = fluid.framework.Program()
         startup_program = fluid.framework.Program()
         with fluid.program_guard(
@@ -79,13 +78,13 @@ class TestGradientClip(unittest.TestCase):
             avg_cost = fluid.layers.mean(cost)
 
         prog_clip = prog.clone()
-
         avg_cost_clip = prog_clip.block(0).var(avg_cost.name)
 
         p_g = fluid.backward.append_backward(loss=avg_cost)
         p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
 
-        with fluid.program_guard(main_program=prog_clip):
+        with fluid.program_guard(
+                main_program=prog_clip, startup_program=startup_program):
             fluid.clip.set_gradient_clip(
                 fluid.clip.GradientClipByGlobalNorm(clip_norm=CLIP))
             p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
@@ -96,7 +95,7 @@ class TestGradientClip(unittest.TestCase):
         train_reader = paddle.batch(
             paddle.reader.shuffle(
                 paddle.dataset.mnist.train(), buf_size=8192),
-            batch_size=BATCH_SIZE)
+            batch_size=128)
 
         exe = fluid.Executor(place)
         feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
@@ -112,12 +111,12 @@ class TestGradientClip(unittest.TestCase):
                                feed=feeder.feed(data),
                                fetch_list=grad_clip_list)
             global_norm = 0
-            for v in out[1:]:
+            for v in out:
                 global_norm += np.sum(np.power(v, 2))
             global_norm = np.sqrt(global_norm)
 
             global_norm_clip = 0
-            for v in out_clip[1:]:
+            for v in out_clip:
                 global_norm_clip += np.sum(np.power(v, 2))
             global_norm_clip = np.sqrt(global_norm_clip)
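
For context, the arithmetic in the last hunk is the usual definition behind `GradientClipByGlobalNorm`: sum the squared L2 norms of all gradient tensors into one scalar, and if its square root exceeds `clip_norm`, scale every gradient by `clip_norm / global_norm`. The change from `out[1:]` to `out` makes the check include every fetched gradient in that sum. Below is a minimal NumPy sketch of that invariant, not PaddlePaddle API code; the `clip_by_global_norm` helper and the sample tensors are made up for illustration.

```python
import numpy as np

def clip_by_global_norm(grads, clip_norm):
    # Global norm: sqrt of the sum of squares over every gradient tensor,
    # matching how the test accumulates np.sum(np.power(v, 2)) over `out`.
    global_norm = np.sqrt(sum(np.sum(np.power(g, 2)) for g in grads))
    # Scale by clip_norm / global_norm only when the global norm exceeds
    # the threshold; otherwise the scale factor is 1 and grads pass through.
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads], global_norm

# Hypothetical gradient tensors, standing in for the fetched grad lists.
grads = [np.random.randn(4, 3), np.random.randn(10)]
clipped, norm_before = clip_by_global_norm(grads, clip_norm=1.0)
norm_after = np.sqrt(sum(np.sum(np.power(g, 2)) for g in clipped))
# After clipping, the global norm is at most clip_norm (up to float error),
# which is the relation the test measures between `out` and `out_clip`.
assert norm_after <= 1.0 + 1e-6
```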