diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index 04e4906868e856b84bbc13ee60ca6a0b231cfad3..0e7a9dbea2561b92df88720b20dd98c1a4d277f6 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -158,10 +158,6 @@ class GradientClipBase(object):
 
 class GradientClipByValue(GradientClipBase):
     """
-    :alias_main: paddle.nn.GradientClipByValue
-    :alias: paddle.nn.GradientClipByValue,paddle.nn.clip.GradientClipByValue
-    :old_api: paddle.fluid.clip.GradientClipByValue
-
     Limit the value of multi-dimensional Tensor :math:`X` to the range [min, max].
 
     - Any values less than min are set to ``min``.
@@ -172,7 +168,7 @@ class GradientClipByValue(GradientClipBase):
     is not None, then only part of gradients can be selected for gradient clipping.
 
     Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
-    (for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
+    (for example: :ref:`api_paddle_optimizer_SGD`).
 
     Args:
         max (float): The maximum value to clip by.
@@ -185,66 +181,28 @@ class GradientClipByValue(GradientClipBase):
 
     Examples:
         .. code-block:: python
 
-            # use for Static mode
             import paddle
-            import paddle.fluid as fluid
-            import numpy as np
-
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(
-                    main_program=main_prog, startup_program=startup_prog):
-                image = fluid.data(
-                    name='x', shape=[-1, 2], dtype='float32')
-                predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
-                loss = fluid.layers.mean(predict)
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByValue(min=-1, max=1)
-
-                # Clip a part of parameters in network: (e.g. fc_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a Parameter, and return bool
-                # def fileter_func(Parameter):
-                # # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
-                # return Parameter.name=="fc_0.w_0"
-                # clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
-                sgd_optimizer.minimize(loss)
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
-            exe.run(startup_prog)
-            out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
-
-            # use for Dygraph mode
-            import paddle
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                linear = fluid.dygraph.Linear(10, 10)  # Trainable parameters:: linear_0.w.0, linear_0.b.0
-                inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
-                out = linear(fluid.dygraph.to_variable(inputs))
-                loss = fluid.layers.reduce_mean(out)
-                loss.backward()
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByValue(min=-1, max=1)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(ParamBase):
-                # # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
-                # return ParamBase.name == "linear_0.w_0"
-                # # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
-                # return ParamBase.name == linear.weight.name
-                # clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGD(
-                    learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
-                sgd_optimizer.minimize(loss)
+            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
+            linear = paddle.nn.Linear(10, 10)
+            out = linear(x)
+            loss = paddle.mean(out)
+            loss.backward()
+
+            # clip all parameters in network:
+            clip = paddle.nn.GradientClipByValue(min=-1, max=1)
+
+            # clip a part of parameters in network: (e.g. linear_0.w_0)
+            # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
+            # def fileter_func(ParamBase):
+            # # It can be easily filtered by ParamBase.name(name can be set in paddle.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
+            # return ParamBase.name == "linear_0.w_0"
+            # # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
+            # return ParamBase.name == linear.weight.name
+            # clip = paddle.nn.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
+
+            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
+            sdg.step()
     """
 
     def __init__(self, max, min=None, need_clip=None):
@@ -300,10 +258,6 @@ class GradientClipByValue(GradientClipBase):
 
 class GradientClipByNorm(GradientClipBase):
     """
-    :alias_main: paddle.nn.GradientClipByNorm
-    :alias: paddle.nn.GradientClipByNorm,paddle.nn.clip.GradientClipByNorm
-    :old_api: paddle.fluid.clip.GradientClipByNorm
-
     Limit the l2 norm of multi-dimensional Tensor :math:`X` to ``clip_norm`` .
 
     - If the l2 norm of :math:`X` is greater than ``clip_norm`` , :math:`X` will be compressed by a ratio.
@@ -314,7 +268,7 @@ class GradientClipByNorm(GradientClipBase):
     is not None, then only part of gradients can be selected for gradient clipping.
 
     Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
-    (for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
+    (for example: :ref:`api_paddle_optimizer_SGD`).
 
     The clipping formula is:
 
@@ -342,68 +296,28 @@ class GradientClipByNorm(GradientClipBase):
 
     Examples:
         .. code-block:: python
 
-            # use for Static mode
             import paddle
-            import paddle.fluid as fluid
-            import numpy as np
-
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(
-                    main_program=main_prog, startup_program=startup_prog):
-                image = fluid.data(
-                    name='x', shape=[-1, 2], dtype='float32')
-                predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
-                loss = fluid.layers.mean(predict)
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a Parameter, and return bool
-                # def fileter_func(Parameter):
-                # # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
-                # return Parameter.name=="fc_0.w_0"
-                # clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
-                sgd_optimizer.minimize(loss)
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
-            exe.run(startup_prog)
-            out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
-
-
-
-            # use for Dygraph mode
-            import paddle
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                linear = fluid.dygraph.Linear(10, 10)  # Trainable: linear_0.w.0, linear_0.b.0
-                inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
-                out = linear(fluid.dygraph.to_variable(inputs))
-                loss = fluid.layers.reduce_mean(out)
-                loss.backward()
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(ParamBase):
-                # # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
-                # return ParamBase.name == "linear_0.w_0"
-                # # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
-                # return ParamBase.name == linear.weight.name
-                # clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGD(
-                    learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
-                sgd_optimizer.minimize(loss)
+            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
+            linear = paddle.nn.Linear(10, 10)
+            out = linear(x)
+            loss = paddle.mean(out)
+            loss.backward()
+
+            # clip all parameters in network:
+            clip = paddle.nn.GradientClipByNorm(clip_norm=1.0)
+
+            # clip a part of parameters in network: (e.g. linear_0.w_0)
+            # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
+            # def fileter_func(ParamBase):
+            # # It can be easily filtered by ParamBase.name(name can be set in paddle.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
+            # return ParamBase.name == "linear_0.w_0"
+            # # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
+            # return ParamBase.name == linear.weight.name
+            # clip = paddle.nn.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
+
+            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
+            sdg.step()
     """
 
     def __init__(self, clip_norm, need_clip=None):
@@ -455,10 +369,6 @@ class GradientClipByNorm(GradientClipBase):
 
 class GradientClipByGlobalNorm(GradientClipBase):
     """
-    :alias_main: paddle.nn.GradientClipByGlobalNorm
-    :alias: paddle.nn.GradientClipByGlobalNorm,paddle.nn.clip.GradientClipByGlobalNorm
-    :old_api: paddle.fluid.clip.GradientClipByGlobalNorm
-
     Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of
     all tensors in :math:`t\_list` , and limit it to ``clip_norm`` .
 
@@ -470,7 +380,7 @@ class GradientClipByGlobalNorm(GradientClipBase):
     is not None, then only part of gradients can be selected for gradient clipping.
 
     Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
-    (for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
+    (for example: :ref:`api_paddle_optimizer_SGD`).
 
     The clipping formula is:
 
@@ -494,67 +404,28 @@ class GradientClipByGlobalNorm(GradientClipBase):
 
     Examples:
         .. code-block:: python
 
-            # use for Static mode
             import paddle
-            import paddle.fluid as fluid
-            import numpy as np
-
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(
-                    main_program=main_prog, startup_program=startup_prog):
-                image = fluid.data(
-                    name='x', shape=[-1, 2], dtype='float32')
-                predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
-                loss = fluid.layers.mean(predict)
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. fc_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(Parameter):
-                # # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
-                # return Parameter.name=="fc_0.w_0"
-                # clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
-                sgd_optimizer.minimize(loss)
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
-            exe.run(startup_prog)
-            out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
-
-
-            # use for Dygraph mode
-            import paddle
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                linear = fluid.dygraph.Linear(10, 10)  # Trainable: linear_0.w.0, linear_0.b.0
-                inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
-                out = linear(fluid.dygraph.to_variable(inputs))
-                loss = fluid.layers.reduce_mean(out)
-                loss.backward()
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(ParamBase):
-                # # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
-                # return ParamBase.name == "linear_0.w_0"
-                # # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
-                # return ParamBase.name == linear.weight.name
-                # clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGD(
-                    learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
-                sgd_optimizer.minimize(loss)
+            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
+            linear = paddle.nn.Linear(10, 10)
+            out = linear(x)
+            loss = paddle.mean(out)
+            loss.backward()
+
+            # clip all parameters in network:
+            clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0)
+
+            # clip a part of parameters in network: (e.g. linear_0.w_0)
+            # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
+            # def fileter_func(ParamBase):
+            # # It can be easily filtered by ParamBase.name(name can be set in paddle.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
+            # return ParamBase.name == "linear_0.w_0"
+            # # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
+            # return ParamBase.name == linear.weight.name
+            # clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
+
+            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
+            sdg.step()
     """
 
     def __init__(self, clip_norm, group_name="default_group", need_clip=None):
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index b6a1e3b151bde9ead6be4edc77c01e2fb42a6ee3..411ac6e51b1c8dd3277dde584321c6ed45adee17 100755
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -259,24 +259,24 @@ def Print(input,
     Examples:
         .. code-block:: python
 
-           import paddle.fluid as fluid
-
-           input = fluid.layers.fill_constant(shape=[10,2], value=3, dtype='int64')
-           input = fluid.layers.Print(input, message="The content of input layer:")
-
-           main_program = fluid.default_main_program()
-           exe = fluid.Executor(fluid.CPUPlace())
-           exe.run(main_program)
+           import paddle
 
-       Output at runtime:
-        .. code-block:: bash
-
-           The content of input layer: The place is:CPUPlace
-           Tensor[fill_constant_0.tmp_0]
-               shape: [10,2,]
-               dtype: x
-               data: 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
-
+           paddle.enable_static()
+
+           x = paddle.full(shape=[2, 3], fill_value=3, dtype='int64')
+           out = paddle.static.Print(x, message="The content of input layer:")
+
+           main_program = paddle.static.default_main_program()
+           exe = paddle.static.Executor(place=paddle.CPUPlace())
+           res = exe.run(main_program, fetch_list=[out])
+           # Variable: fill_constant_1.tmp_0
+           #   - message: The content of input layer:
+           #   - lod: {}
+           #   - place: CPUPlace
+           #   - shape: [2, 3]
+           #   - layout: NCHW
+           #   - dtype: long
+           #   - data: [3 3 3 3 3 3]
     '''
     check_variable_and_dtype(input, 'input',
                              ['float32', 'float64', 'int32', 'int64', 'bool'],