diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index e90eb305f8f62a1e881404e83790fda11cc3918b..74c3920674b64729d6b22f22fe64369b3d37be7c 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -1,13 +1,13 @@ -paddle.fluid.Program ('paddle.fluid.framework.Program', ('document', '7364a01d7b9132a435e46162c7fbd6c6')) +paddle.fluid.Program ('paddle.fluid.framework.Program', ('document', '4f9e1829c89e0711355820e935d2b447')) paddle.fluid.Program.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.Program.block (ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None), ('document', '86cd9499e226be661a3d686260ee1150')) -paddle.fluid.Program.clone (ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,)), ('document', '11777d4121a64566a746e55497a4b78c')) -paddle.fluid.Program.current_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'd601c7719e425e3d9cf862ea4ad194ca')) -paddle.fluid.Program.global_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'd64ea1dc96e9f674499ea3006d470aa4')) -paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '32c14b0f12baae4b352200fa09b5e789')) -paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', 'b6a7ffb239a30bf2ce58cfaca8d8b8d5')) -paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', '89acca639baf00f3ad08b9d827e81706')) -paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'ba609cb02e4e55e8d626723567ef1778')) +paddle.fluid.Program.block (ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None), ('document', '28d066e432ceda86810b1e7deb8a4afa')) +paddle.fluid.Program.clone (ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,)), ('document', '1e910e8c4186e8ff1afb62602f369033')) +paddle.fluid.Program.current_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '365e49ce9f346ac6d54265e29db447b5')) +paddle.fluid.Program.global_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'dd3f2b49147861d6ae48989a77482f05')) +paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '757cf8d083dff9507676b17376ac5af1')) +paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', '70e063a0a09d5a8ed322db0d5de9edb4')) +paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', '6dfb00cd50eb515dcf2548a68ea94bfb')) +paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'accb52b28228f8e93a26fabdc960f56c')) paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '853718df675e59aea7104f3d61bbf11d')) paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', '78fb5c7f70ef76bcf4a1862c3f6b8191')) paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, 
keywords=None, defaults=(None,)), ('document', '917d313881ff990de5fb18d98a9c7b42')) @@ -16,6 +16,15 @@ paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=N paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c2562241744aabe3fff1b59af22dd281')) paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '301bae0d8e02cc9eec5be02f052f11c6')) paddle.fluid.is_compiled_with_cuda (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '60c7f107a5050aeb58bb74eb175672b5')) +paddle.fluid.Variable ('paddle.fluid.framework.Variable', ('document', '65ff735c2b96673d7131f5ff6b0db40c')) +paddle.fluid.Variable.__init__ (ArgSpec(args=['self', 'block', 'type', 'name', 'shape', 'dtype', 'lod_level', 'capacity', 'persistable', 'error_clip', 'stop_gradient', 'is_data', 'need_check_feed'], varargs=None, keywords='kwargs', defaults=(VarType.LOD_TENSOR, None, None, None, None, None, None, None, False, False, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) +paddle.fluid.Variable.astype (ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '78541af4039262ed7ce3c447f8cc9cc1')) +paddle.fluid.Variable.backward (ArgSpec(args=['self', 'backward_strategy'], varargs=None, keywords=None, defaults=(None,)), ('document', 'cb928fa194da09694f4267f0a25268f1')) +paddle.fluid.Variable.clear_gradient (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '509a96d23c876fc5bfb10e1147e21d5f')) +paddle.fluid.Variable.detach (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '0730b2d310b014d9b0a903b2034757d7')) +paddle.fluid.Variable.gradient (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '86b246bfaf20f3058e91927abbcf9fb9')) +paddle.fluid.Variable.numpy (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '7536e8feb56d827875943e7f01d406fc')) +paddle.fluid.Variable.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', '31f359a2c074f26dc0ffff296fc3983f')) paddle.fluid.Executor ('paddle.fluid.executor.Executor', ('document', '34e8c1769313fbeff7817212dda6259e')) paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3a584496aa1343f36eebf3c46b323a74')) @@ -573,7 +582,7 @@ paddle.fluid.dygraph.Layer.parameters (ArgSpec(args=['self', 'include_sublayers' paddle.fluid.dygraph.Layer.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph.Layer.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62')) paddle.fluid.dygraph.Layer.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.dygraph.__impl__ (ArgSpec(args=['func'], varargs=None, keywords=None, defaults=()), ('document', 'fa71ad4e6c2b5bf2b5258bd1959f9b2a')) +paddle.fluid.dygraph.__impl__ (ArgSpec(args=['func'], varargs=None, keywords=None, defaults=()), 
('document', '75d1d3afccc8b39cdebf05cb1f5969f9')) paddle.fluid.dygraph.guard (ArgSpec(args=['place'], varargs=None, keywords=None, defaults=(None,)), ('document', '7071320ffe2eec9aacdae574951278c6')) paddle.fluid.dygraph.to_variable (ArgSpec(args=['value', 'block', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0e69fa3666f15dd01b6e3e270b9371cd')) paddle.fluid.dygraph.Conv2D ('paddle.fluid.dygraph.nn.Conv2D', ('document', 'baafe7ae0d3a61ae79cf4c7443e2c37c')) diff --git a/python/paddle/compat.py b/python/paddle/compat.py index 50726b6fa1bbbde68a590c86db9344b8f02f79f2..f306ca7a36021dbfc1fb8e026b107285b2abb2df 100644 --- a/python/paddle/compat.py +++ b/python/paddle/compat.py @@ -72,6 +72,18 @@ def to_text(obj, encoding='utf-8', inplace=False): return obj else: return set([_to_text(item, encoding) for item in obj]) + elif isinstance(obj, dict): + if inplace: + new_obj = {} + for key, value in six.iteritems(obj): + new_obj[_to_text(key, encoding)] = _to_text(value, encoding) + obj.update(new_obj) + return obj + else: + new_obj = {} + for key, value in six.iteritems(obj): + new_obj[_to_text(key, encoding)] = _to_text(value, encoding) + return new_obj else: return _to_text(obj, encoding) @@ -99,6 +111,8 @@ def _to_text(obj, encoding): return obj.decode(encoding) elif isinstance(obj, six.text_type): return obj + elif isinstance(obj, (bool, float)): + return obj else: return six.u(obj) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 95d547f2f4a2be80affeb3445f2b9cae511cecee..aeef8505f8e47e21bd8cf9c9a1b9eae7b6db93db 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -21,7 +21,6 @@ import functools from . import layers from . import framework from . import core -from .dygraph.base import _not_support __all__ = [ 'set_gradient_clip', @@ -337,7 +336,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): return param, new_grad -@_not_support +@framework.dygraph_not_support def set_gradient_clip(clip, param_list=None, program=None): """ To specify parameters that require gradient clip. 
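A minimal usage sketch of the dict branch added to `paddle.compat.to_text` above (the dict contents here are illustrative only): with `inplace=False` a new dict with decoded keys and values is returned, while `inplace=True` updates and returns the original dict object; `bool` and `float` values now pass through `_to_text` unchanged.

.. code-block:: python

    import paddle.compat as cpt

    d = {"greeting": b"hello", "ratio": 0.5}

    new_d = cpt.to_text(d, inplace=False)   # builds and returns a new dict
    assert new_d == {"greeting": u"hello", "ratio": 0.5}
    assert new_d is not d

    same_d = cpt.to_text(d, inplace=True)   # updates d in place and returns it
    assert same_d is d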
diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index e218544a130f4de54459ffe89cadddecad19a37a..762b65b551e96f596219000f870c5168a2bd7318 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -45,21 +45,12 @@ def _switch_tracer_mode_guard_(is_train=True): yield -def _dygraph_not_support_(func): - def __impl__(*args, **kwargs): - assert not framework.in_dygraph_mode( - ), "We don't support %s in Dygraph mode" % func.__name__ - return func(*args, **kwargs) - - return __impl__ - - def _no_grad_(func): """ This Decorator will avoid the func being decorated creating backward network in dygraph mode - Args: - func: the func don't need grad + Parameter: + - **func** (python func): the func don't need grad Examples: @@ -92,7 +83,6 @@ def _no_grad_(func): no_grad = wrap_decorator(_no_grad_) # for fluidDoc no_grad.__doc__ = _no_grad_.__doc__ -_not_support = wrap_decorator(_dygraph_not_support_) @signature_safe_contextmanager @@ -157,6 +147,7 @@ def _print_debug_msg(limit=5, is_test=False): return unique_name_size, tracer_var_size, alive_cpp_var_size +@framework.dygraph_only def to_variable(value, block=None, name=None): """ This function will create a variable from ndarray diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index a13e1f2a310e1f2f4e4340602f8f7cc30c753416..3b171ed5c68cbd73ed0d1fe6a6549f892c9bd995 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -18,7 +18,7 @@ import collections from collections import defaultdict from collections import Iterable import contextlib -from .wrapped_decorator import signature_safe_contextmanager +from .wrapped_decorator import signature_safe_contextmanager, wrap_decorator import os import re import traceback @@ -28,6 +28,7 @@ import numpy as np import subprocess import multiprocessing import sys +import logging from .. import compat as cpt from .proto import framework_pb2 @@ -45,6 +46,7 @@ __all__ = [ 'cuda_pinned_places', 'in_dygraph_mode', 'is_compiled_with_cuda', + 'Variable', ] EMPTY_VAR_NAME = core.kEmptyVarName() @@ -75,6 +77,28 @@ def in_dygraph_mode(): return _dygraph_tracer_ is not None +def _dygraph_not_support_(func): + def __impl__(*args, **kwargs): + assert not in_dygraph_mode( + ), "We don't support %s in Dygraph mode" % func.__name__ + return func(*args, **kwargs) + + return __impl__ + + +def _dygraph_only_(func): + def __impl__(*args, **kwargs): + assert in_dygraph_mode( + ), "We Only support %s in Dygraph mode, please use fluid.dygraph.guard() as context to run it in Dygraph Mode" % func.__name__ + return func(*args, **kwargs) + + return __impl__ + + +dygraph_not_support = wrap_decorator(_dygraph_not_support_) +dygraph_only = wrap_decorator(_dygraph_only_) + + def _dygraph_tracer(): return _dygraph_tracer_ @@ -382,6 +406,11 @@ def _debug_string_(proto, throw_on_error=True): class Variable(object): """ + **Notes:** + **The constructor of Variable should not be invoked directly.** + **In Static Graph Mode: Please use** `Block.create_var` **to create a Static variable which has no data until being feed.** + **In Dygraph Mode: Please use** `fluid.dygraph.to_variable()` **to create a dygraph variable with real data** + In Fluid, every input and output of an operator is a variable. In most cases, variables are used for holding different kinds of data or training labels. A variable belongs to a block. 
All variable has its own name and @@ -393,37 +422,9 @@ class Variable(object): Most of a Variable's member variables can be setted to be None. It mean it is not available or will be specified later. - Args: - block(Block): The block that the variable belongs to. - type(core.VarDesc.VarType): Variable type. Please reference the - framework.proto for details. - name(str|None): The name of the variable. If setted None, it will be - generated automatically. Default: None - shape(tuple|list|None): The shape of the variable. -1 means the batch size. - Some kinds of variable do not contain shape, just set it to None. - Default: None - dtype(np.dtype|core.VarDesc.VarType|str|None): The data type of variable. - Default: None - lod_level (int|None): The level of lod tensor. 0 means it is not a time - series data. - Default: None - capacity (int|None): The capacity of Channel variable. Ignored for other - types. Default: None - persistable (bool|None): True if the variable is persistable. A persistable - variable will not be deleted after an iteration ending. Defaults: None. - error_clip (BaseErrorClipAttr|None): The error clip attributes of the - corresponding gradient variable. Default: None - stop_gradient (bool): True if the variable will stop to calculate its - gradients when backward. Default: False. - is_data (bool): True if the variable is an input data. Default: False - need_check_feed (bool): True if the variable is an input data and have - to check the feed data shape and dtype. Default: False - - Notes: - The constructor of Variable should not be invoked directly. Please - use `Block.create_var` to create a variable. - Examples: + In Static Graph Mode: + .. code-block:: python import paddle.fluid as fluid @@ -432,6 +433,16 @@ class Variable(object): new_variable = cur_block.create_var(name="X", shape=[-1, 23, 48], dtype='float32') + In Dygraph Mode: + + .. code-block:: python + + import paddle.fluid as fluid + import numpy as np + + with fluid.dygraph.guard(): + new_variable = fluid.dygraph.to_variable(np.arange(10)) + """ def __init__(self, @@ -551,13 +562,19 @@ class Variable(object): self._stop_gradient = stop_gradient self.is_data = is_data + @dygraph_only def detach(self): """ + **Notes: This API is ONLY available in Dygraph mode** + Returns a new Variable, detached from the current graph. - + Returns: Variable: The detached Variable. + Return type: + Variable(Tensor|LoDTensor), dtype is the same as the current Variable + Examples: .. code-block:: python @@ -585,11 +602,74 @@ class Variable(object): else: raise AttributeError("static graph model DO NOT supprt detach") + @dygraph_only def numpy(self): + """ + **Notes: This API is ONLY available in Dygraph mode** + + Returns a numpy array that shows the value of the current :ref:`api_guide_Variable` + + Returns: + ndarray: The numpy value of the current Variable. + + Return type: + ndarray, dtype is the same as the current Variable + + Examples: + ..
code-block:: python + + import paddle.fluid as fluid + from paddle.fluid.dygraph.base import to_variable + from paddle.fluid.dygraph import FC + import numpy as np + + data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') + with fluid.dygraph.guard(): + fc = FC("fc", 64, num_flatten_dims=2) + data = to_variable(data) + x = fc(data) + print(x.numpy()) + + """ + + if not self._ivar.value().get_tensor()._is_initialized(): + raise ValueError("%s is Empty, Please check if it has no data in" % + self.name) new_ivar = self._ivar._copy_to(core.CPUPlace(), True) return np.array(new_ivar.value().get_tensor()) + @dygraph_only def backward(self, backward_strategy=None): + """ + **Notes: This API is ONLY available in Dygraph mode** + + Run backward of the current graph, starting from the current Variable + + Parameter: + - **backward_strategy** : ( :ref:`api_fluid_dygraph_BackwardStrategy` ) - The Backward Strategy to run backward + + Returns: None + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + import numpy as np + + x = np.ones([2, 2], np.float32) + with fluid.dygraph.guard(): + inputs2 = [] + for _ in range(10): + tmp = fluid.dygraph.base.to_variable(x) + tmp.stop_gradient=False + inputs2.append(tmp) + ret2 = fluid.layers.sums(inputs2) + loss2 = fluid.layers.reduce_sum(ret2) + backward_strategy = fluid.dygraph.BackwardStrategy() + backward_strategy.sort_sum_gradient = True + loss2.backward(backward_strategy) + + """ if in_dygraph_mode(): from .dygraph import BackwardStrategy if backward_strategy is None: @@ -601,11 +681,81 @@ class Variable(object): raise ValueError( "Variable.backward() is only avaliable in DyGraph mode") + @dygraph_only def gradient(self): + """ + **Notes: This API is ONLY available in Dygraph mode** + + Get the gradient of the current Variable + + Returns: The numpy value of the gradient of the current Variable + + Return type: ndarray + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + import numpy as np + + x = np.ones([2, 2], np.float32) + with fluid.dygraph.guard(): + inputs2 = [] + for _ in range(10): + tmp = fluid.dygraph.base.to_variable(x) + tmp.stop_gradient=False + inputs2.append(tmp) + ret2 = fluid.layers.sums(inputs2) + loss2 = fluid.layers.reduce_sum(ret2) + backward_strategy = fluid.dygraph.BackwardStrategy() + backward_strategy.sort_sum_gradient = True + loss2.backward(backward_strategy) + print(loss2.gradient()) + + """ + if self._ivar._grad_ivar() is None: + raise ValueError("%s has no grad, Please set Variable.stop_gradient=False, or " \ + "check if this is the first and only variable need grad, if so, please set its pre-Variable's " \ + "stop_gradient=False, to make sure it has gradient " % self.name) + if not self._ivar._grad_ivar().value().get_tensor()._is_initialized(): + raise ValueError( + "%s's Grad is Empty, Please check if it has no data in" % + self.name) new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True) return np.array(new_ivar.value().get_tensor()) + @dygraph_only def clear_gradient(self): + """ + **Notes: This API is ONLY available in Dygraph mode** + + Clear (set to zero) the gradient of the current Variable + + Returns: None + + Examples: + ..
code-block:: python + + import paddle.fluid as fluid + import numpy as np + + x = np.ones([2, 2], np.float32) + with fluid.dygraph.guard(): + inputs2 = [] + for _ in range(10): + tmp = fluid.dygraph.base.to_variable(x) + tmp.stop_gradient=False + inputs2.append(tmp) + ret2 = fluid.layers.sums(inputs2) + loss2 = fluid.layers.reduce_sum(ret2) + backward_strategy = fluid.dygraph.BackwardStrategy() + backward_strategy.sort_sum_gradient = True + loss2.backward(backward_strategy) + print(loss2.gradient()) + loss2.clear_gradient() + print("After clear {}".format(loss2.gradient())) + + """ self._ivar._clear_gradient() def __str__(self): @@ -615,26 +765,32 @@ class Variable(object): """ Get debug string. - Args: - throw_on_error(bool): True if raise an exception when self is + Parameters: + - **throw_on_error** (bool): True if an exception should be raised when self is not initialized. - with_details(bool): more details about variables and parameters + - **with_details** (bool): more details about variables and parameters (e.g. trainable, optimize_attr, ...) will be printed when with_details is True. Default False; Returns: str: The debug string. + Return type: + str + Examples: .. code-block:: python import paddle.fluid as fluid + cur_program = fluid.Program() cur_block = cur_program.current_block() new_variable = cur_block.create_var(name="X", shape=[-1, 23, 48], dtype='float32') - new_variable.to_string(True) + print(new_variable.to_string(True)) + print("\n=============with detail===============\n") + print(new_variable.to_string(True, True)) """ if in_dygraph_mode(): # TODO(panyx0718): add more dygraph debug info. @@ -654,8 +810,9 @@ class Variable(object): if with_details: additional_attr = ("error_clip", "stop_gradient") for attr_name in additional_attr: - res_str += "%s: %s\n" % ( - attr_name, six.binary_type(getattr(self, attr_name))) + res_str += "%s: %s\n" % (attr_name, + cpt.to_text(getattr(self, attr_name))) + return res_str __repr__ = __str__ @@ -684,7 +841,9 @@ class Variable(object): @persistable.setter def persistable(self, p): if in_dygraph_mode(): - return self._ivar.persistable + logging.warn( + "Setting persistable has no effect in Dygraph Mode, since " + "you can just keep it as a normal Python variable") else: self.desc.set_persistable(p) @@ -718,6 +877,7 @@ class Variable(object): return self.desc.dtype() @property + @dygraph_not_support def lod_level(self): # TODO(minqiyang): Support lod_level in dygraph mode if in_dygraph_mode(): @@ -2945,11 +3105,10 @@ class IrGraph(object): class Program(object): """ - Python Program. Beneath it is a ProgramDesc, which is used for - create c++ Program. A program is a self-contained programing - language like container. It has at least one Block, when the - control flow op like conditional_block, while_op is included, + Create a Python Program. It has at least one :ref:`api_guide_Block_en`; when a + control flow op such as conditional_block or :ref:`api_fluid_layers_While` is included, it will contain nested block. + Please reference the framework.proto for details. A set of Program usually contains startup program and main program. @@ -2967,7 +3126,9 @@ class Program(object): default_main_program run in every mini batch and adjust the weights. Returns: - A empty program. + An empty Program. + + Return type: Program Examples: .. code-block:: python @@ -3152,16 +3313,16 @@ class Program(object): """ To debug string.
- Args: - throw_on_error(bool): raise Value error when any of required fields + Parameters: + - **throw_on_error** (bool): raise ValueError when any of required fields is not set. - with_details(bool): True if more details about variables and + - **with_details** (bool): True if more details about variables and parameters, e.g., :code:`trainable`, :code:`optimize_attr`, need to print. Returns: - str : The debug string. + The debug string describing the current Program. Raises: ValueError: If any of required fields is not set and throw_on_error is @@ -3203,12 +3364,19 @@ class Program(object): def _version(self): return self.desc._version() + @dygraph_not_support def clone(self, for_test=False): """ - Create a new, duplicated program. + **Notes**: + **1.** :code:`Program.clone()` **method DOES NOT clone** :code:`py_reader`. + **2. We recommend you to use** :code:`clone` **before using** :code:`Optimizer.minimize`. + **3. This API has no effect in Dygraph Mode** + Create a new Program with the forward content of the original one when ``for_test=True``. + Create a new Program identical to the original one when ``for_test=False`` - Some operators, e.g., :code:`batch_norm`, behave differently between + + Some operators, e.g., :ref:`api_fluid_layers_batch_norm`, behave differently between training and testing. They have an attribute, :code:`is_test`, to control this behaviour. This method will change the :code:`is_test` attribute of them to :code:`True` when :code:`for_test=True`. @@ -3217,29 +3385,27 @@ class Program(object): * Set for_test to True when we want to clone the program for testing. We will prune the backward and optimize part of the program when you use :code:`clone` after :code:`Opimizer.minimize`, but we still - recommend you to use :code:`clone` before using :code:`Opimizer.minimize`. + recommend you to use :code:`clone` before using :code:`Optimizer.minimize`. For example: - Notes: - 1. :code:`Program.clone()` method DOES NOT clone :code:`py_reader`. - 2. We recommend you to use :code:`clone(for_test=True)` before backward - and optimization. E.g. .. code-block:: python test_program = fluid.default_main_program().clone(for_test=True) + # Here we use clone before Momentum optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) optimizer.minimize() - Args: - for_test(bool): True if change the :code:`is_test` attribute of + Parameters: + - **for_test** (bool) - True if change the :code:`is_test` attribute of operators to :code:`True`. - Returns: - Program: The new, duplicated Program object. + Returns: A new Program with the forward content of the original one when ``for_test=True``. A new Program identical to the original one when ``for_test=False`` + + Return type: Program Examples: - Notes: The Program Descs' order maybe different after :code:`clone` and + Notes: The Program's order may be different after :code:`clone` and this will not affect your training or testing progress. In the following example we give you an simple method :code:`print_prog(program)` to print Program Descs inorder to make sure you have same print result @@ -3499,16 +3665,41 @@ class Program(object): @staticmethod def parse_from_string(binary_str): """ - Deserialize a program desc from protobuf binary string. + **Notes:** + **- All information about parameters will be lost after serialization** + **- This API has no effect in Dygraph mode** - Notes: All information about parameters will be lost after serialization - and deserialization. + Deserialize a Program from `protobuf `_ binary string.
+ This method is usually used when saving and loading models - Args: - binary_str_type(str): The binary prootbuf string. + Parameters: + - **binary_str** (str) - the binary protobuf string. - Returns: - Program: A deserialized program desc. + Returns: A deserialized Program. + + Return type: Program + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + startup_prog = fluid.Program() + main_prog = fluid.Program() + with fluid.program_guard(main_prog, startup_prog): + x = fluid.layers.data( + name='X', shape=[1000, 784], dtype='float32', append_batch_size=False) + + y = fluid.layers.data( + name='Y', shape=[784, 100], dtype='float32', append_batch_size=False) + + z = fluid.layers.mul(x=x, y=y) + + binary_str = fluid.default_main_program().desc.serialize_to_string() + prog_restored = fluid.default_main_program().parse_from_string(binary_str) + + print(fluid.default_main_program()) + print(prog_restored) """ p = Program() p.desc = core.ProgramDesc(binary_str) @@ -3536,10 +3727,14 @@ class Program(object): @property def random_seed(self): """ + **Notes: It must be set before the operators have been added.** + The default random seed for random operators in Program. Zero means get the random seed from random device. - Notes: It must be set before the operators have been added. + Returns: the random seed in the current Program + + Return type: int64 Examples: .. code-block:: python @@ -3548,8 +3743,13 @@ class Program(object): prog = fluid.default_main_program() random_seed = prog.random_seed + x_var = fluid.layers.data(name="X", shape=[3,3], dtype="float32", append_batch_size=False) + + # Here we need to set random seed before we use fluid.layers.dropout print(random_seed) prog.random_seed = 1 + z_var = fluid.layers.dropout(x_var, 0.7) + print(prog.random_seed) """ return self._seed @@ -3557,7 +3757,13 @@ class Program(object): @property def num_blocks(self): """ - The number of blocks in this program. + **Notes: This API has no effect in Dygraph mode** + + The number of :ref:`api_guide_Block_en` in this Program. + + Returns: the number of :ref:`api_guide_Block_en` in the current Program + + Return type: int(Platform-dependent size) Examples: .. code-block:: python @@ -3567,6 +3773,8 @@ class Program(object): prog = fluid.default_main_program() num_blocks = prog.num_blocks print(num_blocks) + + """ return self.desc.num_blocks() @@ -3581,7 +3789,13 @@ class Program(object): def global_block(self): """ - Get the first block of this program. + **Notes: This API has no effect in Dygraph mode** + + Get the first :ref:`api_guide_Block_en` of this Program. + + Returns: The first :ref:`api_guide_Block_en` of this Program. + + Return type: :ref:`api_guide_Block_en` Examples: .. code-block:: python @@ -3591,17 +3805,22 @@ class Program(object): prog = fluid.default_main_program() gb_block = prog.global_block() print(gb_block) + """ return self.blocks[0] def block(self, index): """ - Get the :code:`index` block of this program - Args: - index(int): The index of block to get + **Notes: This API has no effect in Dygraph mode** - Returns: - Block: The :code:`index` block + Get the :code:`index` :ref:`api_guide_Block_en` of this Program + + Parameter: + - **index** (int) - The index of :ref:`api_guide_Block_en` to get + + Returns: The :code:`index` block + + Return type: :ref:`api_guide_Block_en` Examples: .. code-block:: python @@ -3616,9 +3835,15 @@ class Program(object): def current_block(self): """ + **Notes: This API has no effect in Dygraph mode** + Get the current block.
The :code:`current` block is the block to append operators. + Returns: The current block + + Return type: Block + Examples: .. code-block:: python @@ -3741,12 +3966,14 @@ class Program(object): if var.desc.need_check_feed(): self.global_block().var(var.name).desc.set_need_check_feed(True) + @dygraph_not_support def list_vars(self): """ - Get all variables from this Program. A iterable object is returned. + Get all :ref:`api_guide_Variable` from this Program. An iterable object is returned. - Returns: - iterable: The generator will yield every variable in this program. + Returns: The generator will yield every variable in this program. + + Return type: iterable :ref:`api_guide_Variable_en` Examples: .. code-block:: python @@ -3845,8 +4072,8 @@ class Parameter(Variable): additional_attr = ("trainable", "optimize_attr", "regularizer", "gradient_clip_attr", "do_model_average") for attr_name in additional_attr: - res_str += "%s: %s\n" % ( - attr_name, six.binary_type(getattr(self, attr_name))) + res_str += "%s: %s\n" % (attr_name, + cpt.to_text(getattr(self, attr_name))) else: res_str = Variable.to_string(self, throw_on_error, False) return res_str @@ -3871,8 +4098,9 @@ def default_startup_program(): This method will return the :code:`default` or the :code:`current` startup program. Users can use :code:`fluid.program_guard` to switch program. - Returns: - Program: startup program + Returns: the current default startup program + + Return type: Program Examples: .. code-block:: python diff --git a/python/paddle/fluid/tests/unittests/test_compat.py b/python/paddle/fluid/tests/unittests/test_compat.py index 1c2c46f99a82875b917a330d6ec76062222420de..0c85e85d06fa07e136b54a184ae69e6e8290149f 100644 --- a/python/paddle/fluid/tests/unittests/test_compat.py +++ b/python/paddle/fluid/tests/unittests/test_compat.py @@ -135,6 +135,22 @@ class TestCompatible(unittest.TestCase): self.assertEqual(l, l2) self.assertEqual(set([u"", u"123", u"321"]), l2) + # check dict types, not inplace + l = {"": ""} + l2 = cpt.to_text(l, inplace=False) + self.assertTrue(isinstance(l2, dict)) + self.assertFalse(l is l2) + self.assertEqual(l, l2) + self.assertEqual({"": ""}, l2) + + # check dict types, inplace + l = {"": ""} + l2 = cpt.to_text(l, inplace=True) + self.assertTrue(isinstance(l2, dict)) + self.assertTrue(l is l2) + self.assertEqual(l, l2) + self.assertEqual({"": ""}, l2) + elif six.PY3: self.assertIsNone(cpt.to_text(None)) @@ -236,6 +252,22 @@ class TestCompatible(unittest.TestCase): for i in l2: self.assertTrue(isinstance(i, str)) + # check dict types, not inplace + l = {"": ""} + l2 = cpt.to_text(l, inplace=False) + self.assertTrue(isinstance(l2, dict)) + self.assertFalse(l is l2) + self.assertEqual(l, l2) + self.assertEqual({"": ""}, l2) + + # check dict types, inplace + l = {"": ""} + l2 = cpt.to_text(l, inplace=True) + self.assertTrue(isinstance(l2, dict)) + self.assertTrue(l is l2) + self.assertEqual(l, l2) + self.assertEqual({"": ""}, l2) + def test_to_bytes(self): # Only support python2.x and python3.x now self.assertTrue(six.PY2 | six.PY3) diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 684fe3298e2bf3cbe530da80f48843ef56d6e30e..6b163ee56e1a0f858e6a97ba4a6f3bd98fe91c3f 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -155,8 +155,11 @@ class Test_Detach(unittest.TestCase): try: y_detach = y.detach() except Exception as e: - assert type(e) == AttributeError -
assert str(e) == 'static graph model DO NOT supprt detach' + # Here is to check + assert type(e) == AssertionError + assert str( + e + ) == 'We Only support detach in Dygraph mode, please use fluid.dygraph.guard() as context to run it in Dygraph Mode' if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index acfc1e75c0fb5b3d5709896d7efb64699e0d62d2..245c6a6ecc008301964f3baa41370dcbd5793e55 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -207,6 +207,59 @@ class TestImperative(unittest.TestCase): a = inputs2[0].gradient() self.assertTrue(np.allclose(inputs2[0].gradient(), x)) + def test_empty_var(self): + with fluid.dygraph.guard(): + cur_program = fluid.Program() + cur_block = cur_program.current_block() + new_variable = cur_block.create_var( + name="X", shape=[-1, 23, 48], dtype='float32') + try: + new_variable.numpy() + except Exception as e: + assert type(e) == ValueError + + try: + new_variable.backward() + except Exception as e: + assert type(e) == ValueError + + try: + new_variable.clear_gradient() + except Exception as e: + assert type(e) == ValueError + + def test_empty_grad(self): + with fluid.dygraph.guard(): + x = np.ones([2, 2], np.float32) + new_var = fluid.dygraph.base.to_variable(x) + try: + new_var.gradient() + except Exception as e: + assert type(e) == ValueError + + try: + new_var.clear_gradient() + except Exception as e: + assert type(e) == ValueError + + with fluid.dygraph.guard(): + cur_program = fluid.Program() + cur_block = cur_program.current_block() + new_variable = cur_block.create_var( + name="X", shape=[-1, 23, 48], dtype='float32') + try: + new_variable.gradient() + except Exception as e: + assert type(e) == ValueError + + def test_set_persistable(self): + with fluid.dygraph.guard(): + x = np.ones([2, 2], np.float32) + new_var = fluid.dygraph.base.to_variable(x) + self.assertFalse(new_var.persistable) + new_var.persistable = True + self.assertFalse(new_var.persistable) + def test_layer(self): with fluid.dygraph.guard(): cl = core.Layer() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py index f55f36c00f5f1c2c62169d5db7ca97e30ed2259d..504c4dbdb2a23083c335e1348042b629d91173d9 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py @@ -15,6 +15,7 @@ import paddle.fluid as fluid import paddle.fluid.framework as framework import unittest + from test_imperative_base import new_program_scope @@ -30,7 +31,7 @@ class TestTracerMode(unittest.TestCase): self.assertEqual(self.tracer._train_mode, False) return a - @fluid.dygraph.base._not_support + @framework.dygraph_not_support def not_support_func(self): return True diff --git a/python/paddle/fluid/tests/unittests/test_imperative_framework.py b/python/paddle/fluid/tests/unittests/test_imperative_framework.py index 0f83f89f7bd3876f6a9a8aedb9ca43082395f7a9..d68d362f0bef531d07436c10e337db624218170c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_framework.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_framework.py @@ -56,7 +56,7 @@ class TestDygraphFramework(unittest.TestCase): out.backward() raise AssertionError( "backward should not be usable in static graph mode") - except ValueError as e: + except 
AssertionError as e: self.assertTrue((e is not None)) def test_dygraph_to_string(self): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py index 8531eda86978302f4014e11577f7055f1ef156b6..3ac301a8f69b8d5323c33814a3fecc8245e7b779 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py @@ -135,6 +135,8 @@ class TestDygraphGNN(unittest.TestCase): adam.minimize(loss) model.clear_gradients() + loss_value = loss.numpy() + model_gc_weight_value = model.gc.weight.numpy() with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = seed @@ -157,12 +159,14 @@ class TestDygraphGNN(unittest.TestCase): adam2 = AdamOptimizer(learning_rate=1e-3) adam2.minimize(loss2) model2.clear_gradients() - - self.assertEqual(static_loss, loss.numpy()) - self.assertTrue(np.allclose(static_weight, model.gc.weight.numpy())) - self.assertEqual(static_loss, loss2.numpy()) - self.assertTrue(np.allclose(static_weight, model2.gc.weight.numpy())) - sys.stderr.write('%s %s\n' % (static_loss, loss.numpy())) + loss2_value = loss2.numpy() + model2_gc_weight_value = model2.gc.weight.numpy() + + self.assertEqual(static_loss, loss_value) + self.assertTrue(np.allclose(static_weight, model_gc_weight_value)) + self.assertEqual(static_loss, loss2_value) + self.assertTrue(np.allclose(static_weight, model2_gc_weight_value)) + sys.stderr.write('%s %s\n' % (static_loss, loss_value)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 5f6c5b1cb6a5a641b23dbbd82b98c78313efb1ca..3804150f9e8d18f1a23dd966605b46aa76b8f26b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -264,6 +264,10 @@ class TestDygraphPtbRnn(unittest.TestCase): for param in ptb_model.parameters(): dy_param_updated[param.name] = param.numpy() + dy_loss_value = dy_loss.numpy() + dy_last_cell_value = last_cell.numpy() + dy_last_hidden_value = last_hidden.numpy() + with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed @@ -330,11 +334,11 @@ class TestDygraphPtbRnn(unittest.TestCase): static_param_updated[static_param_name_list[k - 3]] = out[k] - self.assertTrue(np.array_equal(static_loss_value, dy_loss.numpy())) + self.assertTrue(np.array_equal(static_loss_value, dy_loss_value)) self.assertTrue( - np.array_equal(static_last_cell_value, last_cell.numpy())) + np.array_equal(static_last_cell_value, dy_last_cell_value)) self.assertTrue( - np.array_equal(static_last_hidden_value, last_hidden.numpy())) + np.array_equal(static_last_hidden_value, dy_last_hidden_value)) for key, value in six.iteritems(static_param_init): self.assertTrue(np.array_equal(value, dy_param_init[key])) for key, value in six.iteritems(static_param_updated): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py index d3beed7b00773e4be7037afb33b363f9d6e81f4d..ca0b03c60ab2f41a13eb7431688976346c530a34 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn_sorted_gradient.py @@ -84,6 +84,10 @@ class 
TestDygraphPtbRnnSortGradient(unittest.TestCase): for param in ptb_model.parameters(): dy_param_updated[param.name] = param.numpy() + dy_loss_value = dy_loss.numpy() + dy_last_cell_value = last_cell.numpy() + dy_last_hidden_value = last_hidden.numpy() + with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed @@ -150,11 +154,11 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase): static_param_updated[static_param_name_list[k - 3]] = out[k] - self.assertTrue(np.array_equal(static_loss_value, dy_loss.numpy())) + self.assertTrue(np.array_equal(static_loss_value, dy_loss_value)) self.assertTrue( - np.array_equal(static_last_cell_value, last_cell.numpy())) + np.array_equal(static_last_cell_value, dy_last_cell_value)) self.assertTrue( - np.array_equal(static_last_hidden_value, last_hidden.numpy())) + np.array_equal(static_last_hidden_value, dy_last_hidden_value)) for key, value in six.iteritems(static_param_init): self.assertTrue(np.array_equal(value, dy_param_init[key])) for key, value in six.iteritems(static_param_updated): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index 51fb66f7743e7d79fb3d75feb2d32e080f1f48df..fe780df680cca104e10f2ce4f69112c30333a39e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -993,6 +993,11 @@ class TestDygraphTransformerSortGradient(unittest.TestCase): for param in transformer.parameters(): dy_param_updated[param.name] = param.numpy() + dy_avg_cost_value = dy_avg_cost.numpy() + dy_sum_cost_value = dy_sum_cost.numpy() + dy_predict_value = dy_predict.numpy() + dy_token_num_value = dy_token_num.numpy() + with new_program_scope(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed @@ -1067,13 +1072,12 @@ class TestDygraphTransformerSortGradient(unittest.TestCase): 4]] = out[k] self.assertTrue( - np.array_equal(static_avg_cost_value, dy_avg_cost.numpy())) - self.assertTrue( - np.array_equal(static_sum_cost_value, dy_sum_cost.numpy())) + np.array_equal(static_avg_cost_value, dy_avg_cost_value)) self.assertTrue( - np.array_equal(static_predict_value, dy_predict.numpy())) + np.array_equal(static_sum_cost_value, dy_sum_cost_value)) + self.assertTrue(np.array_equal(static_predict_value, dy_predict_value)) self.assertTrue( - np.array_equal(static_token_num_value, dy_token_num.numpy())) + np.array_equal(static_token_num_value, dy_token_num_value)) for key, value in six.iteritems(static_param_init): self.assertTrue(np.array_equal(value, dy_param_init[key])) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 3466a9b7829f597b8fc4b29ee56d79b7cfbf8f57..838314115a7c804b61746cf947ceb31fcc83acba 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -112,9 +112,10 @@ class TestLayer(LayerTest): fc2 = nn.FC('fc2', size=4) ret = fc1(t) dy_ret = fc2(ret) + dy_ret_value = dy_ret.numpy() self.assertTrue(np.array_equal(static_ret, static_ret2)) - self.assertTrue(np.array_equal(static_ret, dy_ret.numpy())) + self.assertTrue(np.array_equal(static_ret, dy_ret_value)) def test_layer_norm(self): inp = np.ones([3, 32, 32], dtype='float32') @@ -149,6 +150,7 @@ class 
TestLayer(LayerTest): bias_attr=fluid.initializer.ConstantInitializer(value=1), act='sigmoid') dy_ret = lm(base.to_variable(inp)) + dy_ret_value = dy_ret.numpy() with self.dynamic_graph(): lm = nn.LayerNorm( 'layer_norm', @@ -163,7 +165,7 @@ class TestLayer(LayerTest): self.assertFalse(hasattr(lm, "_bias_w")) self.assertTrue(np.array_equal(static_ret, static_ret2)) - self.assertTrue(np.array_equal(dy_ret.numpy(), static_ret2)) + self.assertTrue(np.array_equal(dy_ret_value, static_ret2)) def test_relu(self): with self.static_graph(): @@ -176,8 +178,9 @@ class TestLayer(LayerTest): with self.dynamic_graph(): t = np.ones([3, 3], dtype='float32') dy_ret = layers.relu(base.to_variable(t)) + dy_ret_value = dy_ret.numpy() - self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret_value)) def test_matmul(self): with self.static_graph(): @@ -197,8 +200,9 @@ class TestLayer(LayerTest): t = np.ones([3, 3], dtype='float32') t2 = np.ones([3, 3], dtype='float32') dy_ret = layers.matmul(base.to_variable(t), base.to_variable(t2)) + dy_ret_value = dy_ret.numpy() - self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret_value)) def test_conv2d(self): with self.static_graph(): @@ -222,6 +226,7 @@ class TestLayer(LayerTest): images = np.ones([2, 3, 5, 5], dtype='float32') conv2d = nn.Conv2D('conv2d', num_filters=3, filter_size=[2, 2]) dy_ret = conv2d(base.to_variable(images)) + dy_ret_value = dy_ret.numpy() with self.dynamic_graph(): images = np.ones([2, 3, 5, 5], dtype='float32') @@ -230,7 +235,7 @@ class TestLayer(LayerTest): dy_ret = conv2d(base.to_variable(images)) self.assertTrue(conv2d._bias_param is None) - self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret_value)) self.assertTrue(np.allclose(static_ret, static_ret2)) def test_gru_unit(self): @@ -269,10 +274,13 @@ class TestLayer(LayerTest): gru = nn.GRUUnit('gru', size=D * 3) dy_ret = gru( base.to_variable(input), base.to_variable(hidden_input)) + dy_ret_value = [] + for i in range(len(static_ret)): + dy_ret_value.append(dy_ret[i].numpy()) for i in range(len(static_ret)): self.assertTrue(np.allclose(static_ret[i], static_ret2[i])) - self.assertTrue(np.allclose(static_ret[i], dy_ret[i].numpy())) + self.assertTrue(np.allclose(static_ret[i], dy_ret_value[i])) def test_elementwise_math(self): n = np.ones([3, 3], dtype='float32') @@ -313,9 +321,8 @@ class TestLayer(LayerTest): ret = layers.elementwise_div(ret, n4) ret = layers.elementwise_sub(ret, n5) dy_ret = layers.elementwise_mul(ret, n6) - self.assertTrue( - np.allclose(static_ret, dy_ret.numpy()), - '%s vs %s' % (static_ret, dy_ret.numpy())) + dy_ret_value = dy_ret.numpy() + self.assertTrue(np.allclose(static_ret, dy_ret_value)) def test_elementwise_minmax(self): n = np.ones([3, 3], dtype='float32') @@ -324,9 +331,11 @@ class TestLayer(LayerTest): with self.dynamic_graph(): min_ret = layers.elementwise_min(n, n2) max_ret = layers.elementwise_max(n, n2) + min_ret_value = min_ret.numpy() + max_ret_value = max_ret.numpy() - self.assertTrue(np.allclose(n, min_ret.numpy())) - self.assertTrue(np.allclose(n2, max_ret.numpy())) + self.assertTrue(np.allclose(n, min_ret_value)) + self.assertTrue(np.allclose(n2, max_ret_value)) def test_sequence_conv(self): inp_np = np.arange(12).reshape([3, 4]).astype('float32') @@ -404,8 +413,9 @@ class TestLayer(LayerTest): act='sigmoid', bias_attr=fluid.initializer.ConstantInitializer(value=1)) dy_rlt = 
conv2d_transpose(base.to_variable(inp_np)) + dy_rlt_value = dy_rlt.numpy() self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt2)) + self.assertTrue(np.allclose(dy_rlt_value, static_rlt2)) def test_bilinear_tensor_product(self): inp_np_x = np.array([[1, 2, 3]]).astype('float32') @@ -460,12 +470,12 @@ class TestLayer(LayerTest): bias_attr=fluid.initializer.ConstantInitializer(value=1), act='sigmoid') dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y)) - + dy_rlt_value = dy_rlt.numpy() with self.dynamic_graph(): btp2 = nn.BilinearTensorProduct('btp', 6, act='sigmoid') dy_rlt2 = btp2( base.to_variable(inp_np_x), base.to_variable(inp_np_y)) - + dy_rlt2_value = dy_rlt2.numpy() with self.static_graph(): data_x2 = layers.data( name='x', @@ -484,9 +494,9 @@ class TestLayer(LayerTest): feed={'x': inp_np_x, 'y': inp_np_y}, fetch_list=[out2])[0] - self.assertTrue(np.array_equal(dy_rlt2.numpy(), static_rlt3)) + self.assertTrue(np.array_equal(dy_rlt2_value, static_rlt3)) self.assertTrue(np.array_equal(static_rlt2, static_rlt)) - self.assertTrue(np.array_equal(dy_rlt.numpy(), static_rlt)) + self.assertTrue(np.array_equal(dy_rlt_value, static_rlt)) def test_prelu(self): inp_np = np.ones([5, 200, 100, 100]).astype('float32') @@ -525,9 +535,10 @@ class TestLayer(LayerTest): mode=mode, param_attr=ParamAttr(initializer=Constant(1.0))) dy_rlt = prelu(base.to_variable(inp_np)) + dy_rlt_value = dy_rlt.numpy() self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt)) + self.assertTrue(np.allclose(dy_rlt_value, static_rlt)) def test_embeding(self): inp_word = np.array([[[1]]]).astype('int64') @@ -557,10 +568,11 @@ class TestLayer(LayerTest): size=[dict_size, 32], param_attr='emb.w', is_sparse=False) - static_rlt3 = emb2(base.to_variable(inp_word)) + dy_rlt = emb2(base.to_variable(inp_word)) + dy_rlt_value = dy_rlt.numpy() self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(static_rlt3.numpy(), static_rlt)) + self.assertTrue(np.allclose(dy_rlt_value, static_rlt)) def test_nce(self): window_size = 5 @@ -677,10 +689,11 @@ class TestLayer(LayerTest): bias_attr='nce.b', sample_weight=sample_weights) - nce_loss3 = nce(embs3, words[label_word]) + dy_rlt = nce(embs3, words[label_word]) + dy_rlt_value = dy_rlt.numpy() self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(nce_loss3.numpy(), static_rlt)) + self.assertTrue(np.allclose(dy_rlt_value, static_rlt)) def test_conv3d(self): with self.static_graph(): @@ -706,8 +719,9 @@ class TestLayer(LayerTest): images = np.ones([2, 3, 6, 6, 6], dtype='float32') conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2) dy_ret = conv3d(base.to_variable(images)) + dy_rlt_value = dy_ret.numpy() - self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) + self.assertTrue(np.allclose(static_ret, dy_rlt_value)) self.assertTrue(np.allclose(static_ret, static_ret2)) def test_row_conv(self): @@ -800,8 +814,9 @@ class TestLayer(LayerTest): with self.dynamic_graph(): groupNorm = nn.GroupNorm('GroupNorm', groups=2) dy_ret = groupNorm(base.to_variable(input)) + dy_rlt_value = dy_ret.numpy() - self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) + self.assertTrue(np.allclose(static_ret, dy_rlt_value)) self.assertTrue(np.allclose(static_ret, static_ret2)) def test_spectral_norm(self): @@ -850,8 +865,9 @@ class TestLayer(LayerTest): with self.dynamic_graph(): spectralNorm = 
nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2) dy_ret = spectralNorm(base.to_variable(input)) + dy_rlt_value = dy_ret.numpy() - self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) + self.assertTrue(np.allclose(static_ret, dy_rlt_value)) self.assertTrue(np.allclose(static_ret, static_ret2)) def test_tree_conv(self): @@ -922,9 +938,10 @@ class TestLayer(LayerTest): treeConv = nn.TreeConv( 'SpectralNorm', output_size=6, num_filters=1, max_depth=2) dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj)) + dy_rlt_value = dy_ret.numpy() self.assertTrue(np.allclose(static_ret, static_ret2)) - self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) + self.assertTrue(np.allclose(static_ret, dy_rlt_value)) def test_conv3d_transpose(self): input_array = np.arange(0, 48).reshape( @@ -953,8 +970,9 @@ class TestLayer(LayerTest): filter_size=12, use_cudnn=False) dy_rlt = conv3d_transpose(base.to_variable(input_array)) + dy_rlt_value = dy_rlt.numpy() self.assertTrue(np.allclose(static_rlt2, static_rlt)) - self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt)) + self.assertTrue(np.allclose(dy_rlt_value, static_rlt)) def test_eye_op(self): np_eye = np.eye(3, 2) @@ -972,11 +990,14 @@ class TestLayer(LayerTest): num_columns=2, batch_shape=[4, 3]) diag_tensor = layers.eye(20) - - self.assertTrue(np.allclose(eye_tensor.numpy(), np_eye)) - self.assertTrue(np.allclose(eye_tensor_rlt1.numpy(), stack_rlt1)) - self.assertTrue(np.allclose(eye_tensor_rlt2.numpy(), stack_rlt2)) - self.assertTrue(np.allclose(diag_tensor.numpy(), np.eye(20))) + eye_tensor_value = eye_tensor.numpy() + eye_tensor_rlt1_value = eye_tensor_rlt1.numpy() + eye_tensor_rlt2_value = eye_tensor_rlt2.numpy() + diag_tensor_value = diag_tensor.numpy() + self.assertTrue(np.allclose(eye_tensor_value, np_eye)) + self.assertTrue(np.allclose(eye_tensor_rlt1_value, stack_rlt1)) + self.assertTrue(np.allclose(eye_tensor_rlt2_value, stack_rlt2)) + self.assertTrue(np.allclose(diag_tensor_value, np.eye(20))) with self.assertRaises(TypeError): layers.eye(num_rows=3.1) @@ -998,8 +1019,9 @@ class TestLayer(LayerTest): with self.dynamic_graph(): t = np.ones([3, 3], dtype='float32') dy_ret = layers.hard_swish(base.to_variable(t)) + dy_ret_rlt = dy_ret.numpy() - self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) + self.assertTrue(np.allclose(static_ret, dy_ret_rlt)) def test_compare(self): value_a = np.arange(3) @@ -1017,8 +1039,8 @@ class TestLayer(LayerTest): db = base.to_variable(value_b) dcond = layers.less_than(x=da, y=db) - for i in range(len(static_ret)): - self.assertTrue(dcond.numpy()[i] == static_ret[i]) + for i in range(len(static_ret)): + self.assertTrue(dcond.numpy()[i] == static_ret[i]) # less equal with self.static_graph(): @@ -1160,8 +1182,9 @@ class TestBook(LayerTest): dy_result = method() if isinstance(dy_result, tuple): dy_result = dy_result[0] + dy_result_value = dy_result.numpy() - self.assertTrue(np.array_equal(static_result[0], dy_result.numpy())) + self.assertTrue(np.array_equal(static_result[0], dy_result_value)) def _get_np_data(self, shape, dtype, append_batch_size=True): np.random.seed(self.seed) diff --git a/python/paddle/fluid/tests/unittests/test_program_to_string.py b/python/paddle/fluid/tests/unittests/test_program_to_string.py new file mode 100644 index 0000000000000000000000000000000000000000..22ba43bde2ad4d5473da4a54725789207cd8ea58 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_program_to_string.py @@ -0,0 +1,34 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import paddle.fluid as fluid +import unittest + + +class TestProgram(unittest.TestCase): + def test_program_to_string(self): + prog = fluid.default_main_program() + a = fluid.layers.data( + name="X", shape=[2, 3], dtype="float32", append_batch_size=False) + c = fluid.layers.fc(a, size=3) + prog_string = prog.to_string(throw_on_error=True, with_details=False) + prog_string_with_details = prog.to_string( + throw_on_error=False, with_details=True) + assert prog_string is not None + assert len(prog_string_with_details) > len(prog_string) + + +if __name__ == '__main__': + unittest.main()
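For reference, a small sketch of how the `dygraph_only` guard introduced in framework.py is expected to behave from the user's side (variable names here are illustrative): methods such as `Variable.detach()` work inside `fluid.dygraph.guard()`, and outside of it the decorator fails fast with an AssertionError, which is what the updated `test_detach` asserts.

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    x = np.ones([2, 2], np.float32)

    # In dygraph mode the @dygraph_only methods are usable.
    with fluid.dygraph.guard():
        var = fluid.dygraph.to_variable(x)
        detached = var.detach()
        print(detached.numpy())

    # In static graph mode the same call now raises AssertionError
    # instead of the old AttributeError.
    static_var = fluid.layers.data(name="x", shape=[2, 2], dtype="float32")
    try:
        static_var.detach()
    except AssertionError as e:
        print(e)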