Unverified commit 39ff0f9c, authored by Jiabin Yang, committed by GitHub

Optimze/optimize dygraph api (#19999)

* test=develop, fix docker with paddle nccl problem

* test=develop, Add Variable api and refine dygraph related API

* test=develop, Add Variable api and refine dygraph related API

* test=develop, refine test for new api and error info

* test=develop, refine error info and test_layers

* test=develop, add API.spec

* test=develop, fix to_string python2 and python3 compat error and refine doc

* test=develop, add API spec

* test=develop, update API spec

* test=develop, update API spec

* test=develop, invoke ci

* test=develop, fix example code

* test=develop, update API spec

* test=develop, add compat test and fix inplace compat dict error
Parent f5221ac1
paddle.fluid.Program ('paddle.fluid.framework.Program', ('document', '4f9e1829c89e0711355820e935d2b447'))
paddle.fluid.Program.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.Program.block (ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None), ('document', '28d066e432ceda86810b1e7deb8a4afa'))
paddle.fluid.Program.clone (ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,)), ('document', '1e910e8c4186e8ff1afb62602f369033'))
paddle.fluid.Program.current_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '365e49ce9f346ac6d54265e29db447b5'))
paddle.fluid.Program.global_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'dd3f2b49147861d6ae48989a77482f05'))
paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '757cf8d083dff9507676b17376ac5af1'))
paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', '70e063a0a09d5a8ed322db0d5de9edb4'))
paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', '6dfb00cd50eb515dcf2548a68ea94bfb'))
paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'accb52b28228f8e93a26fabdc960f56c'))
paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '853718df675e59aea7104f3d61bbf11d'))
paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', '78fb5c7f70ef76bcf4a1862c3f6b8191'))
paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '917d313881ff990de5fb18d98a9c7b42'))
...@@ -16,6 +16,15 @@ paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=N
paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c2562241744aabe3fff1b59af22dd281'))
paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '301bae0d8e02cc9eec5be02f052f11c6'))
paddle.fluid.is_compiled_with_cuda (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '60c7f107a5050aeb58bb74eb175672b5'))
paddle.fluid.Variable ('paddle.fluid.framework.Variable', ('document', '65ff735c2b96673d7131f5ff6b0db40c'))
paddle.fluid.Variable.__init__ (ArgSpec(args=['self', 'block', 'type', 'name', 'shape', 'dtype', 'lod_level', 'capacity', 'persistable', 'error_clip', 'stop_gradient', 'is_data', 'need_check_feed'], varargs=None, keywords='kwargs', defaults=(VarType.LOD_TENSOR, None, None, None, None, None, None, None, False, False, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.Variable.astype (ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '78541af4039262ed7ce3c447f8cc9cc1'))
paddle.fluid.Variable.backward (ArgSpec(args=['self', 'backward_strategy'], varargs=None, keywords=None, defaults=(None,)), ('document', 'cb928fa194da09694f4267f0a25268f1'))
paddle.fluid.Variable.clear_gradient (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '509a96d23c876fc5bfb10e1147e21d5f'))
paddle.fluid.Variable.detach (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '0730b2d310b014d9b0a903b2034757d7'))
paddle.fluid.Variable.gradient (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '86b246bfaf20f3058e91927abbcf9fb9'))
paddle.fluid.Variable.numpy (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '7536e8feb56d827875943e7f01d406fc'))
paddle.fluid.Variable.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', '31f359a2c074f26dc0ffff296fc3983f'))
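The new `paddle.fluid.Variable` entries above expose the dygraph-side helpers (`numpy`, `backward`, `gradient`, `clear_gradient`, `detach`, `astype`, `to_string`). A minimal sketch of how they compose in a dygraph session, mirroring the docstring examples added later in this diff (shapes and values are illustrative only):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # to_variable wraps an ndarray; the result supports the new Variable API
        x = fluid.dygraph.to_variable(np.ones([2, 2], np.float32))
        x.stop_gradient = False
        loss = fluid.layers.reduce_sum(x)
        loss.backward()          # build and run the backward graph
        print(x.gradient())      # ndarray holding d(loss)/d(x)
        x.clear_gradient()       # zero the accumulated gradient
        y = x.detach()           # a new Variable cut off from the graph
        print(y.numpy())         # fetch the value as an ndarray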
paddle.fluid.Executor ('paddle.fluid.executor.Executor', ('document', '34e8c1769313fbeff7817212dda6259e'))
paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3a584496aa1343f36eebf3c46b323a74'))
...@@ -573,7 +582,7 @@ paddle.fluid.dygraph.Layer.parameters (ArgSpec(args=['self', 'include_sublayers'
paddle.fluid.dygraph.Layer.state_dict (ArgSpec(args=['self', 'destination', 'include_sublayers'], varargs=None, keywords=None, defaults=(None, True)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Layer.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62'))
paddle.fluid.dygraph.Layer.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.__impl__ (ArgSpec(args=['func'], varargs=None, keywords=None, defaults=()), ('document', '75d1d3afccc8b39cdebf05cb1f5969f9'))
paddle.fluid.dygraph.guard (ArgSpec(args=['place'], varargs=None, keywords=None, defaults=(None,)), ('document', '7071320ffe2eec9aacdae574951278c6'))
paddle.fluid.dygraph.to_variable (ArgSpec(args=['value', 'block', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0e69fa3666f15dd01b6e3e270b9371cd'))
paddle.fluid.dygraph.Conv2D ('paddle.fluid.dygraph.nn.Conv2D', ('document', 'baafe7ae0d3a61ae79cf4c7443e2c37c'))
......
...@@ -72,6 +72,18 @@ def to_text(obj, encoding='utf-8', inplace=False):
            return obj
        else:
            return set([_to_text(item, encoding) for item in obj])
    elif isinstance(obj, dict):
        if inplace:
            new_obj = {}
            for key, value in six.iteritems(obj):
                new_obj[_to_text(key, encoding)] = _to_text(value, encoding)
            obj.update(new_obj)
            return obj
        else:
            new_obj = {}
            for key, value in six.iteritems(obj):
                new_obj[_to_text(key, encoding)] = _to_text(value, encoding)
            return new_obj
    else:
        return _to_text(obj, encoding)
...@@ -99,6 +111,8 @@ def _to_text(obj, encoding):
        return obj.decode(encoding)
    elif isinstance(obj, six.text_type):
        return obj
    elif isinstance(obj, (bool, float)):
        return obj
    else:
        return six.u(obj)
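A quick sketch of what the new dict branch does: keys and values are converted to text recursively; with `inplace=True` the original dict object is updated and returned, otherwise a new dict is built (mirroring the compat tests later in this diff):

.. code-block:: python

    import paddle.compat as cpt

    d = {"hello": "world"}
    d2 = cpt.to_text(d, inplace=False)   # builds and returns a new dict
    print(d2 == d, d2 is d)              # True False

    d3 = cpt.to_text(d, inplace=True)    # updates and returns the same object
    print(d3 is d)                       # True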
......
...@@ -21,7 +21,6 @@ import functools
from . import layers
from . import framework
from . import core
from .dygraph.base import _not_support

__all__ = [
    'set_gradient_clip',
...@@ -337,7 +336,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
        return param, new_grad

@framework.dygraph_not_support
def set_gradient_clip(clip, param_list=None, program=None):
    """
    To specify parameters that require gradient clip.
......
...@@ -45,21 +45,12 @@ def _switch_tracer_mode_guard_(is_train=True):
        yield

def _dygraph_not_support_(func):
    def __impl__(*args, **kwargs):
        assert not framework.in_dygraph_mode(
        ), "We don't support %s in Dygraph mode" % func.__name__
        return func(*args, **kwargs)

    return __impl__

def _no_grad_(func):
    """
    This decorator will prevent the decorated func from creating a backward network in dygraph mode

    Parameter:
        - **func** (python func): the func that doesn't need grad

    Examples:
...@@ -92,7 +83,6 @@ def _no_grad_(func):
no_grad = wrap_decorator(_no_grad_)
# for fluidDoc
no_grad.__doc__ = _no_grad_.__doc__

_not_support = wrap_decorator(_dygraph_not_support_)

@signature_safe_contextmanager
...@@ -157,6 +147,7 @@ def _print_debug_msg(limit=5, is_test=False):
    return unique_name_size, tracer_var_size, alive_cpp_var_size

@framework.dygraph_only
def to_variable(value, block=None, name=None):
    """
    This function will create a variable from ndarray
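`to_variable` is now guarded by `framework.dygraph_only`, so calling it outside a `fluid.dygraph.guard()` context trips the decorator's assertion. A minimal usage sketch:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        var = fluid.dygraph.to_variable(np.arange(6).reshape(2, 3).astype('float32'))
        print(var.shape)    # [2, 3]
        print(var.numpy())  # the wrapped ndarray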
......
...@@ -18,7 +18,7 @@ import collections
from collections import defaultdict
from collections import Iterable
import contextlib
from .wrapped_decorator import signature_safe_contextmanager, wrap_decorator
import os
import re
import traceback
...@@ -28,6 +28,7 @@ import numpy as np
import subprocess
import multiprocessing
import sys
import logging
from .. import compat as cpt
from .proto import framework_pb2
...@@ -45,6 +46,7 @@ __all__ = [
    'cuda_pinned_places',
    'in_dygraph_mode',
    'is_compiled_with_cuda',
    'Variable',
]

EMPTY_VAR_NAME = core.kEmptyVarName()
...@@ -75,6 +77,28 @@ def in_dygraph_mode():
    return _dygraph_tracer_ is not None
def _dygraph_not_support_(func):
    def __impl__(*args, **kwargs):
        assert not in_dygraph_mode(
        ), "We don't support %s in Dygraph mode" % func.__name__
        return func(*args, **kwargs)

    return __impl__


def _dygraph_only_(func):
    def __impl__(*args, **kwargs):
        assert in_dygraph_mode(
        ), "We Only support %s in Dygraph mode, please use fluid.dygraph.guard() as context to run it in Dygraph Mode" % func.__name__
        return func(*args, **kwargs)

    return __impl__


dygraph_not_support = wrap_decorator(_dygraph_not_support_)
dygraph_only = wrap_decorator(_dygraph_only_)
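These wrappers turn the mode check into reusable decorators; `wrap_decorator` keeps the decorated function's signature intact. A hedged sketch of how a dygraph-only API behaves outside a `fluid.dygraph.guard()` block (the helper `my_dygraph_helper` is hypothetical, for illustration only):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.framework import dygraph_only

    @dygraph_only
    def my_dygraph_helper(x):          # hypothetical helper
        return fluid.dygraph.to_variable(x)

    arr = np.ones([2, 2], np.float32)

    with fluid.dygraph.guard():
        var = my_dygraph_helper(arr)   # fine: the dygraph tracer is active

    try:
        my_dygraph_helper(arr)         # outside the guard the assert fires
    except AssertionError as e:
        print(e)  # "We Only support my_dygraph_helper in Dygraph mode, ..."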
def _dygraph_tracer():
    return _dygraph_tracer_
...@@ -382,6 +406,11 @@ def _debug_string_(proto, throw_on_error=True):
class Variable(object):
    """
    **Notes:**
        **The constructor of Variable should not be invoked directly.**

        **In Static Graph Mode: Please use** `Block.create_var` **to create a Static variable which has no data until being fed.**

        **In Dygraph Mode: Please use** `fluid.dygraph.to_variable()` **to create a dygraph variable with real data**

    In Fluid, every input and output of an operator is a variable. In most
    cases, variables are used for holding different kinds of data or training
    labels. A variable belongs to a block. Every variable has its own name and
...@@ -393,37 +422,9 @@ class Variable(object):
    Most of a Variable's member variables can be set to None. It means
    it is not available or will be specified later.
Args:
block(Block): The block that the variable belongs to.
type(core.VarDesc.VarType): Variable type. Please reference the
framework.proto for details.
name(str|None): The name of the variable. If setted None, it will be
generated automatically. Default: None
shape(tuple|list|None): The shape of the variable. -1 means the batch size.
Some kinds of variable do not contain shape, just set it to None.
Default: None
dtype(np.dtype|core.VarDesc.VarType|str|None): The data type of variable.
Default: None
lod_level (int|None): The level of lod tensor. 0 means it is not a time
series data.
Default: None
capacity (int|None): The capacity of Channel variable. Ignored for other
types. Default: None
persistable (bool|None): True if the variable is persistable. A persistable
variable will not be deleted after an iteration ending. Defaults: None.
error_clip (BaseErrorClipAttr|None): The error clip attributes of the
corresponding gradient variable. Default: None
stop_gradient (bool): True if the variable will stop to calculate its
gradients when backward. Default: False.
is_data (bool): True if the variable is an input data. Default: False
need_check_feed (bool): True if the variable is an input data and have
to check the feed data shape and dtype. Default: False
Notes:
The constructor of Variable should not be invoked directly. Please
use `Block.create_var` to create a variable.
    Examples:
        In Static Graph Mode:

        .. code-block:: python

            import paddle.fluid as fluid
...@@ -432,6 +433,16 @@ class Variable(object):
            new_variable = cur_block.create_var(name="X",
                                                shape=[-1, 23, 48],
                                                dtype='float32')

        In Dygraph Mode:

        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np

            with fluid.dygraph.guard():
                new_variable = fluid.dygraph.to_variable(np.arange(10))

    """

    def __init__(self,
...@@ -551,13 +562,19 @@ class Variable(object):
        self._stop_gradient = stop_gradient
        self.is_data = is_data

    @dygraph_only
    def detach(self):
        """
        **Notes: This API is ONLY available in Dygraph mode**

        Returns a new Variable, detached from the current graph.

        Returns:
            Variable: The detached Variable.

        Return type:
            Variable(Tensor|LoDTensor) dtype is same as current Variable

        Examples:
            .. code-block:: python
...@@ -585,11 +602,74 @@ class Variable(object):
        else:
            raise AttributeError("static graph model DO NOT supprt detach")
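A minimal sketch of `detach` in dygraph mode (in static graph mode the method now trips the `dygraph_only` assertion before this `AttributeError` is reached, as the updated test later in this diff checks):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(
            np.random.uniform(-1, 1, [4, 4]).astype('float32'))
        y = x.detach()   # same data, cut off from the autograd graph
        print(np.array_equal(x.numpy(), y.numpy()))  # True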
    @dygraph_only
    def numpy(self):
        """
        **Notes: This API is ONLY available in Dygraph mode**

        Returns a numpy array that shows the value of current :ref:`api_guide_Variable`

        Returns:
            ndarray: The numpy value of current Variable.

        Return type:
            ndarray dtype is same as current Variable

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                from paddle.fluid.dygraph.base import to_variable
                from paddle.fluid.dygraph import FC
                import numpy as np

                data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
                with fluid.dygraph.guard():
                    fc = FC("fc", 64, num_flatten_dims=2)
                    data = to_variable(data)
                    x = fc(data)
                    print(x.numpy())
        """
        if not self._ivar.value().get_tensor()._is_initialized():
            raise ValueError("%s is Empty, Please check if it has no data in" %
                             self.name)
        new_ivar = self._ivar._copy_to(core.CPUPlace(), True)
        return np.array(new_ivar.value().get_tensor())
    @dygraph_only
    def backward(self, backward_strategy=None):
        """
        **Notes: This API is ONLY available in Dygraph mode**

        Run backward of current Graph which starts from current Variable

        Parameter:
            - **backward_strategy** : ( :ref:`api_fluid_dygraph_BackwardStrategy` ) - The Backward Strategy to run backward

        Returns: None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                import numpy as np

                x = np.ones([2, 2], np.float32)
                with fluid.dygraph.guard():
                    inputs2 = []
                    for _ in range(10):
                        tmp = fluid.dygraph.base.to_variable(x)
                        tmp.stop_gradient = False
                        inputs2.append(tmp)
                    ret2 = fluid.layers.sums(inputs2)
                    loss2 = fluid.layers.reduce_sum(ret2)
                    backward_strategy = fluid.dygraph.BackwardStrategy()
                    backward_strategy.sort_sum_gradient = True
                    loss2.backward(backward_strategy)
        """
        if in_dygraph_mode():
            from .dygraph import BackwardStrategy
            if backward_strategy is None:
...@@ -601,11 +681,81 @@ class Variable(object):
            raise ValueError(
                "Variable.backward() is only avaliable in DyGraph mode")
    @dygraph_only
    def gradient(self):
        """
        **Notes: This API is ONLY available in Dygraph mode**

        Get the Gradient of Current Variable

        Returns: Numpy value of the gradient of current Variable

        Return type: ndarray

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                import numpy as np

                x = np.ones([2, 2], np.float32)
                with fluid.dygraph.guard():
                    inputs2 = []
                    for _ in range(10):
                        tmp = fluid.dygraph.base.to_variable(x)
                        tmp.stop_gradient = False
                        inputs2.append(tmp)
                    ret2 = fluid.layers.sums(inputs2)
                    loss2 = fluid.layers.reduce_sum(ret2)
                    backward_strategy = fluid.dygraph.BackwardStrategy()
                    backward_strategy.sort_sum_gradient = True
                    loss2.backward(backward_strategy)
                    print(loss2.gradient())
        """
        if self._ivar._grad_ivar() is None:
            raise ValueError("%s has no grad, Please set Variable.stop_gradient=False, or " \
                "check if this is the first and only variable need grad, if so, please set its pre-Variable's " \
                "stop_gradient=False, to make sure it has gradient " % self.name)
        if not self._ivar._grad_ivar().value().get_tensor()._is_initialized():
            raise ValueError(
                "%s's Grad is Empty, Please check if it has no data in" %
                self.name)
        new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True)
        return np.array(new_ivar.value().get_tensor())
    @dygraph_only
    def clear_gradient(self):
        """
        **Notes: This API is ONLY available in Dygraph mode**

        Clear (set to zero) the Gradient of Current Variable

        Returns: None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                import numpy as np

                x = np.ones([2, 2], np.float32)
                with fluid.dygraph.guard():
                    inputs2 = []
                    for _ in range(10):
                        tmp = fluid.dygraph.base.to_variable(x)
                        tmp.stop_gradient = False
                        inputs2.append(tmp)
                    ret2 = fluid.layers.sums(inputs2)
                    loss2 = fluid.layers.reduce_sum(ret2)
                    backward_strategy = fluid.dygraph.BackwardStrategy()
                    backward_strategy.sort_sum_gradient = True
                    loss2.backward(backward_strategy)
                    print(loss2.gradient())
                    loss2.clear_gradient()
                    print("After clear {}".format(loss2.gradient()))
        """
        self._ivar._clear_gradient()
    def __str__(self):
...@@ -615,26 +765,32 @@ class Variable(object):
        """
        Get debug string.

        Parameters:
            - **throw_on_error** (bool): True if raise an exception when self is
                not initialized.
            - **with_details** (bool): more details about variables and parameters
                (e.g. trainable, optimize_attr, ...) will be printed when
                with_details is True. Default False;

        Returns:
            str: The debug string.

        Return type:
            str

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid

                cur_program = fluid.Program()
                cur_block = cur_program.current_block()
                new_variable = cur_block.create_var(name="X",
                                                    shape=[-1, 23, 48],
                                                    dtype='float32')
                print(new_variable.to_string(True))
                print("\n=============with detail===============\n")
                print(new_variable.to_string(True, True))
        """
        if in_dygraph_mode():
            # TODO(panyx0718): add more dygraph debug info.
...@@ -654,8 +810,9 @@ class Variable(object):
        if with_details:
            additional_attr = ("error_clip", "stop_gradient")
            for attr_name in additional_attr:
                res_str += "%s: %s\n" % (attr_name,
                                         cpt.to_text(getattr(self, attr_name)))
        return res_str

    __repr__ = __str__
...@@ -684,7 +841,9 @@ class Variable(object):
    @persistable.setter
    def persistable(self, p):
        if in_dygraph_mode():
            logging.warn(
                "There will be no use to set persistable in Dygraph Mode, since "
                "you can just do it by hold it as normal Python variable")
        else:
            self.desc.set_persistable(p)
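In dygraph mode the setter is now a no-op that only logs a warning; the flag cannot be flipped through the property, as `test_set_persistable` later in this diff verifies. A small sketch of the observable behavior:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        var = fluid.dygraph.to_variable(np.ones([2, 2], np.float32))
        print(var.persistable)   # False
        var.persistable = True   # only warns in dygraph mode
        print(var.persistable)   # still False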
...@@ -718,6 +877,7 @@ class Variable(object):
        return self.desc.dtype()

    @property
    @dygraph_not_support
    def lod_level(self):
        # TODO(minqiyang): Support lod_level in dygraph mode
        if in_dygraph_mode():
...@@ -2945,11 +3105,10 @@ class IrGraph(object):
class Program(object):
    """
    Create a Python Program. It has at least one :ref:`api_guide_Block_en`; when a
    control flow op like conditional_block or :ref:`api_fluid_layers_While` is included,
    it will contain nested blocks.

    Please reference the framework.proto for details.

    A set of Program usually contains startup program and main program.
...@@ -2967,7 +3126,9 @@ class Program(object):
    default_main_program run in every mini batch and adjust the weights.

    Returns:
        An empty Program.

    Return type: Program

    Examples:
        .. code-block:: python
...@@ -3152,16 +3313,16 @@ class Program(object):
        """
        To debug string.

        Parameters:
            - **throw_on_error** (bool): raise ValueError when any of the required fields
                is not set.
            - **with_details** (bool): True if more details about variables and
                parameters, e.g., :code:`trainable`, :code:`optimize_attr`, need
                to print.

        Returns:
            The debug string describing the current Program.

        Raises:
            ValueError: If any of required fields is not set and throw_on_error is
...@@ -3203,12 +3364,19 @@ class Program(object):
    def _version(self):
        return self.desc._version()
    @dygraph_not_support
    def clone(self, for_test=False):
        """
        **Notes**:
            **1.** :code:`Program.clone()` **method DOES NOT clone** :code:`py_reader`.
            **2. We recommend you to use** :code:`clone` **before using** :code:`Optimizer.minimize`.
            **3. This API has no effect in Dygraph Mode**

        Create a new Program with the forward content of the original one when ``for_test=True``.
        Create a new Program that is the same as the original one when ``for_test=False``.

        Some operators, e.g., :ref:`cn_api_fluid_layers_batch_norm` , behave differently between
        training and testing. They have an attribute, :code:`is_test`, to
        control this behaviour. This method will change the :code:`is_test`
        attribute of them to :code:`True` when :code:`for_test=True`.
...@@ -3217,29 +3385,27 @@ class Program(object):
        * Set for_test to True when we want to clone the program for testing.
          We will prune the backward and optimize part of the program when you
          use :code:`clone` after :code:`Optimizer.minimize`, but we still
          recommend you to use :code:`clone` before using :code:`Optimizer.minimize`. For example:

        .. code-block:: python

            test_program = fluid.default_main_program().clone(for_test=True)
            # Here we use clone before Momentum
            optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
            optimizer.minimize()

        Parameters:
            - **for_test** (bool) - True if change the :code:`is_test` attribute of
                operators to :code:`True`.

        Returns: A new Program with the forward content of the original one when ``for_test=True``;
            a new Program the same as the original one when ``for_test=False``.

        Return type: Program

        Examples:

        Notes: The Program's order may be different after :code:`clone`, and
        this will not affect your training or testing progress. In the following
        example we give you a simple method :code:`print_prog(program)` to
        print Program Descs in order to make sure you have the same print result
...@@ -3499,16 +3665,41 @@ class Program(object):
    @staticmethod
    def parse_from_string(binary_str):
        """
        **Notes:**
            **- All information about parameters will be lost after serialization**

            **- This API has no effect in Dygraph mode**

        Deserialize a Program from a `protobuf <https://en.wikipedia.org/wiki/Protocol_Buffers>`_ binary string.
        This method is commonly used to save and load a model.

        Parameters:
            - **binary_str** (str) - the binary protobuf string.

        Returns: A deserialized Program.

        Return type: Program

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid

                startup_prog = fluid.Program()
                main_prog = fluid.Program()
                with fluid.program_guard(startup_prog, main_prog):
                    x = fluid.layers.data(
                        name='X', shape=[1000, 784], dtype='float32', append_batch_size=False)
                    y = fluid.layers.data(
                        name='Y', shape=[784, 100], dtype='float32', append_batch_size=False)
                    z = fluid.layers.mul(x=x, y=y)

                binary_str = fluid.default_main_program().desc.serialize_to_string()
                prog_restored = fluid.default_main_program().parse_from_string(binary_str)

                print(fluid.default_main_program())
                print(prog_restored)
        """
        p = Program()
        p.desc = core.ProgramDesc(binary_str)
...@@ -3536,10 +3727,14 @@ class Program(object):
    @property
    def random_seed(self):
        """
        **Notes: It must be set before the operators have been added.**

        The default random seed for random operators in Program. Zero means get
        the random seed from random device.

        Returns: random seed in current Program

        Return type: int64

        Examples:
            .. code-block:: python
...@@ -3548,8 +3743,13 @@ class Program(object):
                prog = fluid.default_main_program()
                random_seed = prog.random_seed
                x_var = fluid.layers.data(name="X", shape=[3,3], dtype="float32", append_batch_size=False)

                # Here we need to set random seed before we use fluid.layers.dropout
                print(random_seed)
                prog.random_seed = 1
                z_var = fluid.layers.dropout(x_var, 0.7)

                print(prog.random_seed)
        """
        return self._seed
...@@ -3557,7 +3757,13 @@ class Program(object):
    @property
    def num_blocks(self):
        """
        **Notes: This API has no effect in Dygraph mode**

        The number of :ref:`api_guide_Block_en` in this Program.

        Returns: num of :ref:`api_guide_Block_en` in current Program

        Return type: int(Platform-dependent size)

        Examples:
            .. code-block:: python
...@@ -3567,6 +3773,8 @@ class Program(object):
                prog = fluid.default_main_program()
                num_blocks = prog.num_blocks
                print(num_blocks)
        """
        return self.desc.num_blocks()
...@@ -3581,7 +3789,13 @@ class Program(object):
    def global_block(self):
        """
        **Notes: This API has no effect in Dygraph mode**

        Get the first :ref:`api_guide_Block_en` of this Program.

        Returns: The first :ref:`api_guide_Block_en` of this Program.

        Return type: :ref:`api_guide_Block_en`

        Examples:
            .. code-block:: python
...@@ -3591,17 +3805,22 @@ class Program(object):
                prog = fluid.default_main_program()
                gb_block = prog.global_block()
                print(gb_block)
        """
        return self.blocks[0]
    def block(self, index):
        """
        **Notes: This API has no effect in Dygraph mode**

        Get the :code:`index` :ref:`api_guide_Block_en` of this Program

        Parameter:
            - **index** (int) - The index of :ref:`api_guide_Block_en` to get

        Returns: The :code:`index` block

        Return type: :ref:`api_guide_Block_en`

        Examples:
            .. code-block:: python
...@@ -3616,9 +3835,15 @@ class Program(object):
    def current_block(self):
        """
        **Notes: This API has no effect in Dygraph mode**

        Get the current block. The :code:`current` block is the block to append
        operators.

        Returns: The :code:`current` block

        Return type: Block

        Examples:
            .. code-block:: python
...@@ -3741,12 +3966,14 @@ class Program(object):
        if var.desc.need_check_feed():
            self.global_block().var(var.name).desc.set_need_check_feed(True)

    @dygraph_not_support
    def list_vars(self):
        """
        Get all :ref:`api_guide_Variable` from this Program. An iterable object is returned.

        Returns: The Generator will yield every variable in this program.

        Return type: iterable :ref:`api_guide_Variable_en`

        Examples:
            .. code-block:: python
...@@ -3845,8 +4072,8 @@ class Parameter(Variable):
            additional_attr = ("trainable", "optimize_attr", "regularizer",
                               "gradient_clip_attr", "do_model_average")
            for attr_name in additional_attr:
                res_str += "%s: %s\n" % (attr_name,
                                         cpt.to_text(getattr(self, attr_name)))
        else:
            res_str = Variable.to_string(self, throw_on_error, False)
        return res_str
...@@ -3871,8 +4098,9 @@ def default_startup_program():
    This method will return the :code:`default` or the :code:`current` startup
    program. Users can use :code:`fluid.program_guard` to switch program.

    Returns: current default startup program

    Return type: Program

    Examples:
        .. code-block:: python
......
...@@ -135,6 +135,22 @@ class TestCompatible(unittest.TestCase):
            self.assertEqual(l, l2)
            self.assertEqual(set([u"", u"123", u"321"]), l2)
            # check dict types, not inplace
            l = {"": ""}
            l2 = cpt.to_text(l, inplace=False)
            self.assertTrue(isinstance(l2, dict))
            self.assertFalse(l is l2)
            self.assertEqual(l, l2)
            self.assertEqual({"": ""}, l2)

            # check dict types, inplace
            l = {"": ""}
            l2 = cpt.to_text(l, inplace=True)
            self.assertTrue(isinstance(l2, dict))
            self.assertTrue(l is l2)
            self.assertEqual(l, l2)
            self.assertEqual({"": ""}, l2)
        elif six.PY3:
            self.assertIsNone(cpt.to_text(None))
...@@ -236,6 +252,22 @@ class TestCompatible(unittest.TestCase):
            for i in l2:
                self.assertTrue(isinstance(i, str))
            # check dict types, not inplace
            l = {"": ""}
            l2 = cpt.to_text(l, inplace=False)
            self.assertTrue(isinstance(l2, dict))
            self.assertFalse(l is l2)
            self.assertEqual(l, l2)
            self.assertEqual({"": ""}, l2)

            # check dict types, inplace
            l = {"": ""}
            l2 = cpt.to_text(l, inplace=True)
            self.assertTrue(isinstance(l2, dict))
            self.assertTrue(l is l2)
            self.assertEqual(l, l2)
            self.assertEqual({"": ""}, l2)
    def test_to_bytes(self):
        # Only support python2.x and python3.x now
        self.assertTrue(six.PY2 | six.PY3)
......
...@@ -155,8 +155,11 @@ class Test_Detach(unittest.TestCase):
        try:
            y_detach = y.detach()
        except Exception as e:
            # check that the dygraph_only assertion fires in static graph mode
            assert type(e) == AssertionError
            assert str(
                e
            ) == 'We Only support detach in Dygraph mode, please use fluid.dygraph.guard() as context to run it in Dygraph Mode'
if __name__ == '__main__':
......
...@@ -207,6 +207,59 @@ class TestImperative(unittest.TestCase):
        a = inputs2[0].gradient()
        self.assertTrue(np.allclose(inputs2[0].gradient(), x))
    def test_empty_var(self):
        with fluid.dygraph.guard():
            cur_program = fluid.Program()
            cur_block = cur_program.current_block()
            new_variable = cur_block.create_var(
                name="X", shape=[-1, 23, 48], dtype='float32')
            try:
                new_variable.numpy()
            except Exception as e:
                assert type(e) == ValueError

            try:
                new_variable.backward()
            except Exception as e:
                assert type(e) == ValueError

            try:
                new_variable.clear_gradient()
            except Exception as e:
                assert type(e) == ValueError

    def test_empty_grad(self):
        with fluid.dygraph.guard():
            x = np.ones([2, 2], np.float32)
            new_var = fluid.dygraph.base.to_variable(x)
            try:
                new_var.gradient()
            except Exception as e:
                assert type(e) == ValueError

            try:
                new_var.clear_gradient()
            except Exception as e:
                assert type(e) == ValueError

        with fluid.dygraph.guard():
            cur_program = fluid.Program()
            cur_block = cur_program.current_block()
            new_variable = cur_block.create_var(
                name="X", shape=[-1, 23, 48], dtype='float32')
            try:
                new_variable.gradient()
            except Exception as e:
                assert type(e) == ValueError

    def test_set_persistable(self):
        with fluid.dygraph.guard():
            x = np.ones([2, 2], np.float32)
            new_var = fluid.dygraph.base.to_variable(x)
            self.assertFalse(new_var.persistable)
            new_var.persistable = True
            self.assertFalse(new_var.persistable)
    def test_layer(self):
        with fluid.dygraph.guard():
            cl = core.Layer()
......
...@@ -15,6 +15,7 @@
import paddle.fluid as fluid
import paddle.fluid.framework as framework
import unittest

from test_imperative_base import new_program_scope
...@@ -30,7 +31,7 @@ class TestTracerMode(unittest.TestCase):
        self.assertEqual(self.tracer._train_mode, False)
        return a

    @framework.dygraph_not_support
    def not_support_func(self):
        return True
......
...@@ -56,7 +56,7 @@ class TestDygraphFramework(unittest.TestCase):
            out.backward()
            raise AssertionError(
                "backward should not be usable in static graph mode")
        except AssertionError as e:
            self.assertTrue((e is not None))

    def test_dygraph_to_string(self):
......
...@@ -135,6 +135,8 @@ class TestDygraphGNN(unittest.TestCase):
            adam.minimize(loss)
            model.clear_gradients()
            loss_value = loss.numpy()
            model_gc_weight_value = model.gc.weight.numpy()

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
...@@ -157,12 +159,14 @@ class TestDygraphGNN(unittest.TestCase):
            adam2 = AdamOptimizer(learning_rate=1e-3)
            adam2.minimize(loss2)
            model2.clear_gradients()
            loss2_value = loss2.numpy()
            model2_gc_weight_value = model2.gc.weight.numpy()

        self.assertEqual(static_loss, loss_value)
        self.assertTrue(np.allclose(static_weight, model_gc_weight_value))
        self.assertEqual(static_loss, loss2_value)
        self.assertTrue(np.allclose(static_weight, model2_gc_weight_value))
        sys.stderr.write('%s %s\n' % (static_loss, loss_value))
if __name__ == '__main__':
......
...@@ -264,6 +264,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
            for param in ptb_model.parameters():
                dy_param_updated[param.name] = param.numpy()

            dy_loss_value = dy_loss.numpy()
            dy_last_cell_value = last_cell.numpy()
            dy_last_hidden_value = last_hidden.numpy()
        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
...@@ -330,11 +334,11 @@ class TestDygraphPtbRnn(unittest.TestCase):
                    static_param_updated[static_param_name_list[k -
                                                                3]] = out[k]

        self.assertTrue(np.array_equal(static_loss_value, dy_loss_value))
        self.assertTrue(
            np.array_equal(static_last_cell_value, dy_last_cell_value))
        self.assertTrue(
            np.array_equal(static_last_hidden_value, dy_last_hidden_value))
        for key, value in six.iteritems(static_param_init):
            self.assertTrue(np.array_equal(value, dy_param_init[key]))
        for key, value in six.iteritems(static_param_updated):
......
...@@ -84,6 +84,10 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase):
            for param in ptb_model.parameters():
                dy_param_updated[param.name] = param.numpy()

            dy_loss_value = dy_loss.numpy()
            dy_last_cell_value = last_cell.numpy()
            dy_last_hidden_value = last_hidden.numpy()
        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
...@@ -150,11 +154,11 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase):
                    static_param_updated[static_param_name_list[k -
                                                                3]] = out[k]

        self.assertTrue(np.array_equal(static_loss_value, dy_loss_value))
        self.assertTrue(
            np.array_equal(static_last_cell_value, dy_last_cell_value))
        self.assertTrue(
            np.array_equal(static_last_hidden_value, dy_last_hidden_value))
        for key, value in six.iteritems(static_param_init):
            self.assertTrue(np.array_equal(value, dy_param_init[key]))
        for key, value in six.iteritems(static_param_updated):
......
...@@ -993,6 +993,11 @@ class TestDygraphTransformerSortGradient(unittest.TestCase):
            for param in transformer.parameters():
                dy_param_updated[param.name] = param.numpy()

            dy_avg_cost_value = dy_avg_cost.numpy()
            dy_sum_cost_value = dy_sum_cost.numpy()
            dy_predict_value = dy_predict.numpy()
            dy_token_num_value = dy_token_num.numpy()
        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
...@@ -1067,13 +1072,12 @@ class TestDygraphTransformerSortGradient(unittest.TestCase):
                        4]] = out[k]

        self.assertTrue(
            np.array_equal(static_avg_cost_value, dy_avg_cost_value))
        self.assertTrue(
            np.array_equal(static_sum_cost_value, dy_sum_cost_value))
        self.assertTrue(np.array_equal(static_predict_value, dy_predict_value))
        self.assertTrue(
            np.array_equal(static_token_num_value, dy_token_num_value))
        for key, value in six.iteritems(static_param_init):
            self.assertTrue(np.array_equal(value, dy_param_init[key]))
......
...@@ -112,9 +112,10 @@ class TestLayer(LayerTest):
            fc2 = nn.FC('fc2', size=4)
            ret = fc1(t)
            dy_ret = fc2(ret)
            dy_ret_value = dy_ret.numpy()

        self.assertTrue(np.array_equal(static_ret, static_ret2))
        self.assertTrue(np.array_equal(static_ret, dy_ret_value))
    def test_layer_norm(self):
        inp = np.ones([3, 32, 32], dtype='float32')
...@@ -149,6 +150,7 @@ class TestLayer(LayerTest):
                bias_attr=fluid.initializer.ConstantInitializer(value=1),
                act='sigmoid')
            dy_ret = lm(base.to_variable(inp))
            dy_ret_value = dy_ret.numpy()

        with self.dynamic_graph():
            lm = nn.LayerNorm(
                'layer_norm',
...@@ -163,7 +165,7 @@ class TestLayer(LayerTest):
            self.assertFalse(hasattr(lm, "_bias_w"))

        self.assertTrue(np.array_equal(static_ret, static_ret2))
        self.assertTrue(np.array_equal(dy_ret_value, static_ret2))
    def test_relu(self):
        with self.static_graph():
...@@ -176,8 +178,9 @@ class TestLayer(LayerTest):
        with self.dynamic_graph():
            t = np.ones([3, 3], dtype='float32')
            dy_ret = layers.relu(base.to_variable(t))
            dy_ret_value = dy_ret.numpy()

        self.assertTrue(np.allclose(static_ret, dy_ret_value))
    def test_matmul(self):
        with self.static_graph():
...@@ -197,8 +200,9 @@ class TestLayer(LayerTest):
            t = np.ones([3, 3], dtype='float32')
            t2 = np.ones([3, 3], dtype='float32')
            dy_ret = layers.matmul(base.to_variable(t), base.to_variable(t2))
            dy_ret_value = dy_ret.numpy()

        self.assertTrue(np.allclose(static_ret, dy_ret_value))
    def test_conv2d(self):
        with self.static_graph():
...@@ -222,6 +226,7 @@ class TestLayer(LayerTest):
            images = np.ones([2, 3, 5, 5], dtype='float32')
            conv2d = nn.Conv2D('conv2d', num_filters=3, filter_size=[2, 2])
            dy_ret = conv2d(base.to_variable(images))
            dy_ret_value = dy_ret.numpy()

        with self.dynamic_graph():
            images = np.ones([2, 3, 5, 5], dtype='float32')
...@@ -230,7 +235,7 @@ class TestLayer(LayerTest):
            dy_ret = conv2d(base.to_variable(images))
            self.assertTrue(conv2d._bias_param is None)

        self.assertTrue(np.allclose(static_ret, dy_ret_value))
        self.assertTrue(np.allclose(static_ret, static_ret2))
    def test_gru_unit(self):
...@@ -269,10 +274,13 @@ class TestLayer(LayerTest):
            gru = nn.GRUUnit('gru', size=D * 3)
            dy_ret = gru(
                base.to_variable(input), base.to_variable(hidden_input))
            dy_ret_value = []
            for i in range(len(static_ret)):
                dy_ret_value.append(dy_ret[i].numpy())

        for i in range(len(static_ret)):
            self.assertTrue(np.allclose(static_ret[i], static_ret2[i]))
            self.assertTrue(np.allclose(static_ret[i], dy_ret_value[i]))
def test_elementwise_math(self): def test_elementwise_math(self):
n = np.ones([3, 3], dtype='float32') n = np.ones([3, 3], dtype='float32')
...@@ -313,9 +321,8 @@ class TestLayer(LayerTest): ...@@ -313,9 +321,8 @@ class TestLayer(LayerTest):
ret = layers.elementwise_div(ret, n4) ret = layers.elementwise_div(ret, n4)
ret = layers.elementwise_sub(ret, n5) ret = layers.elementwise_sub(ret, n5)
dy_ret = layers.elementwise_mul(ret, n6) dy_ret = layers.elementwise_mul(ret, n6)
self.assertTrue( dy_ret_value = dy_ret.numpy()
np.allclose(static_ret, dy_ret.numpy()), self.assertTrue(np.allclose(static_ret, dy_ret_value))
'%s vs %s' % (static_ret, dy_ret.numpy()))
def test_elementwise_minmax(self): def test_elementwise_minmax(self):
n = np.ones([3, 3], dtype='float32') n = np.ones([3, 3], dtype='float32')
...@@ -324,9 +331,11 @@ class TestLayer(LayerTest): ...@@ -324,9 +331,11 @@ class TestLayer(LayerTest):
with self.dynamic_graph(): with self.dynamic_graph():
min_ret = layers.elementwise_min(n, n2) min_ret = layers.elementwise_min(n, n2)
max_ret = layers.elementwise_max(n, n2) max_ret = layers.elementwise_max(n, n2)
min_ret_value = min_ret.numpy()
max_ret_value = max_ret.numpy()
self.assertTrue(np.allclose(n, min_ret.numpy())) self.assertTrue(np.allclose(n, min_ret_value))
self.assertTrue(np.allclose(n2, max_ret.numpy())) self.assertTrue(np.allclose(n2, max_ret_value))
def test_sequence_conv(self): def test_sequence_conv(self):
inp_np = np.arange(12).reshape([3, 4]).astype('float32') inp_np = np.arange(12).reshape([3, 4]).astype('float32')
...@@ -404,8 +413,9 @@ class TestLayer(LayerTest): ...@@ -404,8 +413,9 @@ class TestLayer(LayerTest):
act='sigmoid', act='sigmoid',
bias_attr=fluid.initializer.ConstantInitializer(value=1)) bias_attr=fluid.initializer.ConstantInitializer(value=1))
dy_rlt = conv2d_transpose(base.to_variable(inp_np)) dy_rlt = conv2d_transpose(base.to_variable(inp_np))
dy_rlt_value = dy_rlt.numpy()
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt2)) self.assertTrue(np.allclose(dy_rlt_value, static_rlt2))
def test_bilinear_tensor_product(self): def test_bilinear_tensor_product(self):
inp_np_x = np.array([[1, 2, 3]]).astype('float32') inp_np_x = np.array([[1, 2, 3]]).astype('float32')
...@@ -460,12 +470,12 @@ class TestLayer(LayerTest): ...@@ -460,12 +470,12 @@ class TestLayer(LayerTest):
bias_attr=fluid.initializer.ConstantInitializer(value=1), bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid') act='sigmoid')
dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y)) dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y))
dy_rlt_value = dy_rlt.numpy()
with self.dynamic_graph(): with self.dynamic_graph():
btp2 = nn.BilinearTensorProduct('btp', 6, act='sigmoid') btp2 = nn.BilinearTensorProduct('btp', 6, act='sigmoid')
dy_rlt2 = btp2( dy_rlt2 = btp2(
base.to_variable(inp_np_x), base.to_variable(inp_np_y)) base.to_variable(inp_np_x), base.to_variable(inp_np_y))
dy_rlt2_value = dy_rlt2.numpy()
with self.static_graph(): with self.static_graph():
data_x2 = layers.data( data_x2 = layers.data(
name='x', name='x',
...@@ -484,9 +494,9 @@ class TestLayer(LayerTest): ...@@ -484,9 +494,9 @@ class TestLayer(LayerTest):
feed={'x': inp_np_x, feed={'x': inp_np_x,
'y': inp_np_y}, fetch_list=[out2])[0] 'y': inp_np_y}, fetch_list=[out2])[0]
self.assertTrue(np.array_equal(dy_rlt2.numpy(), static_rlt3)) self.assertTrue(np.array_equal(dy_rlt2_value, static_rlt3))
self.assertTrue(np.array_equal(static_rlt2, static_rlt)) self.assertTrue(np.array_equal(static_rlt2, static_rlt))
self.assertTrue(np.array_equal(dy_rlt.numpy(), static_rlt)) self.assertTrue(np.array_equal(dy_rlt_value, static_rlt))
def test_prelu(self): def test_prelu(self):
inp_np = np.ones([5, 200, 100, 100]).astype('float32') inp_np = np.ones([5, 200, 100, 100]).astype('float32')
...@@ -525,9 +535,10 @@ class TestLayer(LayerTest): ...@@ -525,9 +535,10 @@ class TestLayer(LayerTest):
mode=mode, mode=mode,
param_attr=ParamAttr(initializer=Constant(1.0))) param_attr=ParamAttr(initializer=Constant(1.0)))
dy_rlt = prelu(base.to_variable(inp_np)) dy_rlt = prelu(base.to_variable(inp_np))
dy_rlt_value = dy_rlt.numpy()
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt_value, static_rlt))
def test_embeding(self): def test_embeding(self):
inp_word = np.array([[[1]]]).astype('int64') inp_word = np.array([[[1]]]).astype('int64')
...@@ -557,10 +568,11 @@ class TestLayer(LayerTest): ...@@ -557,10 +568,11 @@ class TestLayer(LayerTest):
size=[dict_size, 32], size=[dict_size, 32],
param_attr='emb.w', param_attr='emb.w',
is_sparse=False) is_sparse=False)
static_rlt3 = emb2(base.to_variable(inp_word)) dy_rlt = emb2(base.to_variable(inp_word))
dy_rlt_value = dy_rlt.numpy()
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(static_rlt3.numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt_value, static_rlt))
def test_nce(self): def test_nce(self):
window_size = 5 window_size = 5
...@@ -677,10 +689,11 @@ class TestLayer(LayerTest): ...@@ -677,10 +689,11 @@ class TestLayer(LayerTest):
bias_attr='nce.b', bias_attr='nce.b',
sample_weight=sample_weights) sample_weight=sample_weights)
nce_loss3 = nce(embs3, words[label_word]) dy_rlt = nce(embs3, words[label_word])
dy_rlt_value = dy_rlt.numpy()
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(nce_loss3.numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt_value, static_rlt))
def test_conv3d(self): def test_conv3d(self):
with self.static_graph(): with self.static_graph():
...@@ -706,8 +719,9 @@ class TestLayer(LayerTest): ...@@ -706,8 +719,9 @@ class TestLayer(LayerTest):
images = np.ones([2, 3, 6, 6, 6], dtype='float32') images = np.ones([2, 3, 6, 6, 6], dtype='float32')
conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2) conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2)
dy_ret = conv3d(base.to_variable(images)) dy_ret = conv3d(base.to_variable(images))
dy_rlt_value = dy_ret.numpy()
self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) self.assertTrue(np.allclose(static_ret, dy_rlt_value))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_row_conv(self): def test_row_conv(self):
...@@ -800,8 +814,9 @@ class TestLayer(LayerTest): ...@@ -800,8 +814,9 @@ class TestLayer(LayerTest):
with self.dynamic_graph(): with self.dynamic_graph():
groupNorm = nn.GroupNorm('GroupNorm', groups=2) groupNorm = nn.GroupNorm('GroupNorm', groups=2)
dy_ret = groupNorm(base.to_variable(input)) dy_ret = groupNorm(base.to_variable(input))
dy_rlt_value = dy_ret.numpy()
self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) self.assertTrue(np.allclose(static_ret, dy_rlt_value))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_spectral_norm(self): def test_spectral_norm(self):
...@@ -850,8 +865,9 @@ class TestLayer(LayerTest): ...@@ -850,8 +865,9 @@ class TestLayer(LayerTest):
with self.dynamic_graph(): with self.dynamic_graph():
spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2) spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2)
dy_ret = spectralNorm(base.to_variable(input)) dy_ret = spectralNorm(base.to_variable(input))
dy_rlt_value = dy_ret.numpy()
self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) self.assertTrue(np.allclose(static_ret, dy_rlt_value))
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
def test_tree_conv(self): def test_tree_conv(self):
...@@ -922,9 +938,10 @@ class TestLayer(LayerTest): ...@@ -922,9 +938,10 @@ class TestLayer(LayerTest):
treeConv = nn.TreeConv( treeConv = nn.TreeConv(
'SpectralNorm', output_size=6, num_filters=1, max_depth=2) 'SpectralNorm', output_size=6, num_filters=1, max_depth=2)
dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj)) dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj))
dy_rlt_value = dy_ret.numpy()
self.assertTrue(np.allclose(static_ret, static_ret2)) self.assertTrue(np.allclose(static_ret, static_ret2))
self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) self.assertTrue(np.allclose(static_ret, dy_rlt_value))
def test_conv3d_transpose(self): def test_conv3d_transpose(self):
input_array = np.arange(0, 48).reshape( input_array = np.arange(0, 48).reshape(
...@@ -953,8 +970,9 @@ class TestLayer(LayerTest): ...@@ -953,8 +970,9 @@ class TestLayer(LayerTest):
filter_size=12, filter_size=12,
use_cudnn=False) use_cudnn=False)
dy_rlt = conv3d_transpose(base.to_variable(input_array)) dy_rlt = conv3d_transpose(base.to_variable(input_array))
dy_rlt_value = dy_rlt.numpy()
self.assertTrue(np.allclose(static_rlt2, static_rlt)) self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt.numpy(), static_rlt)) self.assertTrue(np.allclose(dy_rlt_value, static_rlt))
def test_eye_op(self): def test_eye_op(self):
np_eye = np.eye(3, 2) np_eye = np.eye(3, 2)
...@@ -972,11 +990,14 @@ class TestLayer(LayerTest): ...@@ -972,11 +990,14 @@ class TestLayer(LayerTest):
num_columns=2, num_columns=2,
batch_shape=[4, 3]) batch_shape=[4, 3])
diag_tensor = layers.eye(20) diag_tensor = layers.eye(20)
eye_tensor_value = eye_tensor.numpy()
self.assertTrue(np.allclose(eye_tensor.numpy(), np_eye)) eye_tensor_rlt1_value = eye_tensor_rlt1.numpy()
self.assertTrue(np.allclose(eye_tensor_rlt1.numpy(), stack_rlt1)) eye_tensor_rlt2_value = eye_tensor_rlt2.numpy()
self.assertTrue(np.allclose(eye_tensor_rlt2.numpy(), stack_rlt2)) diag_tensor_value = diag_tensor.numpy()
self.assertTrue(np.allclose(diag_tensor.numpy(), np.eye(20))) self.assertTrue(np.allclose(eye_tensor_value, np_eye))
self.assertTrue(np.allclose(eye_tensor_rlt1_value, stack_rlt1))
self.assertTrue(np.allclose(eye_tensor_rlt2_value, stack_rlt2))
self.assertTrue(np.allclose(diag_tensor_value, np.eye(20)))
with self.assertRaises(TypeError): with self.assertRaises(TypeError):
layers.eye(num_rows=3.1) layers.eye(num_rows=3.1)
...@@ -998,8 +1019,9 @@ class TestLayer(LayerTest): ...@@ -998,8 +1019,9 @@ class TestLayer(LayerTest):
with self.dynamic_graph(): with self.dynamic_graph():
t = np.ones([3, 3], dtype='float32') t = np.ones([3, 3], dtype='float32')
dy_ret = layers.hard_swish(base.to_variable(t)) dy_ret = layers.hard_swish(base.to_variable(t))
dy_ret_rlt = dy_ret.numpy()
self.assertTrue(np.allclose(static_ret, dy_ret.numpy())) self.assertTrue(np.allclose(static_ret, dy_ret_rlt))
def test_compare(self): def test_compare(self):
value_a = np.arange(3) value_a = np.arange(3)
@@ -1017,8 +1039,8 @@ class TestLayer(LayerTest):
             db = base.to_variable(value_b)
             dcond = layers.less_than(x=da, y=db)
-        for i in range(len(static_ret)):
-            self.assertTrue(dcond.numpy()[i] == static_ret[i])
+            for i in range(len(static_ret)):
+                self.assertTrue(dcond.numpy()[i] == static_ret[i])

         # less equal
         with self.static_graph():
@@ -1160,8 +1182,9 @@ class TestBook(LayerTest):
             dy_result = method()
             if isinstance(dy_result, tuple):
                 dy_result = dy_result[0]
+            dy_result_value = dy_result.numpy()

-        self.assertTrue(np.array_equal(static_result[0], dy_result.numpy()))
+        self.assertTrue(np.array_equal(static_result[0], dy_result_value))

     def _get_np_data(self, shape, dtype, append_batch_size=True):
         np.random.seed(self.seed)
...
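Every hunk above applies the same fix: each test now calls .numpy() on its dygraph result while the "with self.dynamic_graph():" block is still active, and the later assertions compare only the captured numpy arrays, so no Variable method is invoked after the dygraph context exits. A minimal standalone sketch of that pattern, illustrative only (not part of this PR) and assuming the fluid.dygraph API of this release:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    t = np.ones([3, 3], dtype='float32')
    dy_ret = fluid.layers.relu(fluid.dygraph.to_variable(t))
    # materialize the result to numpy while still inside the guard
    dy_ret_value = dy_ret.numpy()

# outside the guard, only plain numpy arrays are compared
assert np.allclose(dy_ret_value, np.maximum(t, 0))

The PR also adds a new unit test for Program.to_string, shown below.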
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import paddle.fluid as fluid
import unittest


class TestProgram(unittest.TestCase):
    def test_program_to_string(self):
        prog = fluid.default_main_program()
        # Build a small network so the program has ops and variables to print.
        a = fluid.layers.data(
            name="X", shape=[2, 3], dtype="float32", append_batch_size=False)
        c = fluid.layers.fc(a, size=3)
        prog_string = prog.to_string(throw_on_error=True, with_details=False)
        prog_string_with_details = prog.to_string(
            throw_on_error=False, with_details=True)
        assert prog_string is not None
        # The detailed form prints additional attributes, so it must be longer.
        assert len(prog_string_with_details) > len(prog_string)


if __name__ == '__main__':
    unittest.main()
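For reference, a hedged sketch of the two to_string modes this test exercises (assuming the fluid.Program API of this release): with_details=False prints only the program's ops and variables, while with_details=True additionally prints parameter attributes such as trainable and optimize_attr, which is why the detailed string is asserted to be longer.

import paddle.fluid as fluid

prog = fluid.Program()
with fluid.program_guard(prog):
    x = fluid.layers.data(
        name="x", shape=[2, 3], dtype="float32", append_batch_size=False)
    y = fluid.layers.fc(x, size=3)

# plain form: throw_on_error=True raises if any required field is unset
print(prog.to_string(throw_on_error=True, with_details=False))
# detailed form: also prints the attributes of each parameter
print(prog.to_string(throw_on_error=False, with_details=True))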