PaddlePaddle / Paddle
Commit 4d79304c (unverified)
Authored Sep 30, 2020 by Qi Li; committed via GitHub on Sep 30, 2020
update samples of print and clip api, test=develop (#27670)
Parent: e262cb62
Showing 2 changed files with 80 additions and 209 deletions (+80 −209)
python/paddle/fluid/clip.py (+63 −192)
python/paddle/fluid/layers/control_flow.py (+17 −17)
python/paddle/fluid/clip.py @ 4d79304c
...
@@ -158,10 +158,6 @@ class GradientClipBase(object):
 class GradientClipByValue(GradientClipBase):
     """
-    :alias_main: paddle.nn.GradientClipByValue
-    :alias: paddle.nn.GradientClipByValue,paddle.nn.clip.GradientClipByValue
-    :old_api: paddle.fluid.clip.GradientClipByValue
-
     Limit the value of multi-dimensional Tensor :math:`X` to the range [min, max].

     - Any values less than min are set to ``min``.
...
@@ -172,7 +168,7 @@ class GradientClipByValue(GradientClipBase):
     is not None, then only part of gradients can be selected for gradient clipping.
     Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
-    (for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
+    (for example: :ref:`api_paddle_optimizer_SGD`).

     Args:
         max (float): The maximum value to clip by.
...
@@ -185,66 +181,28 @@ class GradientClipByValue(GradientClipBase):
     Examples:
         .. code-block:: python

-            # use for Static mode
-            import paddle
-            import paddle.fluid as fluid
-            import numpy as np
-
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(
-                    main_program=main_prog, startup_program=startup_prog):
-                image = fluid.data(
-                    name='x', shape=[-1, 2], dtype='float32')
-                predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
-                loss = fluid.layers.mean(predict)
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByValue(min=-1, max=1)
-
-                # Clip a part of parameters in network: (e.g. fc_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a Parameter, and return bool
-                # def fileter_func(Parameter):
-                #     # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
-                #     return Parameter.name=="fc_0.w_0"
-                # clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
-                sgd_optimizer.minimize(loss)
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
-            exe.run(startup_prog)
-            out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
-
-            # use for Dygraph mode
-            import paddle
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                linear = fluid.dygraph.Linear(10, 10)  # Trainable parameters:: linear_0.w.0, linear_0.b.0
-                inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
-                out = linear(fluid.dygraph.to_variable(inputs))
-                loss = fluid.layers.reduce_mean(out)
-                loss.backward()
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByValue(min=-1, max=1)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(ParamBase):
-                #     # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
-                #     return ParamBase.name == "linear_0.w_0"
-                #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
-                #     return ParamBase.name == linear.weight.name
-                # clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGD(
-                    learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
-                sgd_optimizer.minimize(loss)
+            import paddle
+
+            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
+            linear = paddle.nn.Linear(10, 10)
+            out = linear(x)
+            loss = paddle.mean(out)
+            loss.backward()
+
+            # clip all parameters in network:
+            clip = paddle.nn.GradientClipByValue(min=-1, max=1)
+
+            # clip a part of parameters in network: (e.g. linear_0.w_0)
+            # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
+            # def fileter_func(ParamBase):
+            #     # It can be easily filtered by ParamBase.name(name can be set in paddle.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
+            #     return ParamBase.name == "linear_0.w_0"
+            #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
+            #     return ParamBase.name == linear.weight.name
+            # clip = paddle.nn.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
+
+            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
+            sdg.step()

     """
     def __init__(self, max, min=None, need_clip=None):
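For intuition, the clipping this class documents is just an elementwise clamp of each selected gradient into [min, max]. A minimal NumPy sketch of those semantics (the helper name clip_by_value and the sample values are illustrative, not Paddle API):

    import numpy as np

    def clip_by_value(grad, min_v=-1.0, max_v=1.0):
        # Elementwise clamp into [min_v, max_v], mirroring what
        # GradientClipByValue applies to every gradient selected by need_clip.
        return np.clip(grad, min_v, max_v)

    g = np.array([-3.0, -0.5, 0.2, 4.0], dtype='float32')
    print(clip_by_value(g))  # [-1.  -0.5  0.2  1. ]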
...
@@ -300,10 +258,6 @@ class GradientClipByValue(GradientClipBase):
 class GradientClipByNorm(GradientClipBase):
     """
-    :alias_main: paddle.nn.GradientClipByNorm
-    :alias: paddle.nn.GradientClipByNorm,paddle.nn.clip.GradientClipByNorm
-    :old_api: paddle.fluid.clip.GradientClipByNorm
-
     Limit the l2 norm of multi-dimensional Tensor :math:`X` to ``clip_norm`` .

     - If the l2 norm of :math:`X` is greater than ``clip_norm`` , :math:`X` will be compressed by a ratio.
...
@@ -314,7 +268,7 @@ class GradientClipByNorm(GradientClipBase):
     is not None, then only part of gradients can be selected for gradient clipping.
     Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
-    (for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
+    (for example: :ref:`api_paddle_optimizer_SGD`).

     The clipping formula is:
...
@@ -342,68 +296,28 @@ class GradientClipByNorm(GradientClipBase):
     Examples:
         .. code-block:: python

-            # use for Static mode
-            import paddle
-            import paddle.fluid as fluid
-            import numpy as np
-
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(
-                    main_program=main_prog, startup_program=startup_prog):
-                image = fluid.data(
-                    name='x', shape=[-1, 2], dtype='float32')
-                predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
-                loss = fluid.layers.mean(predict)
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a Parameter, and return bool
-                # def fileter_func(Parameter):
-                #     # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
-                #     return Parameter.name=="fc_0.w_0"
-                # clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
-                sgd_optimizer.minimize(loss)
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
-            exe.run(startup_prog)
-            out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
-
-            # use for Dygraph mode
-            import paddle
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                linear = fluid.dygraph.Linear(10, 10)  # Trainable: linear_0.w.0, linear_0.b.0
-                inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
-                out = linear(fluid.dygraph.to_variable(inputs))
-                loss = fluid.layers.reduce_mean(out)
-                loss.backward()
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(ParamBase):
-                #     # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
-                #     return ParamBase.name == "linear_0.w_0"
-                #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
-                #     return ParamBase.name == linear.weight.name
-                # clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGD(
-                    learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
-                sgd_optimizer.minimize(loss)
+            import paddle
+
+            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
+            linear = paddle.nn.Linear(10, 10)
+            out = linear(x)
+            loss = paddle.mean(out)
+            loss.backward()
+
+            # clip all parameters in network:
+            clip = paddle.nn.GradientClipByNorm(clip_norm=1.0)
+
+            # clip a part of parameters in network: (e.g. linear_0.w_0)
+            # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
+            # def fileter_func(ParamBase):
+            #     # It can be easily filtered by ParamBase.name(name can be set in paddle.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
+            #     return ParamBase.name == "linear_0.w_0"
+            #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
+            #     return ParamBase.name == linear.weight.name
+            # clip = paddle.nn.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
+
+            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
+            sdg.step()

     """
     def __init__(self, clip_norm, need_clip=None):
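The per-tensor norm clipping this class documents ("compressed by a ratio") can be sketched in NumPy as follows; clip_by_norm is an illustrative helper, not Paddle API, and assumes the docstring's formula of rescaling only when the l2 norm exceeds clip_norm:

    import numpy as np

    def clip_by_norm(grad, clip_norm=1.0):
        # Rescale grad so its l2 norm is at most clip_norm;
        # gradients already within the limit pass through unchanged.
        norm = np.sqrt((grad ** 2).sum())
        if norm > clip_norm:
            return grad * (clip_norm / norm)
        return grad

    g = np.array([3.0, 4.0], dtype='float32')  # l2 norm = 5
    print(clip_by_norm(g, clip_norm=1.0))      # [0.6 0.8], norm is now 1.0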
...
@@ -455,10 +369,6 @@ class GradientClipByNorm(GradientClipBase):
 class GradientClipByGlobalNorm(GradientClipBase):
     """
-    :alias_main: paddle.nn.GradientClipByGlobalNorm
-    :alias: paddle.nn.GradientClipByGlobalNorm,paddle.nn.clip.GradientClipByGlobalNorm
-    :old_api: paddle.fluid.clip.GradientClipByGlobalNorm
-
     Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
     :math:`t\_list` , and limit it to ``clip_norm`` .
...
@@ -470,7 +380,7 @@ class GradientClipByGlobalNorm(GradientClipBase):
     is not None, then only part of gradients can be selected for gradient clipping.
     Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
-    (for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
+    (for example: :ref:`api_paddle_optimizer_SGD`).

     The clipping formula is:
...
@@ -494,67 +404,28 @@ class GradientClipByGlobalNorm(GradientClipBase):
     Examples:
         .. code-block:: python

-            # use for Static mode
-            import paddle
-            import paddle.fluid as fluid
-            import numpy as np
-
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(
-                    main_program=main_prog, startup_program=startup_prog):
-                image = fluid.data(
-                    name='x', shape=[-1, 2], dtype='float32')
-                predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
-                loss = fluid.layers.mean(predict)
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. fc_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(Parameter):
-                #     # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
-                #     return Parameter.name=="fc_0.w_0"
-                # clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
-                sgd_optimizer.minimize(loss)
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
-            exe.run(startup_prog)
-            out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
-
-            # use for Dygraph mode
-            import paddle
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                linear = fluid.dygraph.Linear(10, 10)  # Trainable: linear_0.w.0, linear_0.b.0
-                inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
-                out = linear(fluid.dygraph.to_variable(inputs))
-                loss = fluid.layers.reduce_mean(out)
-                loss.backward()
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(ParamBase):
-                #     # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
-                #     return ParamBase.name == "linear_0.w_0"
-                #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
-                #     return ParamBase.name == linear.weight.name
-                # clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGD(
-                    learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
-                sgd_optimizer.minimize(loss)
+            import paddle
+
+            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
+            linear = paddle.nn.Linear(10, 10)
+            out = linear(x)
+            loss = paddle.mean(out)
+            loss.backward()
+
+            # clip all parameters in network:
+            clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0)
+
+            # clip a part of parameters in network: (e.g. linear_0.w_0)
+            # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
+            # def fileter_func(ParamBase):
+            #     # It can be easily filtered by ParamBase.name(name can be set in paddle.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
+            #     return ParamBase.name == "linear_0.w_0"
+            #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
+            #     return ParamBase.name == linear.weight.name
+            # clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
+
+            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
+            sdg.step()

     """
     def __init__(self, clip_norm, group_name="default_group", need_clip=None):
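Global-norm clipping differs from per-tensor norm clipping in that one norm is computed over the whole list and every tensor is scaled by the same ratio. A minimal NumPy sketch of that behavior (clip_by_global_norm is an illustrative helper, not Paddle API):

    import numpy as np

    def clip_by_global_norm(grads, clip_norm=1.0):
        # One norm over all tensors; every tensor gets the same scale
        # clip_norm / max(global_norm, clip_norm), so the relative
        # proportions between parameters are preserved.
        global_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
        scale = clip_norm / max(global_norm, clip_norm)
        return [g * scale for g in grads]

    grads = [np.array([3.0, 4.0]), np.array([0.0, 12.0])]  # global norm = 13
    print(clip_by_global_norm(grads))  # each tensor scaled by 1/13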
...
python/paddle/fluid/layers/control_flow.py @ 4d79304c
...
@@ -259,24 +259,24 @@ def Print(input,
     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
-
-            input = fluid.layers.fill_constant(shape=[10,2], value=3, dtype='int64')
-            input = fluid.layers.Print(input, message="The content of input layer:")
-
-            main_program = fluid.default_main_program()
-            exe = fluid.Executor(fluid.CPUPlace())
-            exe.run(main_program)
-
-        Output at runtime:
-
-        .. code-block:: bash
-
-            The content of input layer:     The place is:CPUPlace
-            Tensor[fill_constant_0.tmp_0]
-                shape: [10,2,]
-                dtype: x
-                data: 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+            import paddle
+
+            paddle.enable_static()
+
+            x = paddle.full(shape=[2, 3], fill_value=3, dtype='int64')
+            out = paddle.static.Print(x, message="The content of input layer:")
+
+            main_program = paddle.static.default_main_program()
+            exe = paddle.static.Executor(place=paddle.CPUPlace())
+            res = exe.run(main_program, fetch_list=[out])
+            # Variable: fill_constant_1.tmp_0
+            #   - message: The content of input layer:
+            #   - lod: {}
+            #   - place: CPUPlace
+            #   - shape: [2, 3]
+            #   - layout: NCHW
+            #   - dtype: long
+            #   - data: [3 3 3 3 3 3]
     '''
     check_variable_and_dtype(input, 'input',
                              ['float32', 'float64', 'int32', 'int64', 'bool'],
...
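For convenience, the updated example from the hunk above also runs as a standalone script; this is a straight consolidation of the added lines, and the exact tensor report printed at runtime may vary across Paddle versions:

    import paddle

    paddle.enable_static()

    x = paddle.full(shape=[2, 3], fill_value=3, dtype='int64')
    out = paddle.static.Print(x, message="The content of input layer:")

    main_program = paddle.static.default_main_program()
    exe = paddle.static.Executor(place=paddle.CPUPlace())
    res = exe.run(main_program, fetch_list=[out])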