未验证 提交 4d79304c 编写于 作者: Q Qi Li 提交者: GitHub

update samples of print and clip api, test=develop (#27670)

上级 e262cb62
......@@ -158,10 +158,6 @@ class GradientClipBase(object):
class GradientClipByValue(GradientClipBase):
:alias_main: paddle.nn.GradientClipByValue
:alias: paddle.nn.GradientClipByValue,paddle.nn.clip.GradientClipByValue
:old_api: paddle.fluid.clip.GradientClipByValue
Limit the value of multi-dimensional Tensor :math:`X` to the range [min, max].
- Any values less than min are set to ``min``.
......@@ -172,7 +168,7 @@ class GradientClipByValue(GradientClipBase):
is not None, then only part of gradients can be selected for gradient clipping.
Gradient clipping takes effect once it is set in ``optimizer`` ; see the ``optimizer`` documentation
(for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
(for example: :ref:`api_paddle_optimizer_SGD`).
max (float): The maximum value to clip by.
......@@ -185,66 +181,28 @@ class GradientClipByValue(GradientClipBase):
.. code-block:: python
# use for Static mode
import paddle
import paddle.fluid as fluid
import numpy as np
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(
main_program=main_prog, startup_program=startup_prog):
image = fluid.data(
name='x', shape=[-1, 2], dtype='float32')
predict = fluid.layers.fc(input=image, size=3, act='relu') # Trainable parameters: fc_0.w.0, fc_0.b.0
loss = fluid.layers.mean(predict)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByValue(min=-1, max=1)
# Clip a part of parameters in network: (e.g. fc_0.w_0)
# pass a function(fileter_func) to need_clip, and fileter_func receive a Parameter, and return bool
# def fileter_func(Parameter):
# # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
# return Parameter.name=="fc_0.w_0"
# clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
# use for Dygraph mode
import paddle
import paddle.fluid as fluid
with fluid.dygraph.guard():
linear = fluid.dygraph.Linear(10, 10) # Trainable parameters:: linear_0.w.0, linear_0.b.0
inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
out = linear(fluid.dygraph.to_variable(inputs))
loss = fluid.layers.reduce_mean(out)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByValue(min=-1, max=1)
# Clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
# def fileter_func(ParamBase):
# # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
# return ParamBase.name == linear.weight.name
# clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
linear = paddle.nn.Linear(10, 10)
out = linear(x)
loss = paddle.mean(out)
# clip all parameters in network:
clip = paddle.nn.GradientClipByValue(min=-1, max=1)
# clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns a bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in paddle.ParamAttr, and the default names are linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: alternatively, linear.weight and linear.bias return the weight and bias of paddle.nn.Linear, respectively, and can be used to filter
# return ParamBase.name == linear.weight.name
# clip = paddle.nn.GradientClipByValue(min=-1, max=1, need_clip=filter_func)
sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
def __init__(self, max, min=None, need_clip=None):
......@@ -300,10 +258,6 @@ class GradientClipByValue(GradientClipBase):
class GradientClipByNorm(GradientClipBase):
:alias_main: paddle.nn.GradientClipByNorm
:alias: paddle.nn.GradientClipByNorm,paddle.nn.clip.GradientClipByNorm
:old_api: paddle.fluid.clip.GradientClipByNorm
Limit the l2 norm of multi-dimensional Tensor :math:`X` to ``clip_norm`` .
- If the l2 norm of :math:`X` is greater than ``clip_norm`` , :math:`X` will be compressed by a ratio.
......@@ -314,7 +268,7 @@ class GradientClipByNorm(GradientClipBase):
is not None, then only part of gradients can be selected for gradient clipping.
Gradient clipping takes effect once it is set in ``optimizer`` ; see the ``optimizer`` documentation
(for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
(for example: :ref:`api_paddle_optimizer_SGD`).
The clipping formula is:
......@@ -342,68 +296,28 @@ class GradientClipByNorm(GradientClipBase):
.. code-block:: python
# use for Static mode
import paddle
import paddle.fluid as fluid
import numpy as np
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(
main_program=main_prog, startup_program=startup_prog):
image = fluid.data(
name='x', shape=[-1, 2], dtype='float32')
predict = fluid.layers.fc(input=image, size=3, act='relu') # Trainable parameters: fc_0.w.0, fc_0.b.0
loss = fluid.layers.mean(predict)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. fc_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a Parameter and returns a bool
# def filter_func(Parameter):
# # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default names are fc_0.w_0, fc_0.b_0)
# return Parameter.name=="fc_0.w_0"
# clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
# use for Dygraph mode
import paddle
import paddle.fluid as fluid
with fluid.dygraph.guard():
linear = fluid.dygraph.Linear(10, 10) # Trainable: linear_0.w.0, linear_0.b.0
inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
out = linear(fluid.dygraph.to_variable(inputs))
loss = fluid.layers.reduce_mean(out)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
# def fileter_func(ParamBase):
# # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
# return ParamBase.name == linear.weight.name
# clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
linear = paddle.nn.Linear(10, 10)
out = linear(x)
loss = paddle.mean(out)
# clip all parameters in network:
clip = paddle.nn.GradientClipByNorm(clip_norm=1.0)
# clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns a bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in paddle.ParamAttr, and the default names are linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: alternatively, linear.weight and linear.bias return the weight and bias of paddle.nn.Linear, respectively, and can be used to filter
# return ParamBase.name == linear.weight.name
# clip = paddle.nn.GradientClipByNorm(clip_norm=1.0, need_clip=filter_func)
sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
def __init__(self, clip_norm, need_clip=None):
......@@ -455,10 +369,6 @@ class GradientClipByNorm(GradientClipBase):
class GradientClipByGlobalNorm(GradientClipBase):
:alias_main: paddle.nn.GradientClipByGlobalNorm
:alias: paddle.nn.GradientClipByGlobalNorm,paddle.nn.clip.GradientClipByGlobalNorm
:old_api: paddle.fluid.clip.GradientClipByGlobalNorm
Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
:math:`t\_list` , and limit it to ``clip_norm`` .
......@@ -470,7 +380,7 @@ class GradientClipByGlobalNorm(GradientClipBase):
is not None, then only part of gradients can be selected for gradient clipping.
Gradient clipping takes effect once it is set in ``optimizer`` ; see the ``optimizer`` documentation
(for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
(for example: :ref:`api_paddle_optimizer_SGD`).
The clipping formula is:
......@@ -494,67 +404,28 @@ class GradientClipByGlobalNorm(GradientClipBase):
.. code-block:: python
# use for Static mode
import paddle
import paddle.fluid as fluid
import numpy as np
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(
main_program=main_prog, startup_program=startup_prog):
image = fluid.data(
name='x', shape=[-1, 2], dtype='float32')
predict = fluid.layers.fc(input=image, size=3, act='relu') # Trainable parameters: fc_0.w.0, fc_0.b.0
loss = fluid.layers.mean(predict)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. fc_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a Parameter and returns a bool
# def filter_func(Parameter):
# # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default names are fc_0.w_0, fc_0.b_0)
# return Parameter.name=="fc_0.w_0"
# clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
# use for Dygraph mode
import paddle
import paddle.fluid as fluid
with fluid.dygraph.guard():
linear = fluid.dygraph.Linear(10, 10) # Trainable: linear_0.w.0, linear_0.b.0
inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
out = linear(fluid.dygraph.to_variable(inputs))
loss = fluid.layers.reduce_mean(out)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
# def fileter_func(ParamBase):
# # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
# return ParamBase.name == linear.weight.name
# clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
linear = paddle.nn.Linear(10, 10)
out = linear(x)
loss = paddle.mean(out)
# clip all parameters in network:
clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0)
# clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns a bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in paddle.ParamAttr, and the default names are linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: alternatively, linear.weight and linear.bias return the weight and bias of paddle.nn.Linear, respectively, and can be used to filter
# return ParamBase.name == linear.weight.name
# clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=filter_func)
sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
def __init__(self, clip_norm, group_name="default_group", need_clip=None):
......@@ -259,24 +259,24 @@ def Print(input,
.. code-block:: python
import paddle.fluid as fluid
input = fluid.layers.fill_constant(shape=[10,2], value=3, dtype='int64')
input = fluid.layers.Print(input, message="The content of input layer:")
main_program = fluid.default_main_program()
exe = fluid.Executor(fluid.CPUPlace())
import paddle
Output at runtime:
.. code-block:: bash
The content of input layer: The place is:CPUPlace
shape: [10,2,]
dtype: x
data: 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
x = paddle.full(shape=[2, 3], fill_value=3, dtype='int64')
out = paddle.static.Print(x, message="The content of input layer:")
main_program = paddle.static.default_main_program()
exe = paddle.static.Executor(place=paddle.CPUPlace())
res = exe.run(main_program, fetch_list=[out])
# Variable: fill_constant_1.tmp_0
# - message: The content of input layer:
# - lod: {}
# - place: CPUPlace
# - shape: [2, 3]
# - layout: NCHW
# - dtype: long
# - data: [3 3 3 3 3 3]
check_variable_and_dtype(input, 'input',
['float32', 'float64', 'int32', 'int64', 'bool'],
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册