Unverified commit 4d79304c, authored by Qi Li, committed by GitHub

update samples of print and clip api, test=develop (#27670)

Parent commit: e262cb62
...@@ -158,10 +158,6 @@ class GradientClipBase(object):
class GradientClipByValue(GradientClipBase):
"""
:alias_main: paddle.nn.GradientClipByValue
:alias: paddle.nn.GradientClipByValue,paddle.nn.clip.GradientClipByValue
:old_api: paddle.fluid.clip.GradientClipByValue
Limit the value of multi-dimensional Tensor :math:`X` to the range [min, max].
- Any values less than min are set to ``min``.
...@@ -172,7 +168,7 @@ class GradientClipByValue(GradientClipBase):
is not None, then only part of gradients can be selected for gradient clipping.
Gradient clipping takes effect after being set in ``optimizer`` ; see the documentation of ``optimizer``
(for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
(for example: :ref:`api_paddle_optimizer_SGD`).
Args:
max (float): The maximum value to clip by.
...@@ -185,66 +181,28 @@ class GradientClipByValue(GradientClipBase):
Examples:
.. code-block:: python
# use for Static mode
import paddle
import paddle.fluid as fluid
import numpy as np
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(
main_program=main_prog, startup_program=startup_prog):
image = fluid.data(
name='x', shape=[-1, 2], dtype='float32')
predict = fluid.layers.fc(input=image, size=3, act='relu') # Trainable parameters: fc_0.w.0, fc_0.b.0
loss = fluid.layers.mean(predict)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByValue(min=-1, max=1)
# Clip a part of parameters in network: (e.g. fc_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a Parameter and returns bool
# def filter_func(Parameter):
# # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default names are fc_0.w_0, fc_0.b_0)
# return Parameter.name == "fc_0.w_0"
# clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
sgd_optimizer.minimize(loss)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
exe.run(startup_prog)
out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
# use for Dygraph mode
import paddle
import paddle.fluid as fluid
with fluid.dygraph.guard():
linear = fluid.dygraph.Linear(10, 10)  # Trainable parameters: linear_0.w.0, linear_0.b.0
inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
out = linear(fluid.dygraph.to_variable(inputs))
loss = fluid.layers.reduce_mean(out)
loss.backward()
# Clip all parameters in network:
clip = fluid.clip.GradientClipByValue(min=-1, max=1)
# Clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in fluid.ParamAttr, and the default names are linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias return the weight and bias of dygraph.Linear, respectively, and can also be used to filter
# return ParamBase.name == linear.weight.name
# clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
sgd_optimizer.minimize(loss)
import paddle
x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
linear = paddle.nn.Linear(10, 10)
out = linear(x)
loss = paddle.mean(out)
loss.backward()
# clip all parameters in network:
clip = paddle.nn.GradientClipByValue(min=-1, max=1)
# clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in paddle.ParamAttr, and the default names are linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias return the weight and bias of paddle.nn.Linear, respectively, and can also be used to filter
# return ParamBase.name == linear.weight.name
# clip = paddle.nn.GradientClipByValue(min=-1, max=1, need_clip=filter_func)
sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
sgd.step()
"""
def __init__(self, max, min=None, need_clip=None):
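A minimal runnable sketch of the ``need_clip`` filter described above; the helper name ``filter_func`` is illustrative, and the sketch assumes the same dygraph API used in the updated example:

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 10)
    x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
    loss = paddle.mean(linear(x))
    loss.backward()

    # illustrative filter: clip only the weight of the linear layer, not the bias
    def filter_func(param):
        return param.name == linear.weight.name

    clip = paddle.nn.GradientClipByValue(min=-1, max=1, need_clip=filter_func)
    sgd = paddle.optimizer.SGD(learning_rate=0.1,
                               parameters=linear.parameters(),
                               grad_clip=clip)
    sgd.step()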
...@@ -300,10 +258,6 @@ class GradientClipByValue(GradientClipBase):
class GradientClipByNorm(GradientClipBase):
"""
:alias_main: paddle.nn.GradientClipByNorm
:alias: paddle.nn.GradientClipByNorm,paddle.nn.clip.GradientClipByNorm
:old_api: paddle.fluid.clip.GradientClipByNorm
Limit the l2 norm of multi-dimensional Tensor :math:`X` to ``clip_norm`` .
- If the l2 norm of :math:`X` is greater than ``clip_norm`` , :math:`X` will be compressed by a ratio.
...@@ -314,7 +268,7 @@ class GradientClipByNorm(GradientClipBase):
is not None, then only part of gradients can be selected for gradient clipping.
Gradient clipping takes effect after being set in ``optimizer`` ; see the documentation of ``optimizer``
(for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
(for example: :ref:`api_paddle_optimizer_SGD`).
The clipping formula is:
...@@ -342,68 +296,28 @@ class GradientClipByNorm(GradientClipBase):
Examples:
.. code-block:: python
# use for Static mode
import paddle
import paddle.fluid as fluid
import numpy as np
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(
main_program=main_prog, startup_program=startup_prog):
image = fluid.data(
name='x', shape=[-1, 2], dtype='float32')
predict = fluid.layers.fc(input=image, size=3, act='relu') # Trainable parameters: fc_0.w.0, fc_0.b.0
loss = fluid.layers.mean(predict)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a Parameter and returns bool
# def filter_func(Parameter):
# # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default names are fc_0.w_0, fc_0.b_0)
# return Parameter.name == "fc_0.w_0"
# clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
sgd_optimizer.minimize(loss)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
exe.run(startup_prog)
out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
# use for Dygraph mode
import paddle
import paddle.fluid as fluid
with fluid.dygraph.guard():
linear = fluid.dygraph.Linear(10, 10)  # Trainable: linear_0.w.0, linear_0.b.0
inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
out = linear(fluid.dygraph.to_variable(inputs))
loss = fluid.layers.reduce_mean(out)
loss.backward()
# Clip all parameters in network:
clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in fluid.ParamAttr, and the default names are linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias return the weight and bias of dygraph.Linear, respectively, and can also be used to filter
# return ParamBase.name == linear.weight.name
# clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
sgd_optimizer.minimize(loss)
import paddle
x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
linear = paddle.nn.Linear(10, 10)
out = linear(x)
loss = paddle.mean(out)
loss.backward()
# clip all parameters in network:
clip = paddle.nn.GradientClipByNorm(clip_norm=1.0)
# clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in paddle.ParamAttr, and the default names are linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias return the weight and bias of paddle.nn.Linear, respectively, and can also be used to filter
# return ParamBase.name == linear.weight.name
# clip = paddle.nn.GradientClipByNorm(clip_norm=1.0, need_clip=filter_func)
sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
sgd.step()
"""
def __init__(self, clip_norm, need_clip=None):
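The clipping formula referenced above is elided in this excerpt; as a rough illustration of the per-tensor behavior it describes, a small sketch assuming ``out = x * clip_norm / max(norm(x), clip_norm)``:

.. code-block:: python

    import numpy as np

    # assumed per-tensor rule: out = x * clip_norm / max(l2_norm(x), clip_norm)
    clip_norm = 1.0
    x = np.array([3.0, 4.0], dtype='float32')        # l2 norm = 5.0 > clip_norm
    out = x * clip_norm / max(np.linalg.norm(x), clip_norm)
    print(out)                                        # [0.6 0.8]
    print(np.linalg.norm(out))                        # 1.0, i.e. clipped to clip_norm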
...@@ -455,10 +369,6 @@ class GradientClipByNorm(GradientClipBase):
class GradientClipByGlobalNorm(GradientClipBase):
"""
:alias_main: paddle.nn.GradientClipByGlobalNorm
:alias: paddle.nn.GradientClipByGlobalNorm,paddle.nn.clip.GradientClipByGlobalNorm
:old_api: paddle.fluid.clip.GradientClipByGlobalNorm
Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
:math:`t\_list` , and limit it to ``clip_norm`` .
...@@ -470,7 +380,7 @@ class GradientClipByGlobalNorm(GradientClipBase):
is not None, then only part of gradients can be selected for gradient clipping.
Gradient clipping takes effect after being set in ``optimizer`` ; see the documentation of ``optimizer``
(for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
(for example: :ref:`api_paddle_optimizer_SGD`).
The clipping formula is:
...@@ -494,67 +404,28 @@ class GradientClipByGlobalNorm(GradientClipBase):
Examples:
.. code-block:: python
# use for Static mode
import paddle
import paddle.fluid as fluid
import numpy as np
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(
main_program=main_prog, startup_program=startup_prog):
image = fluid.data(
name='x', shape=[-1, 2], dtype='float32')
predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
loss = fluid.layers.mean(predict)
# Clip all parameters in network:
clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. fc_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a Parameter and returns bool
# def filter_func(Parameter):
# # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default names are fc_0.w_0, fc_0.b_0)
# return Parameter.name == "fc_0.w_0"
# clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
sgd_optimizer.minimize(loss)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
exe.run(startup_prog)
out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
# use for Dygraph mode
import paddle
import paddle.fluid as fluid
with fluid.dygraph.guard():
linear = fluid.dygraph.Linear(10, 10)  # Trainable: linear_0.w.0, linear_0.b.0
inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
out = linear(fluid.dygraph.to_variable(inputs))
loss = fluid.layers.reduce_mean(out)
loss.backward()
# Clip all parameters in network:
clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
# Clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in fluid.ParamAttr, and the default names are linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias return the weight and bias of dygraph.Linear, respectively, and can also be used to filter
# return ParamBase.name == linear.weight.name
# clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=filter_func)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
sgd_optimizer.minimize(loss)
import paddle
x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
linear = paddle.nn.Linear(10, 10)
out = linear(x)
loss = paddle.mean(out)
loss.backward()
# clip all parameters in network:
clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0)
# clip a part of parameters in network: (e.g. linear_0.w_0)
# pass a function (filter_func) to need_clip; filter_func receives a ParamBase and returns bool
# def filter_func(ParamBase):
# # It can be easily filtered by ParamBase.name (name can be set in paddle.ParamAttr, and the default names are linear_0.w_0, linear_0.b_0)
# return ParamBase.name == "linear_0.w_0"
# # Note: linear.weight and linear.bias return the weight and bias of paddle.nn.Linear, respectively, and can also be used to filter
# return ParamBase.name == linear.weight.name
# clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=filter_func)
sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
sgd.step()
"""
def __init__(self, clip_norm, group_name="default_group", need_clip=None):
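The global-norm formula referenced above is likewise elided here; a minimal numeric sketch, assuming ``global_norm = sqrt(sum_i norm(t_i)^2)`` and each tensor scaled by ``clip_norm / max(global_norm, clip_norm)``:

.. code-block:: python

    import numpy as np

    # assumed rule: every tensor in t_list is scaled by clip_norm / max(global_norm, clip_norm)
    clip_norm = 1.0
    t_list = [np.array([3.0, 4.0]), np.array([1.0, 2.0, 2.0])]
    global_norm = np.sqrt(sum(np.sum(t * t) for t in t_list))    # sqrt(25 + 9), about 5.83
    scale = clip_norm / max(global_norm, clip_norm)
    clipped = [t * scale for t in t_list]
    print(np.sqrt(sum(np.sum(t * t) for t in clipped)))          # 1.0 == clip_norm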
......
...@@ -259,24 +259,24 @@ def Print(input,
Examples:
.. code-block:: python
import paddle.fluid as fluid
input = fluid.layers.fill_constant(shape=[10,2], value=3, dtype='int64')
input = fluid.layers.Print(input, message="The content of input layer:")
main_program = fluid.default_main_program()
exe = fluid.Executor(fluid.CPUPlace())
exe.run(main_program)
Output at runtime:
.. code-block:: bash
The content of input layer: The place is:CPUPlace
Tensor[fill_constant_0.tmp_0]
shape: [10,2,]
dtype: x
data: 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
import paddle
paddle.enable_static()
x = paddle.full(shape=[2, 3], fill_value=3, dtype='int64')
out = paddle.static.Print(x, message="The content of input layer:")
main_program = paddle.static.default_main_program()
exe = paddle.static.Executor(place=paddle.CPUPlace())
res = exe.run(main_program, fetch_list=[out])
# Variable: fill_constant_1.tmp_0
# - message: The content of input layer:
# - lod: {}
# - place: CPUPlace
# - shape: [2, 3]
# - layout: NCHW
# - dtype: long
# - data: [3 3 3 3 3 3]
'''
check_variable_and_dtype(input, 'input',
['float32', 'float64', 'int32', 'int64', 'bool'],
......