PaddlePaddle / Paddle
Commit 4d79304c (unverified)
Authored Sep 30, 2020 by Qi Li; committed via GitHub on Sep 30, 2020
update samples of print and clip api, test=develop (#27670)
Parent: e262cb62
Showing 2 changed files with 80 additions and 209 deletions (+80 −209)
python/paddle/fluid/clip.py (+63 −192)
python/paddle/fluid/layers/control_flow.py (+17 −17)
python/paddle/fluid/clip.py @ 4d79304c
...
@@ -158,10 +158,6 @@ class GradientClipBase(object):
 class GradientClipByValue(GradientClipBase):
     """
-    :alias_main: paddle.nn.GradientClipByValue
-    :alias: paddle.nn.GradientClipByValue,paddle.nn.clip.GradientClipByValue
-    :old_api: paddle.fluid.clip.GradientClipByValue
-
     Limit the value of multi-dimensional Tensor :math:`X` to the range [min, max].

     - Any values less than min are set to ``min``.
...
@@ -172,7 +168,7 @@ class GradientClipByValue(GradientClipBase):
     is not None, then only part of gradients can be selected for gradient clipping.
     Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
-    (for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
+    (for example: :ref:`api_paddle_optimizer_SGD`).

     Args:
         max (float): The maximum value to clip by.
...
@@ -185,66 +181,28 @@ class GradientClipByValue(GradientClipBase):
     Examples:
         .. code-block:: python

-            # use for Static mode
-            import paddle
-            import paddle.fluid as fluid
-            import numpy as np
-
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(
-                    main_program=main_prog, startup_program=startup_prog):
-                image = fluid.data(
-                    name='x', shape=[-1, 2], dtype='float32')
-                predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
-                loss = fluid.layers.mean(predict)
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByValue(min=-1, max=1)
-
-                # Clip a part of parameters in network: (e.g. fc_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a Parameter, and return bool
-                # def fileter_func(Parameter):
-                #     # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
-                #     return Parameter.name=="fc_0.w_0"
-                # clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
-                sgd_optimizer.minimize(loss)
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
-            exe.run(startup_prog)
-            out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
-
-            # use for Dygraph mode
-            import paddle
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                linear = fluid.dygraph.Linear(10, 10)  # Trainable parameters:: linear_0.w.0, linear_0.b.0
-                inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
-                out = linear(fluid.dygraph.to_variable(inputs))
-                loss = fluid.layers.reduce_mean(out)
-                loss.backward()
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByValue(min=-1, max=1)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(ParamBase):
-                #     # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
-                #     return ParamBase.name == "linear_0.w_0"
-                #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
-                #     return ParamBase.name == linear.weight.name
-                # clip = fluid.clip.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGD(
-                    learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
-                sgd_optimizer.minimize(loss)
+            import paddle
+
+            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
+            linear = paddle.nn.Linear(10, 10)
+            out = linear(x)
+            loss = paddle.mean(out)
+            loss.backward()
+
+            # clip all parameters in network:
+            clip = paddle.nn.GradientClipByValue(min=-1, max=1)
+
+            # clip a part of parameters in network: (e.g. linear_0.w_0)
+            # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
+            # def fileter_func(ParamBase):
+            #     # It can be easily filtered by ParamBase.name(name can be set in paddle.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
+            #     return ParamBase.name == "linear_0.w_0"
+            #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
+            #     return ParamBase.name == linear.weight.name
+            # clip = paddle.nn.GradientClipByValue(min=-1, max=1, need_clip=fileter_func)
+
+            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
+            sdg.step()

     """
     def __init__(self, max, min=None, need_clip=None):
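For intuition, the clipping this class documents is just an elementwise clamp of each selected gradient into [min, max]. A minimal NumPy sketch of those semantics (the helper name clip_by_value and the sample values are illustrative, not Paddle API):

    import numpy as np

    def clip_by_value(grad, min_v=-1.0, max_v=1.0):
        # Elementwise clamp into [min_v, max_v], mirroring what
        # GradientClipByValue applies to every gradient selected by need_clip.
        return np.clip(grad, min_v, max_v)

    g = np.array([-3.0, -0.5, 0.2, 4.0], dtype='float32')
    print(clip_by_value(g))  # [-1.  -0.5  0.2  1. ]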
...
@@ -300,10 +258,6 @@ class GradientClipByValue(GradientClipBase):
 class GradientClipByNorm(GradientClipBase):
     """
-    :alias_main: paddle.nn.GradientClipByNorm
-    :alias: paddle.nn.GradientClipByNorm,paddle.nn.clip.GradientClipByNorm
-    :old_api: paddle.fluid.clip.GradientClipByNorm
-
     Limit the l2 norm of multi-dimensional Tensor :math:`X` to ``clip_norm`` .

     - If the l2 norm of :math:`X` is greater than ``clip_norm`` , :math:`X` will be compressed by a ratio.
...
@@ -314,7 +268,7 @@ class GradientClipByNorm(GradientClipBase):
     is not None, then only part of gradients can be selected for gradient clipping.
     Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
-    (for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
+    (for example: :ref:`api_paddle_optimizer_SGD`).

     The clipping formula is:
...
@@ -342,68 +296,28 @@ class GradientClipByNorm(GradientClipBase):
     Examples:
         .. code-block:: python

-            # use for Static mode
-            import paddle
-            import paddle.fluid as fluid
-            import numpy as np
-
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(
-                    main_program=main_prog, startup_program=startup_prog):
-                image = fluid.data(
-                    name='x', shape=[-1, 2], dtype='float32')
-                predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
-                loss = fluid.layers.mean(predict)
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a Parameter, and return bool
-                # def fileter_func(Parameter):
-                #     # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
-                #     return Parameter.name=="fc_0.w_0"
-                # clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
-                sgd_optimizer.minimize(loss)
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
-            exe.run(startup_prog)
-            out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
-
-            # use for Dygraph mode
-            import paddle
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                linear = fluid.dygraph.Linear(10, 10)  # Trainable: linear_0.w.0, linear_0.b.0
-                inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
-                out = linear(fluid.dygraph.to_variable(inputs))
-                loss = fluid.layers.reduce_mean(out)
-                loss.backward()
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(ParamBase):
-                #     # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
-                #     return ParamBase.name == "linear_0.w_0"
-                #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
-                #     return ParamBase.name == linear.weight.name
-                # clip = fluid.clip.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGD(
-                    learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
-                sgd_optimizer.minimize(loss)
+            import paddle
+
+            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
+            linear = paddle.nn.Linear(10, 10)
+            out = linear(x)
+            loss = paddle.mean(out)
+            loss.backward()
+
+            # clip all parameters in network:
+            clip = paddle.nn.GradientClipByNorm(clip_norm=1.0)
+
+            # clip a part of parameters in network: (e.g. linear_0.w_0)
+            # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
+            # def fileter_func(ParamBase):
+            #     # It can be easily filtered by ParamBase.name(name can be set in paddle.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
+            #     return ParamBase.name == "linear_0.w_0"
+            #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
+            #     return ParamBase.name == linear.weight.name
+            # clip = paddle.nn.GradientClipByNorm(clip_norm=1.0, need_clip=fileter_func)
+
+            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
+            sdg.step()

     """
     def __init__(self, clip_norm, need_clip=None):
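The per-tensor norm clipping this class documents ("compressed by a ratio") can be sketched in NumPy as follows; clip_by_norm is an illustrative helper, not Paddle API, and assumes the docstring's formula of rescaling only when the l2 norm exceeds clip_norm:

    import numpy as np

    def clip_by_norm(grad, clip_norm=1.0):
        # Rescale grad so its l2 norm is at most clip_norm;
        # gradients already within the limit pass through unchanged.
        norm = np.sqrt((grad ** 2).sum())
        if norm > clip_norm:
            return grad * (clip_norm / norm)
        return grad

    g = np.array([3.0, 4.0], dtype='float32')  # l2 norm = 5
    print(clip_by_norm(g, clip_norm=1.0))      # [0.6 0.8], norm is now 1.0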
...
@@ -455,10 +369,6 @@ class GradientClipByNorm(GradientClipBase):
 class GradientClipByGlobalNorm(GradientClipBase):
     """
-    :alias_main: paddle.nn.GradientClipByGlobalNorm
-    :alias: paddle.nn.GradientClipByGlobalNorm,paddle.nn.clip.GradientClipByGlobalNorm
-    :old_api: paddle.fluid.clip.GradientClipByGlobalNorm
-
     Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
     :math:`t\_list` , and limit it to ``clip_norm`` .
...
@@ -470,7 +380,7 @@ class GradientClipByGlobalNorm(GradientClipBase):
     is not None, then only part of gradients can be selected for gradient clipping.
     Gradient clip will takes effect after being set in ``optimizer`` , see the document ``optimizer``
-    (for example: :ref:`api_fluid_optimizer_SGDOptimizer`).
+    (for example: :ref:`api_paddle_optimizer_SGD`).

     The clipping formula is:
...
@@ -494,67 +404,28 @@ class GradientClipByGlobalNorm(GradientClipBase):
     Examples:
         .. code-block:: python

-            # use for Static mode
-            import paddle
-            import paddle.fluid as fluid
-            import numpy as np
-
-            main_prog = fluid.Program()
-            startup_prog = fluid.Program()
-            with fluid.program_guard(
-                    main_program=main_prog, startup_program=startup_prog):
-                image = fluid.data(
-                    name='x', shape=[-1, 2], dtype='float32')
-                predict = fluid.layers.fc(input=image, size=3, act='relu')  # Trainable parameters: fc_0.w.0, fc_0.b.0
-                loss = fluid.layers.mean(predict)
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. fc_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(Parameter):
-                #     # It can be easily filtered by Parameter.name (name can be set in fluid.ParamAttr, and the default name is fc_0.w_0, fc_0.b_0)
-                #     return Parameter.name=="fc_0.w_0"
-                # clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.1, grad_clip=clip)
-                sgd_optimizer.minimize(loss)
-
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            x = np.random.uniform(-100, 100, (10, 2)).astype('float32')
-            exe.run(startup_prog)
-            out = exe.run(main_prog, feed={'x': x}, fetch_list=loss)
-
-            # use for Dygraph mode
-            import paddle
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                linear = fluid.dygraph.Linear(10, 10)  # Trainable: linear_0.w.0, linear_0.b.0
-                inputs = fluid.layers.uniform_random([32, 10]).astype('float32')
-                out = linear(fluid.dygraph.to_variable(inputs))
-                loss = fluid.layers.reduce_mean(out)
-                loss.backward()
-
-                # Clip all parameters in network:
-                clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
-
-                # Clip a part of parameters in network: (e.g. linear_0.w_0)
-                # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
-                # def fileter_func(ParamBase):
-                #     # It can be easily filtered by ParamBase.name(name can be set in fluid.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
-                #     return ParamBase.name == "linear_0.w_0"
-                #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
-                #     return ParamBase.name == linear.weight.name
-                # clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
-
-                sgd_optimizer = fluid.optimizer.SGD(
-                    learning_rate=0.1, parameter_list=linear.parameters(), grad_clip=clip)
-                sgd_optimizer.minimize(loss)
+            import paddle
+
+            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
+            linear = paddle.nn.Linear(10, 10)
+            out = linear(x)
+            loss = paddle.mean(out)
+            loss.backward()
+
+            # clip all parameters in network:
+            clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0)
+
+            # clip a part of parameters in network: (e.g. linear_0.w_0)
+            # pass a function(fileter_func) to need_clip, and fileter_func receive a ParamBase, and return bool
+            # def fileter_func(ParamBase):
+            #     # It can be easily filtered by ParamBase.name(name can be set in paddle.ParamAttr, and the default name is linear_0.w_0, linear_0.b_0)
+            #     return ParamBase.name == "linear_0.w_0"
+            #     # Note: linear.weight and linear.bias can return the weight and bias of dygraph.Linear, respectively, and can be used to filter
+            #     return ParamBase.name == linear.weight.name
+            # clip = paddle.nn.GradientClipByGlobalNorm(clip_norm=1.0, need_clip=fileter_func)
+
+            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), grad_clip=clip)
+            sdg.step()

     """
     def __init__(self, clip_norm, group_name="default_group", need_clip=None):
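Global-norm clipping differs from per-tensor norm clipping in that one norm is computed over the whole list and every tensor is scaled by the same ratio. A minimal NumPy sketch of that behavior (clip_by_global_norm is an illustrative helper, not Paddle API):

    import numpy as np

    def clip_by_global_norm(grads, clip_norm=1.0):
        # One norm over all tensors; every tensor gets the same scale
        # clip_norm / max(global_norm, clip_norm), so the relative
        # proportions between parameters are preserved.
        global_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
        scale = clip_norm / max(global_norm, clip_norm)
        return [g * scale for g in grads]

    grads = [np.array([3.0, 4.0]), np.array([0.0, 12.0])]  # global norm = 13
    print(clip_by_global_norm(grads))  # each tensor scaled by 1/13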
...
python/paddle/fluid/layers/control_flow.py @ 4d79304c
...
@@ -259,24 +259,24 @@ def Print(input,
     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
-
-            input = fluid.layers.fill_constant(shape=[10,2], value=3, dtype='int64')
-            input = fluid.layers.Print(input, message="The content of input layer:")
-
-            main_program = fluid.default_main_program()
-            exe = fluid.Executor(fluid.CPUPlace())
-            exe.run(main_program)
-
-        Output at runtime:
-
-        .. code-block:: bash
-
-            The content of input layer:     The place is:CPUPlace
-            Tensor[fill_constant_0.tmp_0]
-                shape: [10,2,]
-                dtype: x
-                data: 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+            import paddle
+
+            paddle.enable_static()
+
+            x = paddle.full(shape=[2, 3], fill_value=3, dtype='int64')
+            out = paddle.static.Print(x, message="The content of input layer:")
+
+            main_program = paddle.static.default_main_program()
+            exe = paddle.static.Executor(place=paddle.CPUPlace())
+            res = exe.run(main_program, fetch_list=[out])
+            # Variable: fill_constant_1.tmp_0
+            #   - message: The content of input layer:
+            #   - lod: {}
+            #   - place: CPUPlace
+            #   - shape: [2, 3]
+            #   - layout: NCHW
+            #   - dtype: long
+            #   - data: [3 3 3 3 3 3]
     '''
     check_variable_and_dtype(input, 'input',
                              ['float32', 'float64', 'int32', 'int64', 'bool'],
...
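For convenience, the updated example from the hunk above also runs as a standalone script; this is a straight consolidation of the added lines, and the exact tensor report printed at runtime may vary across Paddle versions:

    import paddle

    paddle.enable_static()

    x = paddle.full(shape=[2, 3], fill_value=3, dtype='int64')
    out = paddle.static.Print(x, message="The content of input layer:")

    main_program = paddle.static.default_main_program()
    exe = paddle.static.Executor(place=paddle.CPUPlace())
    res = exe.run(main_program, fetch_list=[out])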