BaiXuePrincess / Paddle
Forked from PaddlePaddle / Paddle (kept in sync with the fork source).
Commit fe0dc40d (unverified)
Authored on Jan 03, 2023 by 骑马小猫; committed by GitHub on Jan 03, 2023.
[FluidAPI]remove clip api (#48946)
Parent: 822ea0f9
Showing 43 changed files with 1,174 additions and 1,279 deletions (+1174 −1279).
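The commit retires the legacy paddle.fluid.clip entry points in favour of their paddle.nn counterparts (ClipGradByGlobalNorm, ClipGradByNorm, ClipGradByValue, plus the helpers in paddle.nn.clip). A minimal migration sketch for user code follows; the layer size and learning rate are illustrative and not taken from this diff:

import paddle

# before this commit (legacy API, removed here):
#   clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
linear = paddle.nn.Linear(13, 5)                       # illustrative model
clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)   # new location of the class
opt = paddle.optimizer.AdamW(
    learning_rate=0.001,
    parameters=linear.parameters(),
    grad_clip=clip,
)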
python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py  +3 −3
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py  +3 −4
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py  +1 −1
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py  +1 −1
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py  +3 −2
python/paddle/distributed/fleet/metrics/metric.py  +1 −1
python/paddle/fluid/__init__.py  +0 −2
python/paddle/fluid/clip.py  +0 −944
python/paddle/fluid/incubate/fleet/utils/fleet_util.py  +3 −3
python/paddle/fluid/layers/nn.py  +0 −200
python/paddle/fluid/optimizer.py  +4 −11
python/paddle/fluid/tests/test_error_clip.py  +2 −2
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py  +1 −1
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py  +1 −1
python/paddle/fluid/tests/unittests/collective/fleet/test_dgc_optimizer.py  +4 −4
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py  +4 −4
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py  +3 −3
python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py  +1 −1
python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py  +3 −2
python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py  +2 −2
python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py  +3 −3
python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py  +1 −1
python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py  +2 −2
python/paddle/fluid/tests/unittests/test_adam_op.py  +1 −1
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py  +2 −1
python/paddle/fluid/tests/unittests/test_clip_op.py  +1 −7
python/paddle/fluid/tests/unittests/test_dist_transpiler.py  +1 −1
python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py  +2 −2
python/paddle/fluid/tests/unittests/test_fleet_executor.py  +1 −1
python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py  +1 −1
python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py  +1 −1
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py  +3 −2
python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py  +4 −10
python/paddle/fluid/tests/unittests/test_gradient_clip.py  +21 −25
python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py  +2 −2
python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py  +2 −2
python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py  +2 −2
python/paddle/hapi/model.py  +1 −1
python/paddle/incubate/distributed/models/moe/grad_clip.py  +5 −4
python/paddle/incubate/optimizer/distributed_fused_lamb.py  +1 −1
python/paddle/nn/clip.py  +1069 −4
python/paddle/optimizer/adamw.py  +1 −1
python/paddle/optimizer/optimizer.py  +7 −12
python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
...
@@ -20,11 +20,11 @@ __all__ = []
 import paddle
 from paddle.common_ops_import import LayerHelper
-from paddle.fluid.clip import GradientClipByNorm, append_gradient_clip_ops
 from paddle.fluid.dygraph import base as imperative_base
 from paddle.fluid.framework import in_dygraph_mode
 from paddle.fluid.optimizer import Momentum, Optimizer
 from paddle.framework import core
+from paddle.nn.clip import ClipGradByNorm, append_gradient_clip_ops
 from paddle.static import create_global_var
...
@@ -76,9 +76,9 @@ class DGCMomentumOptimizer(Optimizer):
         self._dgc_clip_norm = None
         if grad_clip is not None:
-            if not isinstance(grad_clip, GradientClipByNorm):
+            if not isinstance(grad_clip, ClipGradByNorm):
                 raise TypeError(
-                    "The type of grad_clip should be 'GradientClipByNorm', because DGCMomentumOptimizer only support GradientClipByNorm"
+                    "The type of grad_clip should be 'ClipGradByNorm', because DGCMomentumOptimizer only support ClipGradByNorm"
                 )
             assert isinstance(num_trainers, int), (
                 "The type of num_trainers should be 'int', but received %s"
...
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py
...
@@ -15,9 +15,8 @@
 import paddle
 from paddle import framework
 from paddle.autograd import no_grad
-from paddle.fluid import layers
-from paddle.fluid.clip import ClipGradByGlobalNorm
 from paddle.framework import core
+from paddle.nn import ClipGradByGlobalNorm, clip
 from ...base.topology import ParallelMode
 from ...utils.hybrid_parallel_util import (
...
@@ -62,8 +61,8 @@ class HybridParallelClipGrad:
                 continue
             merge_grad = g
             if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                merge_grad = layers.merge_selected_rows(g)
-                merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
+                merge_grad = clip.merge_selected_rows(g)
+                merge_grad = clip.get_tensor_from_selected_rows(merge_grad)
             square = paddle.square(merge_grad)
             sum_square = paddle.sum(square)
...
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py
...
@@ -30,7 +30,7 @@ import paddle
 import paddle.distributed as dist
 from paddle.distributed import ParallelMode, fleet
 from paddle.fluid import core
-from paddle.fluid.clip import ClipGradByGlobalNorm
+from paddle.nn import ClipGradByGlobalNorm
 from paddle.optimizer import Optimizer
 
 HybridParallelClipGrad = (
...
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py
...
@@ -25,8 +25,8 @@ import paddle.fluid.framework as framework
 from paddle import nn
 from paddle.autograd import PyLayer
 from paddle.distributed import collective
-from paddle.fluid.clip import ClipGradByGlobalNorm
 from paddle.fluid.framework import EagerParamBase
+from paddle.nn import ClipGradByGlobalNorm
 from .group_sharded_storage import GradStorage
 from .group_sharded_utils import GroupShardedClipGrad, Type, device_guard
...
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py
...
@@ -23,6 +23,7 @@ from paddle import _legacy_C_ops
 from paddle.fluid import core, layers
 from paddle.fluid.dygraph import to_variable
 from paddle.fluid.framework import dygraph_only
+from paddle.nn import clip
 
 
 class Taskflow:
...
@@ -65,8 +66,8 @@ class GroupShardedClipGrad:
             merge_grad = g
             if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                merge_grad = layers.get_tensor_from_selected_rows(
-                    layers.merge_selected_rows(g)
+                merge_grad = clip.get_tensor_from_selected_rows(
+                    clip.merge_selected_rows(g)
                 )
             square = paddle.square(merge_grad)
             sum_square = paddle.sum(square)
...
python/paddle/distributed/fleet/metrics/metric.py
...
@@ -159,7 +159,7 @@ def auc(stat_pos, stat_neg, scope=None, util=None):
           .. code-block:: python
 
           # in model.py
-          similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(output, min=-15.0, max=15.0))
+          similarity_norm = fluid.layers.sigmoid(paddle.clip(output, min=-15.0, max=15.0))
           binary_predict = fluid.layers.concat(
               input=[paddle.subtract(fluid.layers.ceil(similarity_norm), similarity_norm), similarity_norm], axis=1)
           self.auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg] =
...
python/paddle/fluid/__init__.py
...
@@ -90,7 +90,6 @@ from .transpiler import (
     DistributeTranspilerConfig,
 )
 from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
-from . import clip
 from . import profiler
 from . import unique_name
 from . import parallel_executor
...
@@ -164,7 +163,6 @@ __all__ = (
     'ParamAttr',
     'WeightNormParamAttr',
     'DataFeeder',
-    'clip',
     'profiler',
     'unique_name',
     'Scope',
...
python/paddle/fluid/clip.py
Deleted (file mode 100644 → 0). This diff is collapsed.
python/paddle/fluid/incubate/fleet/utils/fleet_util.py
...
@@ -185,7 +185,7 @@ class FleetUtil:
             # below is part of model
             emb = my_slot_net(slots, label) # emb can be fc layer of size 1
-            similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(\
+            similarity_norm = fluid.layers.sigmoid(paddle.clip(\
                 emb, min=-15.0, max=15.0), name="similarity_norm")\
             binary_predict = fluid.layers.concat(input=[\
                 paddle.subtract(\
...
@@ -1374,7 +1374,7 @@ class FleetUtil:
             label = fluid.layers.data(name="click", shape=[-1, 1],\
                 dtype="int64", lod_level=0, append_batch_size=False)
             emb = my_slot_net(slots, label) # emb can be fc layer of size 1
-            similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(\
+            similarity_norm = fluid.layers.sigmoid(paddle.clip(\
                 emb, min=-15.0, max=15.0), name="similarity_norm")\
             binary_predict = fluid.layers.concat(input=[\
                 paddle.subtract(\
...
@@ -1574,7 +1574,7 @@ class FleetUtil:
             label = fluid.layers.data(name="click", shape=[-1, 1],\
                 dtype="int64", lod_level=0, append_batch_size=False)
             emb = my_slot_net(slots, label) # emb can be fc layer of size 1
-            similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(\
+            similarity_norm = fluid.layers.sigmoid(paddle.clip(\
                 emb, min=-15.0, max=15.0), name="similarity_norm")\
             binary_predict = fluid.layers.concat(input=[\
                 paddle.subtract(\
...
python/paddle/fluid/layers/nn.py
...
@@ -63,10 +63,6 @@ __all__ = [
     'fc',
     'embedding',
     'autoincreased_step_counter',
-    'clip',
-    'clip_by_norm',
-    'merge_selected_rows',
-    'get_tensor_from_selected_rows',
 ]
 
 OP_NAMEMAPPING = {
...
@@ -997,199 +993,3 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True):
     )
     return out
-
-
-@templatedoc()
-def clip(x, min, max, name=None):
-    """
-    :old_api: paddle.fluid.layers.clip
-
-    ${comment}
-
-    Args:
-        x(${x_type}): ${x_comment}
-        min(float): ${min_comment}
-        max(float): ${max_comment}
-        name(str, optional): The default value is None.
-                             Normally there is no need for user to set this property.
-                             For more information, please refer to :ref:`api_guide_Name`
-
-    Returns:
-        ${out_comment}
-
-    Return Type:
-        ${out_type}
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            input = fluid.data(
-                name='data', shape=[1], dtype='float32')
-            reward = fluid.layers.clip(x=input, min=-1.0, max=1.0)
-    """
-    helper = LayerHelper("clip", **locals())
-    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'clip')
-
-    if name is None:
-        name = unique_name.generate_with_ignorable_key(
-            ".".join([helper.name, 'tmp'])
-        )
-
-    out = helper.create_variable(
-        type=x.type, name=name, dtype=x.dtype, persistable=False
-    )
-
-    helper.append_op(
-        type="clip",
-        inputs={"X": x},
-        attrs={"min": min, "max": max},
-        outputs={"Out": out},
-    )
-
-    return out
-
-
-@templatedoc()
-def clip_by_norm(x, max_norm, name=None):
-    """
-    ${comment}
-
-    Args:
-        x(${x_type}): ${x_comment}
-        max_norm(${max_norm_type}): ${max_norm_comment}
-        name(str, optional): For detailed information, please refer
-            to :ref:`api_guide_Name`. Usually name is no need to set and
-            None by default.
-
-    Returns:
-        Tensor:
-
-        out(${out_type}): ${out_comment}
-
-    Examples:
-        .. code-block:: python
-
-            import paddle
-            import paddle.fluid as fluid
-
-            input = paddle.to_tensor([[2.0, 2.0], [2.0, 2.0]], dtype='float32')
-            reward = fluid.layers.clip_by_norm(x=input, max_norm=1.0)
-            # [[0.5, 0.5], [0.5, 0.5]]
-    """
-    if in_dygraph_mode():
-        return _C_ops.clip_by_norm(x, max_norm)
-    else:
-        helper = LayerHelper("clip_by_norm", **locals())
-        check_variable_and_dtype(x, 'X', ['float32', 'float16'], 'clip_by_norm')
-        check_type(max_norm, 'max_norm', (float), 'clip_by_norm')
-
-        if name is None:
-            name = unique_name.generate_with_ignorable_key(
-                ".".join([helper.name, 'tmp'])
-            )
-
-        out = helper.create_variable(
-            type=x.type, name=name, dtype=x.dtype, persistable=False
-        )
-
-        helper.append_op(
-            type="clip_by_norm",
-            inputs={"X": x},
-            attrs={"max_norm": max_norm},
-            outputs={"Out": out},
-        )
-
-        return out
-
-
-@templatedoc()
-def merge_selected_rows(x, name=None):
-    """
-    ${comment}
-
-    Args:
-        x(${x_type}): ${x_comment}
-        name(basestring|None): Name of the output.
-
-    Returns:
-        out(${out_type}): ${out_comment}
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            b = fluid.default_main_program().global_block()
-            var = b.create_var(
-                name="X", dtype="float32", persistable=True,
-                type=fluid.core.VarDesc.VarType.SELECTED_ROWS)
-            y = fluid.layers.merge_selected_rows(var)
-    """
-    if in_dygraph_mode():
-        return _C_ops.merge_selected_rows(x)
-    else:
-        helper = LayerHelper("merge_selected_rows", **locals())
-        out = helper.create_variable_for_type_inference(dtype=x.dtype)
-        helper.append_op(
-            type="merge_selected_rows",
-            inputs={"X": x},
-            attrs={},
-            outputs={"Out": out},
-        )
-        return out
-
-
-@templatedoc()
-def get_tensor_from_selected_rows(x, name=None):
-    """
-    This operator gets tensor data from input with SelectedRows type, and outputs a LoDTensor.
-
-    .. code-block:: text
-
-        input x is SelectedRows:
-           x.rows = [0, 5, 5, 4, 19]
-           x.height = 20
-           x.value = [[1, 1] [2, 2] [2, 2] [3, 3] [6, 6]]
-
-        Output is LoDTensor:
-           out.shape = [5, 2]
-           out.data = [[1, 1],
-                       [2, 2],
-                       [2, 2],
-                       [3, 3],
-                       [6, 6]]
-
-    Args:
-        x(SelectedRows): Input with SelectedRows type. The data type is float32, float64, int32 or int64.
-        name(str, optional): The default value is None. Normally there is no need for user to set this property.
-            For more information, please refer to :ref:`api_guide_Name` .
-
-    Returns:
-        Variable: LoDTensor transformed from SelectedRows. The data type is same with input.
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            b = fluid.default_main_program().global_block()
-            input = b.create_var(name="X", dtype="float32", persistable=True, type=fluid.core.VarDesc.VarType.SELECTED_ROWS)
-            out = fluid.layers.get_tensor_from_selected_rows(input)
-    """
-    check_type(x, 'x', Variable, 'get_tensor_from_selected_rows')
-    if x.type != core.VarDesc.VarType.SELECTED_ROWS:
-        raise TypeError(
-            "The type of 'x' in get_tensor_from_selected_rows must be SELECTED_ROWS."
-        )
-    helper = LayerHelper('get_tensor_from_selected_rows', **locals())
-    out = helper.create_variable_for_type_inference(dtype=x.dtype)
-    helper.append_op(
-        type='get_tensor_from_selected_rows',
-        inputs={'X': x},
-        outputs={'Out': out},
-        attrs={},
-    )
-    return out
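The four layers removed above (clip, clip_by_norm, merge_selected_rows, get_tensor_from_selected_rows) now live under paddle / paddle.nn.clip; see python/paddle/nn/clip.py further down. A short sketch of the replacement for the removed fluid.layers.clip, assuming a Paddle build from around this commit where paddle.clip is available:

import paddle

x = paddle.to_tensor([[-2.0, 0.5], [3.0, 1.2]])
y = paddle.clip(x, min=-1.0, max=1.0)  # replaces fluid.layers.clip(x, min, max)
# y -> [[-1.0, 0.5], [1.0, 1.0]]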
python/paddle/fluid/optimizer.py
...
@@ -38,13 +38,6 @@ from .backward import (
     _append_grad_suffix_,
     _get_no_grad_set_name,
 )
-from .clip import (
-    GradientClipBase,
-    GradientClipByNorm,
-    error_clip_callback,
-    append_gradient_clip_ops,
-    ClipGradByGlobalNorm,
-)
 from .framework import program_guard
 from .initializer import Constant
 from .layer_helper import LayerHelper
...
@@ -160,7 +153,7 @@ class Optimizer:
             )
         if grad_clip is not None:
-            if not isinstance(grad_clip, GradientClipBase):
+            if not isinstance(grad_clip, paddle.nn.clip.GradientClipBase):
                 raise TypeError(
                     "'grad_clip' should be an instance of GradientClipBase's derived class"
                 )
...
@@ -1030,7 +1023,7 @@ class Optimizer:
                 params_grads.append((param, grad_var))
         else:
             if callbacks is None:
-                callbacks = [error_clip_callback]
+                callbacks = [paddle.nn.clip.error_clip_callback]
             else:
                 assert isinstance(callbacks, list)
             program = loss.block.program
...
@@ -1260,7 +1253,7 @@ class Optimizer:
         # NOTE(zhiqiu): currently, only support ClipGradByGlobalNorm and without regularization.
         if self._flatten_param_grads and self.regularization is None:
             if self._grad_clip is None or isinstance(
-                self._grad_clip, ClipGradByGlobalNorm
+                self._grad_clip, paddle.nn.ClipGradByGlobalNorm
             ):
                 params_grads = self.flatten_param_grads(params_grads)
...
@@ -1268,7 +1261,7 @@ class Optimizer:
         if self._grad_clip is not None:
             params_grads = self._grad_clip(params_grads)
         else:
-            params_grads = append_gradient_clip_ops(params_grads)
+            params_grads = paddle.nn.clip.append_gradient_clip_ops(params_grads)
 
         # Add regularization if any
         params_grads = self.append_regularization_ops(
...
python/paddle/fluid/tests/test_error_clip.py
...
@@ -38,13 +38,13 @@ with fluid.program_guard(main_program=prog):
 prog_clip = prog.clone()
 prog_clip.block(0).var(hidden1.name)._set_error_clip(
-    fluid.clip.ErrorClipByValue(max=CLIP_MAX, min=CLIP_MIN)
+    paddle.nn.clip.ErrorClipByValue(max=CLIP_MAX, min=CLIP_MIN)
 )
 
 avg_cost_clip = prog_clip.block(0).var(avg_cost.name)
 fluid.backward.append_backward(loss=avg_cost)
 fluid.backward.append_backward(
-    loss=avg_cost_clip, callbacks=[fluid.clip.error_clip_callback]
+    loss=avg_cost_clip, callbacks=[paddle.nn.clip.error_clip_callback]
 )
 
 hidden1_grad = prog.block(0).var(hidden1.name + "@GRAD")
...
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py
...
@@ -122,7 +122,7 @@ class TestDistMnist2x2(TestDistRunnerBase):
         opt = paddle.optimizer.AdamW(
             learning_rate=lr_val,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
 
         acc_steps = 2  # accumulated steps for pipeline
...
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py
...
@@ -122,7 +122,7 @@ class TestDistMnist2x2(TestDistRunnerBase):
         opt = fluid.optimizer.Momentum(
             learning_rate=lr_val,
             momentum=0.9,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
 
         acc_steps = 2  # accumulated steps for pipeline
...
python/paddle/fluid/tests/unittests/collective/fleet/test_dgc_optimizer.py
...
@@ -15,10 +15,10 @@
 import unittest
 
 import paddle
-import paddle.fluid.clip as clip
 import paddle.fluid.framework as framework
 import paddle.fluid.optimizer as optimizer
 import paddle.fluid.regularizer as regularizer
+import paddle.nn.clip as clip
 
 paddle.enable_static()
...
@@ -76,7 +76,7 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
             rampup_begin_step=0,
             num_trainers=2,
             regularization=regularization,
-            grad_clip=clip.GradientClipByNorm(1.0),
+            grad_clip=clip.ClipGradByNorm(1.0),
         )
 
         if use_recompute:
...
@@ -144,14 +144,14 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
         print("dgc regular_coeff=" + str(coeff))
 
     def test_tpyeError(self):
-        # the type of DGCMomentumOptimizer(grad_clip=) must be 'GradientClipByNorm'
+        # the type of DGCMomentumOptimizer(grad_clip=) must be 'ClipGradByNorm'
         with self.assertRaises(TypeError):
             dgc_momentum_optimizer = self.MockDGCMomentum(
                 learning_rate=0.01,
                 momentum=0.2,
                 rampup_begin_step=0,
                 num_trainers=2,
-                grad_clip=clip.GradientClipByGlobalNorm(1.0),
+                grad_clip=clip.ClipGradByGlobalNorm(1.0),
             )
 
     def test_momentum_without_dgc(self):
...
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py
...
@@ -354,7 +354,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
         }
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
 
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
...
@@ -552,7 +552,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
         strategy.fuse_grad_merge = True
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
 
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
...
@@ -940,7 +940,7 @@ class TestFleetHybridOptimizerBoundary(TestFleetMetaOptimizer):
         }
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
 
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
...
@@ -1044,7 +1044,7 @@ class TestFleetHybridOptimizerBoundary(TestFleetMetaOptimizer):
         }
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
 
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
...
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py
...
@@ -640,7 +640,7 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
         )
         avg_cost, strategy = self.net(train_prog, startup_prog)
         self.set_strategy(strategy, 'sharding')
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
         )
...
@@ -1309,7 +1309,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
             "micro_batch_size": 2,
             "accumulate_steps": 4,
         }
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
         )
...
@@ -1547,7 +1547,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
             "micro_batch_size": 2,
             "accumulate_steps": 4,
         }
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
         self.optimizer(
             avg_cost,
             strategy,
...
python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py
...
@@ -22,8 +22,8 @@ import paddle
 import paddle.distributed.fleet as fleet
 import paddle.fluid.core as core
 from paddle.distributed.fleet.meta_optimizers.common import CollectiveHelper
-from paddle.fluid.clip import ClipGradBase, _clip_by_global_norm_using_mp_type
 from paddle.incubate import DistributedFusedLamb
+from paddle.nn.clip import ClipGradBase, _clip_by_global_norm_using_mp_type
 from paddle.vision.models import resnet18 as resnet
...
python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py
...
@@ -19,6 +19,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.jit.dy2static import Call
+from paddle.nn import clip
 
 SEED = 2020
 np.random.seed(SEED)
...
@@ -89,11 +90,11 @@ def len_with_selected_rows(place):
             type=fluid.core.VarDesc.VarType.SELECTED_ROWS,
         )
         # y is Variable(SelectedRows)
-        y = fluid.layers.merge_selected_rows(var)
+        y = clip.merge_selected_rows(var)
         y_len = Call(len)(y)
 
         # z is inner tensor with shape [4, 2]
-        z = fluid.layers.get_tensor_from_selected_rows(y)
+        z = clip.get_tensor_from_selected_rows(y)
         z_len = Call(len)(z)
 
         # set data for selected_rows
...
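The dygraph-to-static test now reaches merge_selected_rows and get_tensor_from_selected_rows through paddle.nn.clip. A static-graph sketch mirroring the docstring example removed from nn.py above (the variable name "X" is illustrative, and enable_static is added here for a Paddle 2.x default dygraph session):

import paddle
import paddle.fluid as fluid
from paddle.nn import clip

paddle.enable_static()
b = fluid.default_main_program().global_block()
var = b.create_var(
    name="X", dtype="float32", persistable=True,
    type=fluid.core.VarDesc.VarType.SELECTED_ROWS,
)
y = clip.merge_selected_rows(var)          # was fluid.layers.merge_selected_rows
z = clip.get_tensor_from_selected_rows(y)  # was fluid.layers.get_tensor_from_selected_rows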
python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py
...
@@ -22,8 +22,8 @@ from seq2seq_dygraph_model import AttentionModel, BaseModel
 from seq2seq_utils import Seq2SeqModelHyperParams, get_data_iter
 
 import paddle.fluid as fluid
-from paddle.fluid.clip import GradientClipByGlobalNorm
 from paddle.jit import ProgramTranslator
+from paddle.nn import ClipGradByGlobalNorm
 
 place = (
     fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
...
@@ -71,7 +71,7 @@ def train(args, attn_model=False):
             dropout=args.dropout,
         )
 
-        gloabl_norm_clip = GradientClipByGlobalNorm(args.max_grad_norm)
+        gloabl_norm_clip = ClipGradByGlobalNorm(args.max_grad_norm)
         optimizer = fluid.optimizer.SGD(
             args.learning_rate,
             parameter_list=model.parameters(),
...
python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py
...
@@ -127,7 +127,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Clip(
 ):
     def set_params(self):
         self.operand = paddle.add
-        self.act = fluid.layers.clip
+        self.act = paddle.clip
         self.act_alpha = 0.0
         self.act_beta = 10.0
...
@@ -219,7 +219,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Clip(
 ):
     def set_params(self):
         self.operand = paddle.subtract
-        self.act = fluid.layers.clip
+        self.act = paddle.clip
         self.act_alpha = 0.0
         self.act_beta = 10.0
...
@@ -319,7 +319,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Clip(
 ):
     def set_params(self):
         self.operand = paddle.multiply
-        self.act = fluid.layers.clip
+        self.act = paddle.clip
         self.act_alpha = 0.0
         self.act_beta = 10.0
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py
...
@@ -106,7 +106,7 @@ class TensorRTSubgraphPassHardSwishPluginTest(
 class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest):
     def append_act(self, x):
-        return fluid.layers.clip(x, 0, 1)
+        return paddle.clip(x, 0, 1)
 
 
 class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest):
...
python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py
...
@@ -117,13 +117,13 @@ class TestClipOpError(unittest.TestCase):
             input_data = np.random.random((2, 4)).astype("float32")
 
             def test_Variable():
-                fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+                paddle.clip(x=input_data, min=-1.0, max=1.0)
 
             self.assertRaises(TypeError, test_Variable)
 
             def test_dtype():
                 x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
-                fluid.layers.clip(x=x2, min=-1.0, max=1.0)
+                paddle.clip(x=x2, min=-1.0, max=1.0)
 
             self.assertRaises(TypeError, test_dtype)
 
         paddle.disable_static()
...
python/paddle/fluid/tests/unittests/test_adam_op.py
...
@@ -686,7 +686,7 @@ class TestAdamOpV2(unittest.TestCase):
             value = np.arange(26).reshape(2, 13).astype("float32")
             a = fluid.dygraph.to_variable(value)
             linear = paddle.nn.Linear(13, 5)
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
             adam = paddle.optimizer.Adam(
                 0.1, parameters=linear.parameters(), grad_clip=clip
             )
...
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
...
@@ -20,12 +20,13 @@ from op_test import OpTest
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
+from paddle.nn import clip
 
 
 class TestClipByNormOp(OpTest):
     def setUp(self):
         self.max_relative_error = 0.006
-        self.python_api = fluid.layers.clip_by_norm
+        self.python_api = clip.clip_by_norm
         self.init_dtype()
         self.initTestCase()
         input = np.random.random(self.shape).astype(self.dtype)
...
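The op test now drives the kernel through paddle.nn.clip.clip_by_norm rather than the removed fluid.layers.clip_by_norm. A hedged sketch of the replacement call, reusing the numbers from the docstring example deleted from nn.py above and assuming the function is exposed the same way the updated test uses it:

import paddle
from paddle.nn import clip

x = paddle.to_tensor([[2.0, 2.0], [2.0, 2.0]], dtype='float32')
y = clip.clip_by_norm(x, max_norm=1.0)  # was fluid.layers.clip_by_norm(x, max_norm)
# y -> [[0.5, 0.5], [0.5, 0.5]]  (the tensor's L2 norm is scaled down to 1.0)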
python/paddle/fluid/tests/unittests/test_clip_op.py
...
@@ -128,15 +128,9 @@ class TestClipOpError(unittest.TestCase):
             input_data = np.random.random((2, 4)).astype("float32")
 
             def test_Variable():
-                fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+                paddle.clip(x=input_data, min=-1.0, max=1.0)
 
             self.assertRaises(TypeError, test_Variable)
 
-            def test_dtype():
-                x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
-                fluid.layers.clip(x=x2, min=-1.0, max=1.0)
-
-            self.assertRaises(TypeError, test_dtype)
-
         paddle.disable_static()
...
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
...
@@ -584,7 +584,7 @@ class TestL2Decay(TranspilerTest):
         def filter(param):
             return param.name == "fc_w"
 
-        clip = fluid.clip.GradientClipByValue(0.1, need_clip=filter)
+        clip = paddle.nn.ClipGradByValue(0.1, need_clip=filter)
         sgd_optimizer.minimize(avg_cost, grad_clip=clip)
 
     def transpiler_test_impl(self):
...
python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
...
@@ -504,8 +504,8 @@ class PaddingRNNTestBase(unittest.TestCase):
             self.feed_order,
         ) = res_vars
 
-        fluid.clip.set_gradient_clip(
-            clip=fluid.clip.GradientClipByGlobalNorm(
+        paddle.nn.clip.set_gradient_clip(
+            clip=paddle.nn.ClipGradByGlobalNorm(
                 clip_norm=config.max_grad_norm
             )
         )
...
python/paddle/fluid/tests/unittests/test_fleet_executor.py
...
@@ -64,7 +64,7 @@ class TestFleetExecutor(unittest.TestCase):
         )
         opt = paddle.optimizer.AdamW(
             learning_rate=lr_val,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
         opt.minimize(loss)
         # TODO: section_program will be removed in the future
...
python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py
...
@@ -64,7 +64,7 @@ class TestFleetExecutor(unittest.TestCase):
         )
         opt = paddle.optimizer.AdamW(
             learning_rate=lr_val,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
         opt.minimize(loss)
         # TODO: section_program will be removed in the future
...
python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py
...
@@ -47,7 +47,7 @@ class TestFleetExecutor(unittest.TestCase):
         )
         opt = paddle.optimizer.AdamW(
             learning_rate=lr_val,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
         opt.minimize(loss)
         # TODO: section_program will be removed in the future
...
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py
...
@@ -20,6 +20,7 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid import Program, program_guard
 from paddle.fluid.op import Operator
+from paddle.nn import clip
 
 
 class TestGetTensorFromSelectedRowsError(unittest.TestCase):
...
@@ -31,12 +32,12 @@ class TestGetTensorFromSelectedRowsError(unittest.TestCase):
         x_data = np.random.random((2, 4)).astype("float32")
 
         def test_Variable():
-            fluid.layers.get_tensor_from_selected_rows(x=x_data)
+            clip.get_tensor_from_selected_rows(x=x_data)
 
         self.assertRaises(TypeError, test_Variable)
 
         def test_SELECTED_ROWS():
-            fluid.layers.get_tensor_from_selected_rows(x=x_var)
+            clip.get_tensor_from_selected_rows(x=x_var)
 
         self.assertRaises(TypeError, test_SELECTED_ROWS)
...
python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py
...
@@ -17,12 +17,8 @@ import unittest
 import numpy as np
 
 import paddle.fluid as fluid
-from paddle.fluid.clip import (
-    GradientClipByGlobalNorm,
-    GradientClipByNorm,
-    GradientClipByValue,
-)
 from paddle.fluid.dygraph.base import to_variable
+from paddle.nn import ClipGradByGlobalNorm, ClipGradByNorm, ClipGradByValue
 
 
 class TestGradClipByGlobalNorm(unittest.TestCase):
...
@@ -67,7 +63,7 @@ class TestGradClipByGlobalNorm(unittest.TestCase):
     def get_dygrap_global_norm_result(self):
         with fluid.dygraph.guard():
-            gloabl_norm_clip = GradientClipByGlobalNorm(self.max_global_norm)
+            gloabl_norm_clip = ClipGradByGlobalNorm(self.max_global_norm)
             p_g_var = []
             for p, g in self.para_and_grad:
                 new_p = to_variable(p)
...
@@ -142,7 +138,7 @@ class TestGradClipByNorm(unittest.TestCase):
     def get_dygrap_norm_result(self):
         with fluid.dygraph.guard():
-            norm_clip = GradientClipByNorm(self.max_norm)
+            norm_clip = ClipGradByNorm(self.max_norm)
             p_g_var = []
             for p, g in self.para_and_grad:
                 new_p = to_variable(p)
...
@@ -212,9 +208,7 @@ class TestGradClipByValue(unittest.TestCase):
     def get_dygrap_clip_result(self):
         with fluid.dygraph.guard():
-            value_clip = GradientClipByValue(
-                max=self.max_value, min=self.min_value
-            )
+            value_clip = ClipGradByValue(max=self.max_value, min=self.min_value)
             p_g_var = []
             for p, g in self.para_and_grad:
                 new_p = to_variable(p)
...
python/paddle/fluid/tests/unittests/test_gradient_clip.py
...
@@ -20,7 +20,7 @@ from fake_reader import fake_imdb_reader
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-from paddle.fluid.clip import _allow_pure_fp16_global_norm_clip
+from paddle.nn.clip import _allow_pure_fp16_global_norm_clip
 
 paddle.enable_static()
...
@@ -173,9 +173,9 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
     # test whether the output is right when use 'set_gradient_clip'
     def test_old_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_norm)
-            fluid.clip.set_gradient_clip(clip)
-            return fluid.clip.append_gradient_clip_ops(params_grads)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
+            paddle.nn.clip.set_gradient_clip(clip)
+            return paddle.nn.clip.append_gradient_clip_ops(params_grads)
 
         self.clip_gradient = func
         self.check_gradient_clip(fluid.CPUPlace())
...
@@ -183,7 +183,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
     # test whether the output is right when use grad_clip
     def test_new_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_norm)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
             return clip(params_grads)
 
         self.clip_gradient = func
...
@@ -192,7 +192,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
     # test whether the output is right when use grad_clip under float64
     def test_new_gradient_clip_fp64(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_norm)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
             return clip(params_grads)
 
         self.clip_gradient = func
...
@@ -201,15 +201,15 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
     # invoke 'set_gradient_clip' in a wrong order
     def test_wrong_API_order(self):
         def backward_func(cost):
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)
-            fluid.clip.set_gradient_clip(clip)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=5.0)
+            paddle.nn.clip.set_gradient_clip(clip)
             sgd_optimizer = fluid.optimizer.SGD(
                 learning_rate=0.01, grad_clip=clip
             )
             # if 'set_gradient_clip' and 'optimize(grad_clip)' together, 'set_gradient_clip' will be ineffective
             sgd_optimizer.minimize(cost)
             # 'set_gradient_clip' must before 'minimize', otherwise, 'set_gradient_clip' will be ineffective
-            fluid.clip.set_gradient_clip(clip)
+            paddle.nn.clip.set_gradient_clip(clip)
 
         self.backward_and_optimize = backward_func
         for place in self.get_places():
...
@@ -269,7 +269,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
         with fluid.program_guard(
             main_program=prog, startup_program=startup_program
         ):
-            clip = fluid.clip.GradientClipByGlobalNorm(self.clip_norm)
+            clip = paddle.nn.ClipGradByGlobalNorm(self.clip_norm)
             x = (
                 fluid.default_main_program()
                 .global_block()
...
@@ -313,7 +313,7 @@ class TestGradientClipByNorm(TestGradientClip):
     # test whether the output is right when use grad_clip
     def test_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByNorm(clip_norm=self.clip_norm)
+            clip = paddle.nn.ClipGradByNorm(clip_norm=self.clip_norm)
             return clip(params_grads)
 
         self.clip_gradient = func
...
@@ -321,7 +321,7 @@ class TestGradientClipByNorm(TestGradientClip):
     # if grad is None or not need clip
     def test_none_grad(self):
-        clip = fluid.clip.GradientClipByNorm(self.clip_norm)
+        clip = paddle.nn.ClipGradByNorm(self.clip_norm)
         x = (
             fluid.default_main_program()
             .global_block()
...
@@ -371,7 +371,7 @@ class TestGradientClipByValue(TestGradientClip):
     # test whether the output is right when use grad_clip
     def test_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByValue(max=self.max, min=self.min)
+            clip = paddle.nn.ClipGradByValue(max=self.max, min=self.min)
             return clip(params_grads)
 
         self.clip_gradient = func
...
@@ -379,7 +379,7 @@ class TestGradientClipByValue(TestGradientClip):
     # if grad is None or not need clip
     def test_none_grad(self):
-        clip = fluid.clip.GradientClipByValue(self.max, self.min)
+        clip = paddle.nn.ClipGradByValue(self.max, self.min)
         x = (
             fluid.default_main_program()
             .global_block()
...
@@ -419,7 +419,7 @@ class TestDygraphGradientClip(unittest.TestCase):
             sgd_optimizer = fluid.optimizer.SGD(
                 learning_rate=0.0,
                 parameter_list=linear.parameters(),
-                grad_clip=fluid.clip.GradientClipByGlobalNorm(0.1),
+                grad_clip=paddle.nn.ClipGradByGlobalNorm(0.1),
             )
             self.check_clip_result(loss, sgd_optimizer)
...
@@ -430,12 +430,8 @@ class TestDygraphGradientClipByGlobalNorm(TestDygraphGradientClip):
     def setUp(self):
         self.clip_norm = 0.8
-        self.clip1 = fluid.clip.GradientClipByGlobalNorm(
-            clip_norm=self.clip_norm
-        )
-        self.clip2 = fluid.clip.GradientClipByGlobalNorm(
-            clip_norm=self.clip_norm
-        )
+        self.clip1 = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
+        self.clip2 = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
 
     def check_clip_result(self, loss, optimizer):
         # if grad is None
...
@@ -476,7 +472,7 @@ class TestDygraphGradientClipByNorm(TestDygraphGradientClip):
     def setUp(self):
         self.clip_norm = 0.8
-        self.clip = fluid.clip.GradientClipByNorm(clip_norm=self.clip_norm)
+        self.clip = paddle.nn.ClipGradByNorm(clip_norm=self.clip_norm)
 
     def check_clip_result(self, loss, optimizer):
         # if grad is None
...
@@ -506,7 +502,7 @@ class TestDygraphGradientClipByValue(TestDygraphGradientClip):
     def setUp(self):
         self.max = 0.2
         self.min = 0.1
-        self.clip = fluid.clip.GradientClipByValue(max=self.max, min=self.min)
+        self.clip = paddle.nn.ClipGradByValue(max=self.max, min=self.min)
 
     def check_clip_result(self, loss, optimizer):
         # if grad is None
...
@@ -572,7 +568,7 @@ class TestDygraphGradientClipFP16(unittest.TestCase):
                     params_grads.append((param, param._grad_ivar()))
                 _, grads = zip(*params_grads)
                 # clip grads
-                clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=0.8)
+                clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.8)
                 params_grads = clip(params_grads)
                 _, grads_clip = zip(*params_grads)
                 # param update
...
@@ -616,7 +612,7 @@ class TestDygraphGradientClipFP64(unittest.TestCase):
                 params_grads.append((param, param._grad_ivar()))
                 _, grads = zip(*params_grads)
                 # clip grads
-                clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=0.1)
+                clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.1)
                 params_grads = clip(params_grads)
                 _, grads_clip = zip(*params_grads)
...
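As the FP16/FP64 cases above show, the renamed classes remain directly callable on a list of (param, grad) pairs in dygraph mode. A rough sketch with an illustrative layer and clip_norm value, not taken from the test itself:

import paddle

linear = paddle.nn.Linear(4, 4)
loss = linear(paddle.rand([2, 4])).sum()
loss.backward()

params_grads = [(p, p.grad) for p in linear.parameters() if p.grad is not None]
clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.8)
params_grads = clip(params_grads)  # returns the pairs with clipped gradients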
python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
...
@@ -361,7 +361,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
         place = fluid.CPUPlace()
         with fluid.dygraph.guard(place):
             model = MyLayer(size, vocab_size, size)
-            grad_clip = fluid.clip.GradientClipByGlobalNorm(0.001)
+            grad_clip = paddle.nn.ClipGradByGlobalNorm(0.001)
             optimizer = fluid.optimizer.AdamOptimizer(
                 0.001, parameter_list=model.parameters(), grad_clip=grad_clip
             )
...
@@ -380,7 +380,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
         with fluid.dygraph.guard(place):
             model = MyLayer2(size, vocab_size, size)
-            grad_clip = fluid.clip.GradientClipByGlobalNorm(0.001)
+            grad_clip = paddle.nn.ClipGradByGlobalNorm(0.001)
             optimizer = fluid.optimizer.AdamOptimizer(
                 0.001, parameter_list=model.parameters(), grad_clip=grad_clip
             )
...
python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
...
@@ -52,7 +52,7 @@ class TestSimpleNet(unittest.TestCase):
                 fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
-                # grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)
+                # grad_clip = paddle.nn.ClipGradByGlobalNorm(5.0)
 
                 input_word = np.array([[1, 2], [2, 1]]).astype('int64')
                 input = paddle.to_tensor(input_word)
...
@@ -91,7 +91,7 @@ class TestSimpleNet(unittest.TestCase):
                 fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
-                grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)
+                grad_clip = paddle.nn.ClipGradByGlobalNorm(5.0)
 
                 input_word = np.array([[1, 2], [2, 1]]).astype('int64')
                 input = to_variable(input_word)
...
python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py
...
@@ -131,13 +131,13 @@ class TestClipOpError(unittest.TestCase):
             input_data = np.random.random((2, 4)).astype("float32")
 
             def test_Variable():
-                fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+                paddle.clip(x=input_data, min=-1.0, max=1.0)
 
             self.assertRaises(TypeError, test_Variable)
 
             def test_dtype():
                 x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
-                fluid.layers.clip(x=x2, min=-1.0, max=1.0)
+                paddle.clip(x=x2, min=-1.0, max=1.0)
 
             self.assertRaises(TypeError, test_dtype)
 
         paddle.disable_static()
...
python/paddle/hapi/model.py
...
@@ -1535,7 +1535,7 @@ class Model:
             assert isinstance(
                 self._optimizer._grad_clip,
                 (paddle.nn.ClipGradByGlobalNorm, paddle.nn.ClipGradByNorm),
-            ), "Only GradientClipByNorm and GradientClipByGlobalNorm are supported in amp training with level=O2 currently."
+            ), "Only ClipGradByNorm and ClipGradByGlobalNorm are supported in amp training with level=O2 currently."
 
             self._adapter._amp_custom_lists = {}
             self._adapter._amp_configs = {}
...
python/paddle/incubate/distributed/models/moe/grad_clip.py
...
@@ -15,13 +15,14 @@
 import paddle
 import paddle.distributed as dist
 from paddle.fluid import core, layers
-from paddle.fluid.clip import ClipGradBase, _squared_l2_norm
 from paddle.fluid.dygraph import base as imperative_base
+from paddle.nn import clip
+from paddle.nn.clip import ClipGradBase, _squared_l2_norm
 
 
 class ClipGradForMOEByGlobalNorm(ClipGradBase):
     r"""
-    The Algrithm is the same as paddle.fluid.clip.ClipGradByGlobalNorm
+    The Algrithm is the same as paddle.nn.ClipGradByGlobalNorm
 
     Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
     :math:`t\_list` , and limit it to ``clip_norm`` .
...
@@ -113,8 +114,8 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase):
                 continue
             merge_grad = g
             if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                merge_grad = layers.merge_selected_rows(g)
-                merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
+                merge_grad = clip.merge_selected_rows(g)
+                merge_grad = clip.get_tensor_from_selected_rows(merge_grad)
             sum_square = _squared_l2_norm(merge_grad)
             if sum_square.dtype == core.VarDesc.VarType.FP16:
                 sum_square_list_fp16.append(sum_square)
...
python/paddle/incubate/optimizer/distributed_fused_lamb.py
...
@@ -16,11 +16,11 @@ import os
 import paddle
 from paddle.fluid import core, framework, unique_name
-from paddle.fluid.clip import ClipGradByGlobalNorm
 from paddle.fluid.executor import global_scope
 from paddle.fluid.framework import Variable, name_scope
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.optimizer import Optimizer
+from paddle.nn import ClipGradByGlobalNorm
 
 
 def init_communicator(block, rank, ranks, ring_id):
...
python/paddle/nn/clip.py
This diff is collapsed (+1,069 −4).
python/paddle/optimizer/adamw.py
...
@@ -20,10 +20,10 @@ import paddle
 from .. import _C_ops
 from ..fluid import core, framework, unique_name
-from ..fluid.clip import GradientClipBase
 from ..fluid.dygraph import base as imperative_base
 from ..fluid.framework import Parameter, Variable
 from ..fluid.layer_helper import LayerHelper
+from ..nn.clip import GradientClipBase
 from .lr import LRScheduler
 from .optimizer import Optimizer
...
python/paddle/optimizer/optimizer.py
...
@@ -18,6 +18,7 @@ from collections import defaultdict
 import numpy as np
 
 import paddle
+import paddle.autograd as imperative_base
 from paddle import _C_ops
 from paddle.fluid import core
 from paddle.fluid.framework import (
...
@@ -32,12 +33,6 @@ from paddle.fluid.framework import (
 from ..fluid import framework, unique_name
 from ..fluid.backward import _get_no_grad_set_name, append_backward
-from ..fluid.clip import (
-    GradientClipBase,
-    append_gradient_clip_ops,
-    error_clip_callback,
-)
-from ..fluid.dygraph import base as imperative_base
 from ..fluid.framework import Parameter, program_guard
 from ..fluid.initializer import Constant
 from ..fluid.layer_helper import LayerHelper
...
@@ -168,7 +163,7 @@ class Optimizer:
     """
 
-    @imperative_base.no_grad
+    @imperative_base.no_grad()
     def __init__(
         self,
         learning_rate,
...
@@ -225,7 +220,7 @@ class Optimizer:
                 % type(learning_rate)
             )
         if grad_clip is not None:
-            if not isinstance(grad_clip, GradientClipBase):
+            if not isinstance(grad_clip, paddle.nn.clip.GradientClipBase):
                 raise TypeError(
                     "'grad_clip' should be an instance of GradientClipBase's derived class"
                 )
...
@@ -1042,7 +1037,7 @@ class Optimizer:
                     params_grads.append((parameter_list[index], grad))
         else:
             if callbacks is None:
-                callbacks = [error_clip_callback]
+                callbacks = [paddle.nn.clip.error_clip_callback]
             else:
                 assert isinstance(callbacks, list)
             program = loss.block.program
...
@@ -1103,7 +1098,7 @@ class Optimizer:
             params_grads = self._grad_clip(params_grads)
         else:
-            params_grads = append_gradient_clip_ops(params_grads)
+            params_grads = paddle.nn.clip.append_gradient_clip_ops(params_grads)
 
         # Add regularization if any
         params_grads = self.append_regularization_ops(
...
@@ -1317,7 +1312,7 @@ class Optimizer:
         else:
             core.clear_gradients(param_list, set_to_zero)
 
-    @imperative_base.no_grad
+    @imperative_base.no_grad()
     def minimize(
         self, loss, startup_program=None, parameters=None, no_grad_set=None
     ):
...
@@ -1380,7 +1375,7 @@ class Optimizer:
         return optimize_ops, params_grads
 
-    @imperative_base.no_grad
+    @imperative_base.no_grad()
     @framework.dygraph_only
     def step(self):
         """
...