Unverified commit fe0dc40d
Authored Jan 03, 2023 by 骑马小猫; committed by GitHub on Jan 03, 2023.
[FluidAPI]remove clip api (#48946)
Parent: 822ea0f9

Showing 43 changed files with 1,174 additions and 1,279 deletions (+1174 -1279).
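In summary, this change deletes python/paddle/fluid/clip.py and points every caller at the equivalents that now live under paddle.nn and paddle.nn.clip. The sketch below is only a reading aid for the changed files listed next; the mapping is read off the updated call sites in this commit and assumes a Paddle build that already contains the change:

import paddle

# Classes: fluid.clip.GradientClip*  ->  paddle.nn.ClipGrad*
from paddle.nn import ClipGradByValue, ClipGradByNorm, ClipGradByGlobalNorm

# Helpers that used to live in paddle.fluid.clip now live in paddle.nn.clip
from paddle.nn.clip import ErrorClipByValue, set_gradient_clip, append_gradient_clip_ops

# Functions removed from fluid.layers are reached through paddle / paddle.nn.clip
from paddle.nn import clip  # clip.clip_by_norm, clip.merge_selected_rows, clip.get_tensor_from_selected_rows
y = paddle.clip(paddle.to_tensor([2.0, -3.0]), min=-1.0, max=1.0)  # replaces fluid.layers.clip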
python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py  +3 -3
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py  +3 -4
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py  +1 -1
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py  +1 -1
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py  +3 -2
python/paddle/distributed/fleet/metrics/metric.py  +1 -1
python/paddle/fluid/__init__.py  +0 -2
python/paddle/fluid/clip.py  +0 -944
python/paddle/fluid/incubate/fleet/utils/fleet_util.py  +3 -3
python/paddle/fluid/layers/nn.py  +0 -200
python/paddle/fluid/optimizer.py  +4 -11
python/paddle/fluid/tests/test_error_clip.py  +2 -2
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py  +1 -1
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py  +1 -1
python/paddle/fluid/tests/unittests/collective/fleet/test_dgc_optimizer.py  +4 -4
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py  +4 -4
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py  +3 -3
python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py  +1 -1
python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py  +3 -2
python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py  +2 -2
python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py  +3 -3
python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py  +1 -1
python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py  +2 -2
python/paddle/fluid/tests/unittests/test_adam_op.py  +1 -1
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py  +2 -1
python/paddle/fluid/tests/unittests/test_clip_op.py  +1 -7
python/paddle/fluid/tests/unittests/test_dist_transpiler.py  +1 -1
python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py  +2 -2
python/paddle/fluid/tests/unittests/test_fleet_executor.py  +1 -1
python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py  +1 -1
python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py  +1 -1
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py  +3 -2
python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py  +4 -10
python/paddle/fluid/tests/unittests/test_gradient_clip.py  +21 -25
python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py  +2 -2
python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py  +2 -2
python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py  +2 -2
python/paddle/hapi/model.py  +1 -1
python/paddle/incubate/distributed/models/moe/grad_clip.py  +5 -4
python/paddle/incubate/optimizer/distributed_fused_lamb.py  +1 -1
python/paddle/nn/clip.py  +1069 -4
python/paddle/optimizer/adamw.py  +1 -1
python/paddle/optimizer/optimizer.py  +7 -12
python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py

@@ -20,11 +20,11 @@ __all__ = []
 import paddle
 from paddle.common_ops_import import LayerHelper
-from paddle.fluid.clip import GradientClipByNorm, append_gradient_clip_ops
 from paddle.fluid.dygraph import base as imperative_base
 from paddle.fluid.framework import in_dygraph_mode
 from paddle.fluid.optimizer import Momentum, Optimizer
 from paddle.framework import core
+from paddle.nn.clip import ClipGradByNorm, append_gradient_clip_ops
 from paddle.static import create_global_var

@@ -76,9 +76,9 @@ class DGCMomentumOptimizer(Optimizer):
         self._dgc_clip_norm = None
         if grad_clip is not None:
-            if not isinstance(grad_clip, GradientClipByNorm):
+            if not isinstance(grad_clip, ClipGradByNorm):
                 raise TypeError(
-                    "The type of grad_clip should be 'GradientClipByNorm', because DGCMomentumOptimizer only support GradientClipByNorm"
+                    "The type of grad_clip should be 'ClipGradByNorm', because DGCMomentumOptimizer only support ClipGradByNorm"
                 )
             assert isinstance(num_trainers, int), (
                 "The type of num_trainers should be 'int', but received %s"
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py

@@ -15,9 +15,8 @@
 import paddle
 from paddle import framework
 from paddle.autograd import no_grad
-from paddle.fluid import layers
-from paddle.fluid.clip import ClipGradByGlobalNorm
 from paddle.framework import core
+from paddle.nn import ClipGradByGlobalNorm, clip
 from ...base.topology import ParallelMode
 from ...utils.hybrid_parallel_util import (

@@ -62,8 +61,8 @@ class HybridParallelClipGrad:
                 continue
             merge_grad = g
             if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                merge_grad = layers.merge_selected_rows(g)
-                merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
+                merge_grad = clip.merge_selected_rows(g)
+                merge_grad = clip.get_tensor_from_selected_rows(merge_grad)
             square = paddle.square(merge_grad)
             sum_square = paddle.sum(square)
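For readers skimming the hunk above: HybridParallelClipGrad squares each gradient, sums the squares per tensor, accumulates those sums, and takes the square root to get the global norm used for scaling. A toy illustration of just that arithmetic on made-up dense tensors (this is not the fleet code itself):

import paddle

grads = [paddle.to_tensor([[3.0, 4.0]]), paddle.to_tensor([12.0])]  # made-up gradients
sum_square_list = []
for g in grads:
    square = paddle.square(g)                  # elementwise square, as in the diff above
    sum_square_list.append(paddle.sum(square))
global_norm = paddle.sqrt(paddle.add_n(sum_square_list))
print(float(global_norm))                      # 13.0 = sqrt(9 + 16 + 144)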
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py

@@ -30,7 +30,7 @@ import paddle
 import paddle.distributed as dist
 from paddle.distributed import ParallelMode, fleet
 from paddle.fluid import core
-from paddle.fluid.clip import ClipGradByGlobalNorm
+from paddle.nn import ClipGradByGlobalNorm
 from paddle.optimizer import Optimizer

 HybridParallelClipGrad = (
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py

@@ -25,8 +25,8 @@ import paddle.fluid.framework as framework
 from paddle import nn
 from paddle.autograd import PyLayer
 from paddle.distributed import collective
-from paddle.fluid.clip import ClipGradByGlobalNorm
 from paddle.fluid.framework import EagerParamBase
+from paddle.nn import ClipGradByGlobalNorm

 from .group_sharded_storage import GradStorage
 from .group_sharded_utils import GroupShardedClipGrad, Type, device_guard
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py

@@ -23,6 +23,7 @@ from paddle import _legacy_C_ops
 from paddle.fluid import core, layers
 from paddle.fluid.dygraph import to_variable
 from paddle.fluid.framework import dygraph_only
+from paddle.nn import clip


 class Taskflow:

@@ -65,8 +66,8 @@ class GroupShardedClipGrad:
             merge_grad = g
             if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                merge_grad = layers.get_tensor_from_selected_rows(
-                    layers.merge_selected_rows(g)
+                merge_grad = clip.get_tensor_from_selected_rows(
+                    clip.merge_selected_rows(g)
                 )
             square = paddle.square(merge_grad)
             sum_square = paddle.sum(square)
python/paddle/distributed/fleet/metrics/metric.py

@@ -159,7 +159,7 @@ def auc(stat_pos, stat_neg, scope=None, util=None):
           .. code-block:: python

             # in model.py
-            similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(output, min=-15.0, max=15.0))
+            similarity_norm = fluid.layers.sigmoid(paddle.clip(output, min=-15.0, max=15.0))
             binary_predict = fluid.layers.concat(
                 input=[paddle.subtract(fluid.layers.ceil(similarity_norm), similarity_norm), similarity_norm], axis=1)
             self.auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg] =
python/paddle/fluid/__init__.py

@@ -90,7 +90,6 @@ from .transpiler import (
     DistributeTranspilerConfig,
 )
 from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
-from . import clip
 from . import profiler
 from . import unique_name
 from . import parallel_executor

@@ -164,7 +163,6 @@ __all__ = (
     'ParamAttr',
     'WeightNormParamAttr',
     'DataFeeder',
-    'clip',
     'profiler',
     'unique_name',
     'Scope',
python/paddle/fluid/clip.py  (deleted, file mode 100644 → 0; the 944-line diff is collapsed in this view)
python/paddle/fluid/incubate/fleet/utils/fleet_util.py

@@ -185,7 +185,7 @@ class FleetUtil:
               # below is part of model
               emb = my_slot_net(slots, label) # emb can be fc layer of size 1
-              similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(\
+              similarity_norm = fluid.layers.sigmoid(paddle.clip(\
                   emb, min=-15.0, max=15.0), name="similarity_norm")\
               binary_predict = fluid.layers.concat(input=[\
                   paddle.subtract(\

@@ -1374,7 +1374,7 @@ class FleetUtil:
               label = fluid.layers.data(name="click", shape=[-1, 1],\
                   dtype="int64", lod_level=0, append_batch_size=False)
               emb = my_slot_net(slots, label) # emb can be fc layer of size 1
-              similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(\
+              similarity_norm = fluid.layers.sigmoid(paddle.clip(\
                   emb, min=-15.0, max=15.0), name="similarity_norm")\
               binary_predict = fluid.layers.concat(input=[\
                   paddle.subtract(\

@@ -1574,7 +1574,7 @@ class FleetUtil:
               label = fluid.layers.data(name="click", shape=[-1, 1],\
                   dtype="int64", lod_level=0, append_batch_size=False)
               emb = my_slot_net(slots, label) # emb can be fc layer of size 1
-              similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(\
+              similarity_norm = fluid.layers.sigmoid(paddle.clip(\
                   emb, min=-15.0, max=15.0), name="similarity_norm")\
               binary_predict = fluid.layers.concat(input=[\
                   paddle.subtract(\
python/paddle/fluid/layers/nn.py

@@ -63,10 +63,6 @@ __all__ = [
     'fc',
     'embedding',
     'autoincreased_step_counter',
-    'clip',
-    'clip_by_norm',
-    'merge_selected_rows',
-    'get_tensor_from_selected_rows',
 ]

 OP_NAMEMAPPING = {

@@ -997,199 +993,3 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True):
         )
     return out
-
-
-@templatedoc()
-def clip(x, min, max, name=None):
-    """
-        :old_api: paddle.fluid.layers.clip
-
-    ${comment}
-
-    Args:
-        x(${x_type}): ${x_comment}
-        min(float): ${min_comment}
-        max(float): ${max_comment}
-        name(str, optional): The default value is None.
-                             Normally there is no need for user to set this property.
-                             For more information, please refer to :ref:`api_guide_Name`
-
-    Returns:
-        ${out_comment}
-
-    Return Type:
-        ${out_type}
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            input = fluid.data(
-                name='data', shape=[1], dtype='float32')
-            reward = fluid.layers.clip(x=input, min=-1.0, max=1.0)
-    """
-
-    helper = LayerHelper("clip", **locals())
-    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'clip')
-
-    if name is None:
-        name = unique_name.generate_with_ignorable_key(
-            ".".join([helper.name, 'tmp'])
-        )
-
-    out = helper.create_variable(
-        type=x.type, name=name, dtype=x.dtype, persistable=False
-    )
-
-    helper.append_op(
-        type="clip",
-        inputs={"X": x},
-        attrs={"min": min, "max": max},
-        outputs={"Out": out},
-    )
-
-    return out
-
-
-@templatedoc()
-def clip_by_norm(x, max_norm, name=None):
-    """
-    ${comment}
-
-    Args:
-        x(${x_type}): ${x_comment}
-        max_norm(${max_norm_type}): ${max_norm_comment}
-        name(str, optional): For detailed information, please refer
-            to :ref:`api_guide_Name`. Usually name is no need to set and
-            None by default.
-
-    Returns:
-        Tensor:
-
-        out(${out_type}): ${out_comment}
-
-    Examples:
-        .. code-block:: python
-
-            import paddle
-            import paddle.fluid as fluid
-
-            input = paddle.to_tensor([[2.0, 2.0], [2.0, 2.0]], dtype='float32')
-            reward = fluid.layers.clip_by_norm(x=input, max_norm=1.0)
-            # [[0.5, 0.5], [0.5, 0.5]]
-    """
-
-    if in_dygraph_mode():
-        return _C_ops.clip_by_norm(x, max_norm)
-    else:
-        helper = LayerHelper("clip_by_norm", **locals())
-        check_variable_and_dtype(x, 'X', ['float32', 'float16'], 'clip_by_norm')
-        check_type(max_norm, 'max_norm', (float), 'clip_by_norm')
-
-        if name is None:
-            name = unique_name.generate_with_ignorable_key(
-                ".".join([helper.name, 'tmp'])
-            )
-
-        out = helper.create_variable(
-            type=x.type, name=name, dtype=x.dtype, persistable=False
-        )
-
-        helper.append_op(
-            type="clip_by_norm",
-            inputs={"X": x},
-            attrs={"max_norm": max_norm},
-            outputs={"Out": out},
-        )
-
-        return out
-
-
-@templatedoc()
-def merge_selected_rows(x, name=None):
-    """
-    ${comment}
-
-    Args:
-        x(${x_type}): ${x_comment}
-        name(basestring|None): Name of the output.
-
-    Returns:
-        out(${out_type}): ${out_comment}
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            b = fluid.default_main_program().global_block()
-            var = b.create_var(
-                name="X", dtype="float32", persistable=True,
-                type=fluid.core.VarDesc.VarType.SELECTED_ROWS)
-            y = fluid.layers.merge_selected_rows(var)
-    """
-    if in_dygraph_mode():
-        return _C_ops.merge_selected_rows(x)
-    else:
-        helper = LayerHelper("merge_selected_rows", **locals())
-        out = helper.create_variable_for_type_inference(dtype=x.dtype)
-        helper.append_op(
-            type="merge_selected_rows",
-            inputs={"X": x},
-            attrs={},
-            outputs={"Out": out},
-        )
-        return out
-
-
-@templatedoc()
-def get_tensor_from_selected_rows(x, name=None):
-    """
-    This operator gets tensor data from input with SelectedRows type, and outputs a LoDTensor.
-
-    .. code-block:: text
-
-        input x is SelectedRows:
-           x.rows = [0, 5, 5, 4, 19]
-           x.height = 20
-           x.value = [[1, 1] [2, 2] [2, 2] [3, 3] [6, 6]]
-
-        Output is LoDTensor:
-           out.shape = [5, 2]
-           out.data = [[1, 1],
-                       [2, 2],
-                       [2, 2],
-                       [3, 3],
-                       [6, 6]]
-
-    Args:
-        x(SelectedRows): Input with SelectedRows type. The data type is float32, float64, int32 or int64.
-        name(str, optional): The default value is None.  Normally there is no need for user to set this property.
-            For more information, please refer to :ref:`api_guide_Name` .
-
-    Returns:
-        Variable: LoDTensor transformed from SelectedRows. The data type is same with input.
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            b = fluid.default_main_program().global_block()
-            input = b.create_var(name="X", dtype="float32", persistable=True, type=fluid.core.VarDesc.VarType.SELECTED_ROWS)
-            out = fluid.layers.get_tensor_from_selected_rows(input)
-    """
-
-    check_type(x, 'x', Variable, 'get_tensor_from_selected_rows')
-    if x.type != core.VarDesc.VarType.SELECTED_ROWS:
-        raise TypeError(
-            "The type of 'x' in get_tensor_from_selected_rows must be SELECTED_ROWS."
-        )
-    helper = LayerHelper('get_tensor_from_selected_rows', **locals())
-    out = helper.create_variable_for_type_inference(dtype=x.dtype)
-    helper.append_op(
-        type='get_tensor_from_selected_rows',
-        inputs={'X': x},
-        outputs={'Out': out},
-        attrs={},
-    )
-    return out
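With clip, clip_by_norm, merge_selected_rows, and get_tensor_from_selected_rows removed from fluid.layers, the call sites elsewhere in this diff switch to paddle.clip and to the paddle.nn.clip module. A minimal sketch of the dense-tensor replacements, assuming the post-commit tree (the SelectedRows helpers are omitted because they need a SelectedRows variable to demonstrate):

import paddle
from paddle.nn import clip

x = paddle.to_tensor([[2.0, -3.0], [4.0, 0.5]])

# was: fluid.layers.clip(x, min=-1.0, max=1.0)
y = paddle.clip(x, min=-1.0, max=1.0)

# was: fluid.layers.clip_by_norm(x, max_norm=1.0); the updated unit test below uses clip.clip_by_norm
z = clip.clip_by_norm(x, max_norm=1.0)

print(y.numpy(), z.numpy())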
python/paddle/fluid/optimizer.py

@@ -38,13 +38,6 @@ from .backward import (
     _append_grad_suffix_,
     _get_no_grad_set_name,
 )
-from .clip import (
-    GradientClipBase,
-    GradientClipByNorm,
-    error_clip_callback,
-    append_gradient_clip_ops,
-    ClipGradByGlobalNorm,
-)
 from .framework import program_guard
 from .initializer import Constant
 from .layer_helper import LayerHelper

@@ -160,7 +153,7 @@ class Optimizer:
             )
         if grad_clip is not None:
-            if not isinstance(grad_clip, GradientClipBase):
+            if not isinstance(grad_clip, paddle.nn.clip.GradientClipBase):
                 raise TypeError(
                     "'grad_clip' should be an instance of GradientClipBase's derived class"
                 )

@@ -1030,7 +1023,7 @@ class Optimizer:
                 params_grads.append((param, grad_var))
         else:
             if callbacks is None:
-                callbacks = [error_clip_callback]
+                callbacks = [paddle.nn.clip.error_clip_callback]
             else:
                 assert isinstance(callbacks, list)
             program = loss.block.program

@@ -1260,7 +1253,7 @@ class Optimizer:
         # NOTE(zhiqiu): currently, only support ClipGradByGlobalNorm and without regularization.
         if self._flatten_param_grads and self.regularization is None:
             if self._grad_clip is None or isinstance(
-                self._grad_clip, ClipGradByGlobalNorm
+                self._grad_clip, paddle.nn.ClipGradByGlobalNorm
             ):
                 params_grads = self.flatten_param_grads(params_grads)

@@ -1268,7 +1261,7 @@ class Optimizer:
         if self._grad_clip is not None:
             params_grads = self._grad_clip(params_grads)
         else:
-            params_grads = append_gradient_clip_ops(params_grads)
+            params_grads = paddle.nn.clip.append_gradient_clip_ops(params_grads)

         # Add regularization if any
         params_grads = self.append_regularization_ops(
python/paddle/fluid/tests/test_error_clip.py

@@ -38,13 +38,13 @@ with fluid.program_guard(main_program=prog):
 prog_clip = prog.clone()
 prog_clip.block(0).var(hidden1.name)._set_error_clip(
-    fluid.clip.ErrorClipByValue(max=CLIP_MAX, min=CLIP_MIN)
+    paddle.nn.clip.ErrorClipByValue(max=CLIP_MAX, min=CLIP_MIN)
 )

 avg_cost_clip = prog_clip.block(0).var(avg_cost.name)
 fluid.backward.append_backward(loss=avg_cost)
 fluid.backward.append_backward(
-    loss=avg_cost_clip, callbacks=[fluid.clip.error_clip_callback]
+    loss=avg_cost_clip, callbacks=[paddle.nn.clip.error_clip_callback]
 )

 hidden1_grad = prog.block(0).var(hidden1.name + "@GRAD")
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py

@@ -122,7 +122,7 @@ class TestDistMnist2x2(TestDistRunnerBase):
         opt = paddle.optimizer.AdamW(
             learning_rate=lr_val,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )

         acc_steps = 2  # accumulated steps for pipeline
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py

@@ -122,7 +122,7 @@ class TestDistMnist2x2(TestDistRunnerBase):
         opt = fluid.optimizer.Momentum(
             learning_rate=lr_val,
             momentum=0.9,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )

         acc_steps = 2  # accumulated steps for pipeline
python/paddle/fluid/tests/unittests/collective/fleet/test_dgc_optimizer.py

@@ -15,10 +15,10 @@
 import unittest

 import paddle
-import paddle.fluid.clip as clip
 import paddle.fluid.framework as framework
 import paddle.fluid.optimizer as optimizer
 import paddle.fluid.regularizer as regularizer
+import paddle.nn.clip as clip

 paddle.enable_static()

@@ -76,7 +76,7 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
             rampup_begin_step=0,
             num_trainers=2,
             regularization=regularization,
-            grad_clip=clip.GradientClipByNorm(1.0),
+            grad_clip=clip.ClipGradByNorm(1.0),
         )

         if use_recompute:

@@ -144,14 +144,14 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
         print("dgc regular_coeff=" + str(coeff))

     def test_tpyeError(self):
-        # the type of DGCMomentumOptimizer(grad_clip=) must be 'GradientClipByNorm'
+        # the type of DGCMomentumOptimizer(grad_clip=) must be 'ClipGradByNorm'
         with self.assertRaises(TypeError):
             dgc_momentum_optimizer = self.MockDGCMomentum(
                 learning_rate=0.01,
                 momentum=0.2,
                 rampup_begin_step=0,
                 num_trainers=2,
-                grad_clip=clip.GradientClipByGlobalNorm(1.0),
+                grad_clip=clip.ClipGradByGlobalNorm(1.0),
             )

     def test_momentum_without_dgc(self):
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py

@@ -354,7 +354,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
         }
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip

@@ -552,7 +552,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
         strategy.fuse_grad_merge = True
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip

@@ -940,7 +940,7 @@ class TestFleetHybridOptimizerBoundary(TestFleetMetaOptimizer):
         }
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip

@@ -1044,7 +1044,7 @@ class TestFleetHybridOptimizerBoundary(TestFleetMetaOptimizer):
         }
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py

@@ -640,7 +640,7 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
         )
         avg_cost, strategy = self.net(train_prog, startup_prog)
         self.set_strategy(strategy, 'sharding')
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
         )

@@ -1309,7 +1309,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
             "micro_batch_size": 2,
             "accumulate_steps": 4,
         }
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
         )

@@ -1547,7 +1547,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
             "micro_batch_size": 2,
             "accumulate_steps": 4,
         }
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
         self.optimizer(
             avg_cost,
             strategy,
python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py

@@ -22,8 +22,8 @@ import paddle
 import paddle.distributed.fleet as fleet
 import paddle.fluid.core as core
 from paddle.distributed.fleet.meta_optimizers.common import CollectiveHelper
-from paddle.fluid.clip import ClipGradBase, _clip_by_global_norm_using_mp_type
 from paddle.incubate import DistributedFusedLamb
+from paddle.nn.clip import ClipGradBase, _clip_by_global_norm_using_mp_type
 from paddle.vision.models import resnet18 as resnet
python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py

@@ -19,6 +19,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.jit.dy2static import Call
+from paddle.nn import clip

 SEED = 2020
 np.random.seed(SEED)

@@ -89,11 +90,11 @@ def len_with_selected_rows(place):
             type=fluid.core.VarDesc.VarType.SELECTED_ROWS,
         )
         # y is Variable(SelectedRows)
-        y = fluid.layers.merge_selected_rows(var)
+        y = clip.merge_selected_rows(var)
         y_len = Call(len)(y)

         # z is inner tensor with shape [4, 2]
-        z = fluid.layers.get_tensor_from_selected_rows(y)
+        z = clip.get_tensor_from_selected_rows(y)
         z_len = Call(len)(z)

         # set data for selected_rows
python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py

@@ -22,8 +22,8 @@ from seq2seq_dygraph_model import AttentionModel, BaseModel
 from seq2seq_utils import Seq2SeqModelHyperParams, get_data_iter

 import paddle.fluid as fluid
-from paddle.fluid.clip import GradientClipByGlobalNorm
 from paddle.jit import ProgramTranslator
+from paddle.nn import ClipGradByGlobalNorm

 place = (
     fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()

@@ -71,7 +71,7 @@ def train(args, attn_model=False):
             dropout=args.dropout,
         )

-        gloabl_norm_clip = GradientClipByGlobalNorm(args.max_grad_norm)
+        gloabl_norm_clip = ClipGradByGlobalNorm(args.max_grad_norm)
         optimizer = fluid.optimizer.SGD(
             args.learning_rate,
             parameter_list=model.parameters(),
python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py

@@ -127,7 +127,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Clip(
 ):
     def set_params(self):
         self.operand = paddle.add
-        self.act = fluid.layers.clip
+        self.act = paddle.clip
         self.act_alpha = 0.0
         self.act_beta = 10.0

@@ -219,7 +219,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Clip(
 ):
     def set_params(self):
         self.operand = paddle.subtract
-        self.act = fluid.layers.clip
+        self.act = paddle.clip
         self.act_alpha = 0.0
         self.act_beta = 10.0

@@ -319,7 +319,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Clip(
 ):
     def set_params(self):
         self.operand = paddle.multiply
-        self.act = fluid.layers.clip
+        self.act = paddle.clip
         self.act_alpha = 0.0
         self.act_beta = 10.0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py

@@ -106,7 +106,7 @@ class TensorRTSubgraphPassHardSwishPluginTest(
 class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest):
     def append_act(self, x):
-        return fluid.layers.clip(x, 0, 1)
+        return paddle.clip(x, 0, 1)


 class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest):
python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py

@@ -117,13 +117,13 @@ class TestClipOpError(unittest.TestCase):
             input_data = np.random.random((2, 4)).astype("float32")

             def test_Variable():
-                fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+                paddle.clip(x=input_data, min=-1.0, max=1.0)

             self.assertRaises(TypeError, test_Variable)

             def test_dtype():
                 x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
-                fluid.layers.clip(x=x2, min=-1.0, max=1.0)
+                paddle.clip(x=x2, min=-1.0, max=1.0)

             self.assertRaises(TypeError, test_dtype)

         paddle.disable_static()
python/paddle/fluid/tests/unittests/test_adam_op.py

@@ -686,7 +686,7 @@ class TestAdamOpV2(unittest.TestCase):
             value = np.arange(26).reshape(2, 13).astype("float32")
             a = fluid.dygraph.to_variable(value)
             linear = paddle.nn.Linear(13, 5)
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
             adam = paddle.optimizer.Adam(
                 0.1, parameters=linear.parameters(), grad_clip=clip
             )
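The adjusted test above is the common end-user pattern after this change: build a paddle.nn.ClipGradByGlobalNorm and hand it to an optimizer through grad_clip. A minimal runnable sketch of that pattern in dygraph mode (toy input and layer sizes are illustrative only):

import numpy as np
import paddle

value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)

clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)  # replaces fluid.clip.GradientClipByGlobalNorm
adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters(), grad_clip=clip)

out = linear(a)
loss = paddle.mean(out)
loss.backward()
adam.step()        # gradients are clipped by global norm before the update
adam.clear_grad()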
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py

@@ -20,12 +20,13 @@ from op_test import OpTest
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
+from paddle.nn import clip


 class TestClipByNormOp(OpTest):
     def setUp(self):
         self.max_relative_error = 0.006
-        self.python_api = fluid.layers.clip_by_norm
+        self.python_api = clip.clip_by_norm
         self.init_dtype()
         self.initTestCase()
         input = np.random.random(self.shape).astype(self.dtype)
python/paddle/fluid/tests/unittests/test_clip_op.py

@@ -128,15 +128,9 @@ class TestClipOpError(unittest.TestCase):
             input_data = np.random.random((2, 4)).astype("float32")

             def test_Variable():
-                fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+                paddle.clip(x=input_data, min=-1.0, max=1.0)

             self.assertRaises(TypeError, test_Variable)

-            def test_dtype():
-                x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
-                fluid.layers.clip(x=x2, min=-1.0, max=1.0)
-
-            self.assertRaises(TypeError, test_dtype)
-
         paddle.disable_static()
python/paddle/fluid/tests/unittests/test_dist_transpiler.py

@@ -584,7 +584,7 @@ class TestL2Decay(TranspilerTest):
         def filter(param):
             return param.name == "fc_w"

-        clip = fluid.clip.GradientClipByValue(0.1, need_clip=filter)
+        clip = paddle.nn.ClipGradByValue(0.1, need_clip=filter)
         sgd_optimizer.minimize(avg_cost, grad_clip=clip)

     def transpiler_test_impl(self):
python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py

@@ -504,8 +504,8 @@ class PaddingRNNTestBase(unittest.TestCase):
                 self.feed_order,
             ) = res_vars

-            fluid.clip.set_gradient_clip(
-                clip=fluid.clip.GradientClipByGlobalNorm(
+            paddle.nn.clip.set_gradient_clip(
+                clip=paddle.nn.ClipGradByGlobalNorm(
                     clip_norm=config.max_grad_norm
                 )
             )
python/paddle/fluid/tests/unittests/test_fleet_executor.py

@@ -64,7 +64,7 @@ class TestFleetExecutor(unittest.TestCase):
             )
             opt = paddle.optimizer.AdamW(
                 learning_rate=lr_val,
-                grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+                grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
             )
             opt.minimize(loss)
             # TODO: section_program will be removed in the future
python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py

@@ -64,7 +64,7 @@ class TestFleetExecutor(unittest.TestCase):
             )
             opt = paddle.optimizer.AdamW(
                 learning_rate=lr_val,
-                grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+                grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
             )
             opt.minimize(loss)
             # TODO: section_program will be removed in the future
python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py

@@ -47,7 +47,7 @@ class TestFleetExecutor(unittest.TestCase):
             )
             opt = paddle.optimizer.AdamW(
                 learning_rate=lr_val,
-                grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+                grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
             )
             opt.minimize(loss)
             # TODO: section_program will be removed in the future
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py

@@ -20,6 +20,7 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid import Program, program_guard
 from paddle.fluid.op import Operator
+from paddle.nn import clip


 class TestGetTensorFromSelectedRowsError(unittest.TestCase):

@@ -31,12 +32,12 @@ class TestGetTensorFromSelectedRowsError(unittest.TestCase):
         x_data = np.random.random((2, 4)).astype("float32")

         def test_Variable():
-            fluid.layers.get_tensor_from_selected_rows(x=x_data)
+            clip.get_tensor_from_selected_rows(x=x_data)

         self.assertRaises(TypeError, test_Variable)

         def test_SELECTED_ROWS():
-            fluid.layers.get_tensor_from_selected_rows(x=x_var)
+            clip.get_tensor_from_selected_rows(x=x_var)

         self.assertRaises(TypeError, test_SELECTED_ROWS)
python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py

@@ -17,12 +17,8 @@ import unittest
 import numpy as np

 import paddle.fluid as fluid
-from paddle.fluid.clip import (
-    GradientClipByGlobalNorm,
-    GradientClipByNorm,
-    GradientClipByValue,
-)
 from paddle.fluid.dygraph.base import to_variable
+from paddle.nn import ClipGradByGlobalNorm, ClipGradByNorm, ClipGradByValue


 class TestGradClipByGlobalNorm(unittest.TestCase):

@@ -67,7 +63,7 @@ class TestGradClipByGlobalNorm(unittest.TestCase):
     def get_dygrap_global_norm_result(self):
         with fluid.dygraph.guard():
-            gloabl_norm_clip = GradientClipByGlobalNorm(self.max_global_norm)
+            gloabl_norm_clip = ClipGradByGlobalNorm(self.max_global_norm)
             p_g_var = []
             for p, g in self.para_and_grad:
                 new_p = to_variable(p)

@@ -142,7 +138,7 @@ class TestGradClipByNorm(unittest.TestCase):
     def get_dygrap_norm_result(self):
         with fluid.dygraph.guard():
-            norm_clip = GradientClipByNorm(self.max_norm)
+            norm_clip = ClipGradByNorm(self.max_norm)
             p_g_var = []
             for p, g in self.para_and_grad:
                 new_p = to_variable(p)

@@ -212,9 +208,7 @@ class TestGradClipByValue(unittest.TestCase):
     def get_dygrap_clip_result(self):
         with fluid.dygraph.guard():
-            value_clip = GradientClipByValue(
-                max=self.max_value, min=self.min_value
-            )
+            value_clip = ClipGradByValue(max=self.max_value, min=self.min_value)
             p_g_var = []
             for p, g in self.para_and_grad:
                 new_p = to_variable(p)
python/paddle/fluid/tests/unittests/test_gradient_clip.py

@@ -20,7 +20,7 @@ from fake_reader import fake_imdb_reader
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-from paddle.fluid.clip import _allow_pure_fp16_global_norm_clip
+from paddle.nn.clip import _allow_pure_fp16_global_norm_clip

 paddle.enable_static()

@@ -173,9 +173,9 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
     # test whether the output is right when use 'set_gradient_clip'
     def test_old_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_norm)
-            fluid.clip.set_gradient_clip(clip)
-            return fluid.clip.append_gradient_clip_ops(params_grads)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
+            paddle.nn.clip.set_gradient_clip(clip)
+            return paddle.nn.clip.append_gradient_clip_ops(params_grads)

         self.clip_gradient = func
         self.check_gradient_clip(fluid.CPUPlace())

@@ -183,7 +183,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
     # test whether the output is right when use grad_clip
     def test_new_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_norm)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
             return clip(params_grads)

         self.clip_gradient = func

@@ -192,7 +192,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
     # test whether the output is right when use grad_clip under float64
     def test_new_gradient_clip_fp64(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_norm)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
             return clip(params_grads)

         self.clip_gradient = func

@@ -201,15 +201,15 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
     # invoke 'set_gradient_clip' in a wrong order
     def test_wrong_API_order(self):
         def backward_func(cost):
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)
-            fluid.clip.set_gradient_clip(clip)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=5.0)
+            paddle.nn.clip.set_gradient_clip(clip)
             sgd_optimizer = fluid.optimizer.SGD(
                 learning_rate=0.01, grad_clip=clip
             )
             # if 'set_gradient_clip' and 'optimize(grad_clip)' together, 'set_gradient_clip' will be ineffective
             sgd_optimizer.minimize(cost)
             # 'set_gradient_clip' must before 'minimize', otherwise, 'set_gradient_clip' will be ineffective
-            fluid.clip.set_gradient_clip(clip)
+            paddle.nn.clip.set_gradient_clip(clip)

         self.backward_and_optimize = backward_func

         for place in self.get_places():

@@ -269,7 +269,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
         with fluid.program_guard(
             main_program=prog, startup_program=startup_program
         ):
-            clip = fluid.clip.GradientClipByGlobalNorm(self.clip_norm)
+            clip = paddle.nn.ClipGradByGlobalNorm(self.clip_norm)
             x = (
                 fluid.default_main_program()
                 .global_block()

@@ -313,7 +313,7 @@ class TestGradientClipByNorm(TestGradientClip):
     # test whether the output is right when use grad_clip
    def test_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByNorm(clip_norm=self.clip_norm)
+            clip = paddle.nn.ClipGradByNorm(clip_norm=self.clip_norm)
             return clip(params_grads)

         self.clip_gradient = func

@@ -321,7 +321,7 @@ class TestGradientClipByNorm(TestGradientClip):
     # if grad is None or not need clip
     def test_none_grad(self):
-        clip = fluid.clip.GradientClipByNorm(self.clip_norm)
+        clip = paddle.nn.ClipGradByNorm(self.clip_norm)
         x = (
             fluid.default_main_program()
             .global_block()

@@ -371,7 +371,7 @@ class TestGradientClipByValue(TestGradientClip):
     # test whether the output is right when use grad_clip
     def test_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByValue(max=self.max, min=self.min)
+            clip = paddle.nn.ClipGradByValue(max=self.max, min=self.min)
             return clip(params_grads)

         self.clip_gradient = func

@@ -379,7 +379,7 @@ class TestGradientClipByValue(TestGradientClip):
     # if grad is None or not need clip
     def test_none_grad(self):
-        clip = fluid.clip.GradientClipByValue(self.max, self.min)
+        clip = paddle.nn.ClipGradByValue(self.max, self.min)
         x = (
             fluid.default_main_program()
             .global_block()

@@ -419,7 +419,7 @@ class TestDygraphGradientClip(unittest.TestCase):
         sgd_optimizer = fluid.optimizer.SGD(
             learning_rate=0.0,
             parameter_list=linear.parameters(),
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(0.1),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(0.1),
         )
         self.check_clip_result(loss, sgd_optimizer)

@@ -430,12 +430,8 @@ class TestDygraphGradientClip(unittest.TestCase):
 class TestDygraphGradientClipByGlobalNorm(TestDygraphGradientClip):
     def setUp(self):
         self.clip_norm = 0.8
-        self.clip1 = fluid.clip.GradientClipByGlobalNorm(
-            clip_norm=self.clip_norm
-        )
-        self.clip2 = fluid.clip.GradientClipByGlobalNorm(
-            clip_norm=self.clip_norm
-        )
+        self.clip1 = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
+        self.clip2 = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)

     def check_clip_result(self, loss, optimizer):
         # if grad is None

@@ -476,7 +472,7 @@ class TestDygraphGradientClipByGlobalNorm(TestDygraphGradientClip):
 class TestDygraphGradientClipByNorm(TestDygraphGradientClip):
     def setUp(self):
         self.clip_norm = 0.8
-        self.clip = fluid.clip.GradientClipByNorm(clip_norm=self.clip_norm)
+        self.clip = paddle.nn.ClipGradByNorm(clip_norm=self.clip_norm)

     def check_clip_result(self, loss, optimizer):
         # if grad is None

@@ -506,7 +502,7 @@ class TestDygraphGradientClipByValue(TestDygraphGradientClip):
     def setUp(self):
         self.max = 0.2
         self.min = 0.1
-        self.clip = fluid.clip.GradientClipByValue(max=self.max, min=self.min)
+        self.clip = paddle.nn.ClipGradByValue(max=self.max, min=self.min)

     def check_clip_result(self, loss, optimizer):
         # if grad is None

@@ -572,7 +568,7 @@ class TestDygraphGradientClipFP16(unittest.TestCase):
                     params_grads.append((param, param._grad_ivar()))
                 _, grads = zip(*params_grads)
                 # clip grads
-                clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=0.8)
+                clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.8)
                 params_grads = clip(params_grads)
                 _, grads_clip = zip(*params_grads)
                 # param update

@@ -616,7 +612,7 @@ class TestDygraphGradientClipFP64(unittest.TestCase):
             params_grads.append((param, param._grad_ivar()))
         _, grads = zip(*params_grads)
         # clip grads
-        clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=0.1)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.1)
         params_grads = clip(params_grads)
         _, grads_clip = zip(*params_grads)
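The dygraph FP16/FP64 tests above also rely on the clip objects staying directly callable on a list of (param, grad) pairs, which is how code that manages gradients by hand applies clipping. A short sketch under that assumption (toy network, illustrative values):

import numpy as np
import paddle

linear = paddle.nn.Linear(5, 5)
inputs = paddle.to_tensor(np.random.uniform(-1, 1, (3, 5)).astype("float32"))
loss = paddle.mean(linear(inputs))
loss.backward()

params_grads = [(p, p.grad) for p in linear.parameters() if p.grad is not None]
clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.8)
params_grads = clip(params_grads)   # returns the clipped (param, grad) pairs
_, grads_clip = zip(*params_grads)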
python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py

@@ -361,7 +361,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
         place = fluid.CPUPlace()
         with fluid.dygraph.guard(place):
             model = MyLayer(size, vocab_size, size)
-            grad_clip = fluid.clip.GradientClipByGlobalNorm(0.001)
+            grad_clip = paddle.nn.ClipGradByGlobalNorm(0.001)
             optimizer = fluid.optimizer.AdamOptimizer(
                 0.001, parameter_list=model.parameters(), grad_clip=grad_clip
             )

@@ -380,7 +380,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
         with fluid.dygraph.guard(place):
             model = MyLayer2(size, vocab_size, size)
-            grad_clip = fluid.clip.GradientClipByGlobalNorm(0.001)
+            grad_clip = paddle.nn.ClipGradByGlobalNorm(0.001)
             optimizer = fluid.optimizer.AdamOptimizer(
                 0.001, parameter_list=model.parameters(), grad_clip=grad_clip
             )
python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py

@@ -52,7 +52,7 @@ class TestSimpleNet(unittest.TestCase):
             fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
-            # grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)
+            # grad_clip = paddle.nn.ClipGradByGlobalNorm(5.0)

             input_word = np.array([[1, 2], [2, 1]]).astype('int64')
             input = paddle.to_tensor(input_word)

@@ -91,7 +91,7 @@ class TestSimpleNet(unittest.TestCase):
             fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
-            grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)
+            grad_clip = paddle.nn.ClipGradByGlobalNorm(5.0)

             input_word = np.array([[1, 2], [2, 1]]).astype('int64')
             input = to_variable(input_word)
python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py

@@ -131,13 +131,13 @@ class TestClipOpError(unittest.TestCase):
             input_data = np.random.random((2, 4)).astype("float32")

             def test_Variable():
-                fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+                paddle.clip(x=input_data, min=-1.0, max=1.0)

             self.assertRaises(TypeError, test_Variable)

             def test_dtype():
                 x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
-                fluid.layers.clip(x=x2, min=-1.0, max=1.0)
+                paddle.clip(x=x2, min=-1.0, max=1.0)

             self.assertRaises(TypeError, test_dtype)

         paddle.disable_static()
python/paddle/hapi/model.py

@@ -1535,7 +1535,7 @@ class Model:
             assert isinstance(
                 self._optimizer._grad_clip,
                 (paddle.nn.ClipGradByGlobalNorm, paddle.nn.ClipGradByNorm),
-            ), "Only GradientClipByNorm and GradientClipByGlobalNorm are supported in amp training with level=O2 currently."
+            ), "Only ClipGradByNorm and ClipGradByGlobalNorm are supported in amp training with level=O2 currently."

         self._adapter._amp_custom_lists = {}
         self._adapter._amp_configs = {}
python/paddle/incubate/distributed/models/moe/grad_clip.py

@@ -15,13 +15,14 @@
 import paddle
 import paddle.distributed as dist
 from paddle.fluid import core, layers
-from paddle.fluid.clip import ClipGradBase, _squared_l2_norm
 from paddle.fluid.dygraph import base as imperative_base
+from paddle.nn import clip
+from paddle.nn.clip import ClipGradBase, _squared_l2_norm


 class ClipGradForMOEByGlobalNorm(ClipGradBase):
     r"""
-    The Algrithm is the same as paddle.fluid.clip.ClipGradByGlobalNorm
+    The Algrithm is the same as paddle.nn.ClipGradByGlobalNorm

     Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
     :math:`t\_list` , and limit it to ``clip_norm`` .

@@ -113,8 +114,8 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase):
                 continue
             merge_grad = g
             if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                merge_grad = layers.merge_selected_rows(g)
-                merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
+                merge_grad = clip.merge_selected_rows(g)
+                merge_grad = clip.get_tensor_from_selected_rows(merge_grad)
             sum_square = _squared_l2_norm(merge_grad)
             if sum_square.dtype == core.VarDesc.VarType.FP16:
                 sum_square_list_fp16.append(sum_square)
python/paddle/incubate/optimizer/distributed_fused_lamb.py

@@ -16,11 +16,11 @@ import os
 import paddle
 from paddle.fluid import core, framework, unique_name
-from paddle.fluid.clip import ClipGradByGlobalNorm
 from paddle.fluid.executor import global_scope
 from paddle.fluid.framework import Variable, name_scope
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.optimizer import Optimizer
+from paddle.nn import ClipGradByGlobalNorm


 def init_communicator(block, rank, ranks, ring_id):
python/paddle/nn/clip.py  (+1069 -4; the large diff that moves the clip utilities here from paddle/fluid/clip.py is collapsed in this view)
python/paddle/optimizer/adamw.py

@@ -20,10 +20,10 @@ import paddle
 from .. import _C_ops
 from ..fluid import core, framework, unique_name
-from ..fluid.clip import GradientClipBase
 from ..fluid.dygraph import base as imperative_base
 from ..fluid.framework import Parameter, Variable
 from ..fluid.layer_helper import LayerHelper
+from ..nn.clip import GradientClipBase
 from .lr import LRScheduler
 from .optimizer import Optimizer
python/paddle/optimizer/optimizer.py

@@ -18,6 +18,7 @@ from collections import defaultdict
 import numpy as np

 import paddle
+import paddle.autograd as imperative_base
 from paddle import _C_ops
 from paddle.fluid import core
 from paddle.fluid.framework import (

@@ -32,12 +33,6 @@ from paddle.fluid.framework import (
 from ..fluid import framework, unique_name
 from ..fluid.backward import _get_no_grad_set_name, append_backward
-from ..fluid.clip import (
-    GradientClipBase,
-    append_gradient_clip_ops,
-    error_clip_callback,
-)
-from ..fluid.dygraph import base as imperative_base
 from ..fluid.framework import Parameter, program_guard
 from ..fluid.initializer import Constant
 from ..fluid.layer_helper import LayerHelper

@@ -168,7 +163,7 @@ class Optimizer:
     """

-    @imperative_base.no_grad
+    @imperative_base.no_grad()
     def __init__(
         self,
         learning_rate,

@@ -225,7 +220,7 @@ class Optimizer:
                 % type(learning_rate)
             )
         if grad_clip is not None:
-            if not isinstance(grad_clip, GradientClipBase):
+            if not isinstance(grad_clip, paddle.nn.clip.GradientClipBase):
                 raise TypeError(
                     "'grad_clip' should be an instance of GradientClipBase's derived class"
                 )

@@ -1042,7 +1037,7 @@ class Optimizer:
                 params_grads.append((parameter_list[index], grad))
         else:
             if callbacks is None:
-                callbacks = [error_clip_callback]
+                callbacks = [paddle.nn.clip.error_clip_callback]
             else:
                 assert isinstance(callbacks, list)
             program = loss.block.program

@@ -1103,7 +1098,7 @@ class Optimizer:
             params_grads = self._grad_clip(params_grads)
         else:
-            params_grads = append_gradient_clip_ops(params_grads)
+            params_grads = paddle.nn.clip.append_gradient_clip_ops(params_grads)

         # Add regularization if any
         params_grads = self.append_regularization_ops(

@@ -1317,7 +1312,7 @@ class Optimizer:
         else:
             core.clear_gradients(param_list, set_to_zero)

-    @imperative_base.no_grad
+    @imperative_base.no_grad()
     def minimize(
         self, loss, startup_program=None, parameters=None, no_grad_set=None
     ):

@@ -1380,7 +1375,7 @@ class Optimizer:
         return optimize_ops, params_grads

-    @imperative_base.no_grad
+    @imperative_base.no_grad()
     @framework.dygraph_only
     def step(self):
         """