BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle), commit fe0dc40d
Unverified commit fe0dc40d
Authored by 骑马小猫 on Jan 03, 2023; committed via GitHub on Jan 03, 2023
Parent: 822ea0f9

[FluidAPI]remove clip api (#48946)

Showing 43 changed files with 1174 additions and 1279 deletions
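Every change in this commit follows the same substitution pattern: gradient-clipping classes and clip layers that used to live under paddle.fluid are replaced by their paddle.nn and paddle counterparts. A minimal sketch of the renames that recur throughout the diff below (illustrative only, not part of the commit itself):

    import paddle

    # paddle.fluid.clip.GradientClipByGlobalNorm -> paddle.nn.ClipGradByGlobalNorm
    # paddle.fluid.clip.GradientClipByNorm       -> paddle.nn.ClipGradByNorm
    # paddle.fluid.clip.GradientClipByValue      -> paddle.nn.ClipGradByValue
    # paddle.fluid.layers.clip(x, min, max)      -> paddle.clip(x, min=..., max=...)
    clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)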
python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py  +3 -3
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py  +3 -4
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py  +1 -1
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py  +1 -1
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py  +3 -2
python/paddle/distributed/fleet/metrics/metric.py  +1 -1
python/paddle/fluid/__init__.py  +0 -2
python/paddle/fluid/clip.py  +0 -944
python/paddle/fluid/incubate/fleet/utils/fleet_util.py  +3 -3
python/paddle/fluid/layers/nn.py  +0 -200
python/paddle/fluid/optimizer.py  +4 -11
python/paddle/fluid/tests/test_error_clip.py  +2 -2
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py  +1 -1
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py  +1 -1
python/paddle/fluid/tests/unittests/collective/fleet/test_dgc_optimizer.py  +4 -4
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py  +4 -4
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py  +3 -3
python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py  +1 -1
python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py  +3 -2
python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py  +2 -2
python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py  +3 -3
python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py  +1 -1
python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py  +2 -2
python/paddle/fluid/tests/unittests/test_adam_op.py  +1 -1
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py  +2 -1
python/paddle/fluid/tests/unittests/test_clip_op.py  +1 -7
python/paddle/fluid/tests/unittests/test_dist_transpiler.py  +1 -1
python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py  +2 -2
python/paddle/fluid/tests/unittests/test_fleet_executor.py  +1 -1
python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py  +1 -1
python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py  +1 -1
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py  +3 -2
python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py  +4 -10
python/paddle/fluid/tests/unittests/test_gradient_clip.py  +21 -25
python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py  +2 -2
python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py  +2 -2
python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py  +2 -2
python/paddle/hapi/model.py  +1 -1
python/paddle/incubate/distributed/models/moe/grad_clip.py  +5 -4
python/paddle/incubate/optimizer/distributed_fused_lamb.py  +1 -1
python/paddle/nn/clip.py  +1069 -4
python/paddle/optimizer/adamw.py  +1 -1
python/paddle/optimizer/optimizer.py  +7 -12
python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
@@ -20,11 +20,11 @@ __all__ = []
 import paddle
 from paddle.common_ops_import import LayerHelper
-from paddle.fluid.clip import GradientClipByNorm, append_gradient_clip_ops
 from paddle.fluid.dygraph import base as imperative_base
 from paddle.fluid.framework import in_dygraph_mode
 from paddle.fluid.optimizer import Momentum, Optimizer
 from paddle.framework import core
+from paddle.nn.clip import ClipGradByNorm, append_gradient_clip_ops
 from paddle.static import create_global_var
@@ -76,9 +76,9 @@ class DGCMomentumOptimizer(Optimizer):
         self._dgc_clip_norm = None
         if grad_clip is not None:
-            if not isinstance(grad_clip, GradientClipByNorm):
+            if not isinstance(grad_clip, ClipGradByNorm):
                 raise TypeError(
-                    "The type of grad_clip should be 'GradientClipByNorm', because DGCMomentumOptimizer only support GradientClipByNorm"
+                    "The type of grad_clip should be 'ClipGradByNorm', because DGCMomentumOptimizer only support ClipGradByNorm"
                 )
         assert isinstance(num_trainers, int), (
             "The type of num_trainers should be 'int', but received %s"
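For context, a minimal sketch of how the renamed ClipGradByNorm is passed to an optimizer in dygraph mode; the Linear layer and hyperparameters here are illustrative assumptions, not part of this commit:

    import paddle
    from paddle.nn import ClipGradByNorm

    linear = paddle.nn.Linear(13, 5)
    clip = ClipGradByNorm(clip_norm=1.0)  # replaces fluid's GradientClipByNorm
    opt = paddle.optimizer.Momentum(
        learning_rate=0.01, parameters=linear.parameters(), grad_clip=clip
    )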
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py
@@ -15,9 +15,8 @@
 import paddle
 from paddle import framework
 from paddle.autograd import no_grad
-from paddle.fluid import layers
-from paddle.fluid.clip import ClipGradByGlobalNorm
 from paddle.framework import core
+from paddle.nn import ClipGradByGlobalNorm, clip
 from ...base.topology import ParallelMode
 from ...utils.hybrid_parallel_util import (
@@ -62,8 +61,8 @@ class HybridParallelClipGrad:
                 continue
             merge_grad = g
             if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                merge_grad = layers.merge_selected_rows(g)
-                merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
+                merge_grad = clip.merge_selected_rows(g)
+                merge_grad = clip.get_tensor_from_selected_rows(merge_grad)
             square = paddle.square(merge_grad)
             sum_square = paddle.sum(square)
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_optimizer_stage2.py
@@ -30,7 +30,7 @@ import paddle
 import paddle.distributed as dist
 from paddle.distributed import ParallelMode, fleet
 from paddle.fluid import core
-from paddle.fluid.clip import ClipGradByGlobalNorm
+from paddle.nn import ClipGradByGlobalNorm
 from paddle.optimizer import Optimizer

 HybridParallelClipGrad = (
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py
@@ -25,8 +25,8 @@ import paddle.fluid.framework as framework
 from paddle import nn
 from paddle.autograd import PyLayer
 from paddle.distributed import collective
-from paddle.fluid.clip import ClipGradByGlobalNorm
 from paddle.fluid.framework import EagerParamBase
+from paddle.nn import ClipGradByGlobalNorm
 from .group_sharded_storage import GradStorage
 from .group_sharded_utils import GroupShardedClipGrad, Type, device_guard
python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py
@@ -23,6 +23,7 @@ from paddle import _legacy_C_ops
 from paddle.fluid import core, layers
 from paddle.fluid.dygraph import to_variable
 from paddle.fluid.framework import dygraph_only
+from paddle.nn import clip

 class Taskflow:
@@ -65,8 +66,8 @@ class GroupShardedClipGrad:
             merge_grad = g
             if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                merge_grad = layers.get_tensor_from_selected_rows(
-                    layers.merge_selected_rows(g)
+                merge_grad = clip.get_tensor_from_selected_rows(
+                    clip.merge_selected_rows(g)
                 )
             square = paddle.square(merge_grad)
             sum_square = paddle.sum(square)
python/paddle/distributed/fleet/metrics/metric.py
@@ -159,7 +159,7 @@ def auc(stat_pos, stat_neg, scope=None, util=None):
         .. code-block:: python
           # in model.py
-          similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(output, min=-15.0, max=15.0))
+          similarity_norm = fluid.layers.sigmoid(paddle.clip(output, min=-15.0, max=15.0))
           binary_predict = fluid.layers.concat(
             input=[paddle.subtract(fluid.layers.ceil(similarity_norm), similarity_norm), similarity_norm], axis=1)
           self.auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg] =
python/paddle/fluid/__init__.py
@@ -90,7 +90,6 @@ from .transpiler import (
     DistributeTranspilerConfig,
 )
 from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
-from . import clip
 from . import profiler
 from . import unique_name
 from . import parallel_executor
@@ -164,7 +163,6 @@ __all__ = (
     'ParamAttr',
     'WeightNormParamAttr',
     'DataFeeder',
-    'clip',
     'profiler',
     'unique_name',
     'Scope',
python/paddle/fluid/clip.py
deleted file mode 100644
(This diff is collapsed.)
python/paddle/fluid/incubate/fleet/utils/fleet_util.py
@@ -185,7 +185,7 @@ class FleetUtil:
               # below is part of model
               emb = my_slot_net(slots, label) # emb can be fc layer of size 1
-              similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(\
+              similarity_norm = fluid.layers.sigmoid(paddle.clip(\
                   emb, min=-15.0, max=15.0), name="similarity_norm")\
               binary_predict = fluid.layers.concat(input=[\
                   paddle.subtract(\
@@ -1374,7 +1374,7 @@ class FleetUtil:
               label = fluid.layers.data(name="click", shape=[-1, 1],\
                   dtype="int64", lod_level=0, append_batch_size=False)
               emb = my_slot_net(slots, label) # emb can be fc layer of size 1
-              similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(\
+              similarity_norm = fluid.layers.sigmoid(paddle.clip(\
                   emb, min=-15.0, max=15.0), name="similarity_norm")\
               binary_predict = fluid.layers.concat(input=[\
                   paddle.subtract(\
@@ -1574,7 +1574,7 @@ class FleetUtil:
               label = fluid.layers.data(name="click", shape=[-1, 1],\
                   dtype="int64", lod_level=0, append_batch_size=False)
               emb = my_slot_net(slots, label) # emb can be fc layer of size 1
-              similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(\
+              similarity_norm = fluid.layers.sigmoid(paddle.clip(\
                   emb, min=-15.0, max=15.0), name="similarity_norm")\
               binary_predict = fluid.layers.concat(input=[\
                   paddle.subtract(\
python/paddle/fluid/layers/nn.py
@@ -63,10 +63,6 @@ __all__ = [
     'fc',
     'embedding',
     'autoincreased_step_counter',
-    'clip',
-    'clip_by_norm',
-    'merge_selected_rows',
-    'get_tensor_from_selected_rows',
 ]

 OP_NAMEMAPPING = {
@@ -997,199 +993,3 @@ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True):
         )
     return out
-
-
-@templatedoc()
-def clip(x, min, max, name=None):
-    """
-        :old_api: paddle.fluid.layers.clip
-    ${comment}
-    Args:
-        x(${x_type}): ${x_comment}
-        min(float): ${min_comment}
-        max(float): ${max_comment}
-        name(str, optional): The default value is None.
-                             Normally there is no need for user to set this property.
-                             For more information, please refer to :ref:`api_guide_Name`
-    Returns:
-        ${out_comment}
-    Return Type:
-        ${out_type}
-    Examples:
-        .. code-block:: python
-            import paddle.fluid as fluid
-            input = fluid.data(
-                name='data', shape=[1], dtype='float32')
-            reward = fluid.layers.clip(x=input, min=-1.0, max=1.0)
-    """
-    helper = LayerHelper("clip", **locals())
-    check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'clip')
-    if name is None:
-        name = unique_name.generate_with_ignorable_key(
-            ".".join([helper.name, 'tmp'])
-        )
-    out = helper.create_variable(
-        type=x.type, name=name, dtype=x.dtype, persistable=False
-    )
-    helper.append_op(
-        type="clip",
-        inputs={"X": x},
-        attrs={"min": min, "max": max},
-        outputs={"Out": out},
-    )
-    return out
-
-
-@templatedoc()
-def clip_by_norm(x, max_norm, name=None):
-    """
-    ${comment}
-    Args:
-        x(${x_type}): ${x_comment}
-        max_norm(${max_norm_type}): ${max_norm_comment}
-        name(str, optional): For detailed information, please refer
-            to :ref:`api_guide_Name`. Usually name is no need to set and
-            None by default.
-    Returns:
-        Tensor:
-        out(${out_type}): ${out_comment}
-    Examples:
-        .. code-block:: python
-            import paddle
-            import paddle.fluid as fluid
-            input = paddle.to_tensor([[2.0, 2.0], [2.0, 2.0]], dtype='float32')
-            reward = fluid.layers.clip_by_norm(x=input, max_norm=1.0)
-            # [[0.5, 0.5], [0.5, 0.5]]
-    """
-    if in_dygraph_mode():
-        return _C_ops.clip_by_norm(x, max_norm)
-    else:
-        helper = LayerHelper("clip_by_norm", **locals())
-        check_variable_and_dtype(x, 'X', ['float32', 'float16'], 'clip_by_norm')
-        check_type(max_norm, 'max_norm', (float), 'clip_by_norm')
-        if name is None:
-            name = unique_name.generate_with_ignorable_key(
-                ".".join([helper.name, 'tmp'])
-            )
-        out = helper.create_variable(
-            type=x.type, name=name, dtype=x.dtype, persistable=False
-        )
-        helper.append_op(
-            type="clip_by_norm",
-            inputs={"X": x},
-            attrs={"max_norm": max_norm},
-            outputs={"Out": out},
-        )
-        return out
-
-
-@templatedoc()
-def merge_selected_rows(x, name=None):
-    """
-    ${comment}
-    Args:
-        x(${x_type}): ${x_comment}
-        name(basestring|None): Name of the output.
-    Returns:
-        out(${out_type}): ${out_comment}
-    Examples:
-        .. code-block:: python
-            import paddle.fluid as fluid
-            b = fluid.default_main_program().global_block()
-            var = b.create_var(
-                name="X", dtype="float32", persistable=True,
-                type=fluid.core.VarDesc.VarType.SELECTED_ROWS)
-            y = fluid.layers.merge_selected_rows(var)
-    """
-    if in_dygraph_mode():
-        return _C_ops.merge_selected_rows(x)
-    else:
-        helper = LayerHelper("merge_selected_rows", **locals())
-        out = helper.create_variable_for_type_inference(dtype=x.dtype)
-        helper.append_op(
-            type="merge_selected_rows",
-            inputs={"X": x},
-            attrs={},
-            outputs={"Out": out},
-        )
-        return out
-
-
-@templatedoc()
-def get_tensor_from_selected_rows(x, name=None):
-    """
-    This operator gets tensor data from input with SelectedRows type, and outputs a LoDTensor.
-    .. code-block:: text
-        input x is SelectedRows:
-           x.rows = [0, 5, 5, 4, 19]
-           x.height = 20
-           x.value = [[1, 1] [2, 2] [2, 2] [3, 3] [6, 6]]
-        Output is LoDTensor:
-           out.shape = [5, 2]
-           out.data = [[1, 1],
-                       [2, 2],
-                       [2, 2],
-                       [3, 3],
-                       [6, 6]]
-    Args:
-        x(SelectedRows): Input with SelectedRows type. The data type is float32, float64, int32 or int64.
-        name(str, optional): The default value is None. Normally there is no need for user to set this property.
-            For more information, please refer to :ref:`api_guide_Name` .
-    Returns:
-        Variable: LoDTensor transformed from SelectedRows. The data type is same with input.
-    Examples:
-        .. code-block:: python
-            import paddle.fluid as fluid
-            b = fluid.default_main_program().global_block()
-            input = b.create_var(name="X", dtype="float32", persistable=True, type=fluid.core.VarDesc.VarType.SELECTED_ROWS)
-            out = fluid.layers.get_tensor_from_selected_rows(input)
-    """
-    check_type(x, 'x', Variable, 'get_tensor_from_selected_rows')
-    if x.type != core.VarDesc.VarType.SELECTED_ROWS:
-        raise TypeError(
-            "The type of 'x' in get_tensor_from_selected_rows must be SELECTED_ROWS."
-        )
-    helper = LayerHelper('get_tensor_from_selected_rows', **locals())
-    out = helper.create_variable_for_type_inference(dtype=x.dtype)
-    helper.append_op(
-        type='get_tensor_from_selected_rows',
-        inputs={'X': x},
-        outputs={'Out': out},
-        attrs={},
-    )
-    return out
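The removed fluid.layers.clip has a drop-in counterpart in the public API, which is what the test updates below switch to. A minimal dygraph sketch (tensor values illustrative):

    import paddle

    x = paddle.to_tensor([[-2.0, 0.5], [3.0, -0.1]])
    y = paddle.clip(x, min=-1.0, max=1.0)  # replaces fluid.layers.clip(x, -1.0, 1.0)
    print(y.numpy())  # all values limited to the range [-1.0, 1.0]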
python/paddle/fluid/optimizer.py
@@ -38,13 +38,6 @@ from .backward import (
     _append_grad_suffix_,
     _get_no_grad_set_name,
 )
-from .clip import (
-    GradientClipBase,
-    GradientClipByNorm,
-    error_clip_callback,
-    append_gradient_clip_ops,
-    ClipGradByGlobalNorm,
-)
 from .framework import program_guard
 from .initializer import Constant
 from .layer_helper import LayerHelper
@@ -160,7 +153,7 @@ class Optimizer:
             )
         if grad_clip is not None:
-            if not isinstance(grad_clip, GradientClipBase):
+            if not isinstance(grad_clip, paddle.nn.clip.GradientClipBase):
                 raise TypeError(
                     "'grad_clip' should be an instance of GradientClipBase's derived class"
                 )
@@ -1030,7 +1023,7 @@ class Optimizer:
                 params_grads.append((param, grad_var))
         else:
             if callbacks is None:
-                callbacks = [error_clip_callback]
+                callbacks = [paddle.nn.clip.error_clip_callback]
             else:
                 assert isinstance(callbacks, list)
             program = loss.block.program
@@ -1260,7 +1253,7 @@ class Optimizer:
         # NOTE(zhiqiu): currently, only support ClipGradByGlobalNorm and without regularization.
         if self._flatten_param_grads and self.regularization is None:
             if self._grad_clip is None or isinstance(
-                self._grad_clip, ClipGradByGlobalNorm
+                self._grad_clip, paddle.nn.ClipGradByGlobalNorm
             ):
                 params_grads = self.flatten_param_grads(params_grads)
@@ -1268,7 +1261,7 @@ class Optimizer:
         if self._grad_clip is not None:
             params_grads = self._grad_clip(params_grads)
         else:
-            params_grads = append_gradient_clip_ops(params_grads)
+            params_grads = paddle.nn.clip.append_gradient_clip_ops(params_grads)
         # Add regularization if any
         params_grads = self.append_regularization_ops(
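The optimizer still accepts any gradient-clipping object through grad_clip; only the class it is built from changes. A minimal sketch with the new name (the model and learning rate are illustrative assumptions):

    import paddle

    model = paddle.nn.Linear(10, 10)
    opt = paddle.optimizer.AdamW(
        learning_rate=1e-3,
        parameters=model.parameters(),
        grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
    )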
python/paddle/fluid/tests/test_error_clip.py
@@ -38,13 +38,13 @@ with fluid.program_guard(main_program=prog):
 prog_clip = prog.clone()
 prog_clip.block(0).var(hidden1.name)._set_error_clip(
-    fluid.clip.ErrorClipByValue(max=CLIP_MAX, min=CLIP_MIN)
+    paddle.nn.clip.ErrorClipByValue(max=CLIP_MAX, min=CLIP_MIN)
 )
 avg_cost_clip = prog_clip.block(0).var(avg_cost.name)
 fluid.backward.append_backward(loss=avg_cost)
 fluid.backward.append_backward(
-    loss=avg_cost_clip, callbacks=[fluid.clip.error_clip_callback]
+    loss=avg_cost_clip, callbacks=[paddle.nn.clip.error_clip_callback]
 )
 hidden1_grad = prog.block(0).var(hidden1.name + "@GRAD")
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py
@@ -122,7 +122,7 @@ class TestDistMnist2x2(TestDistRunnerBase):
         opt = paddle.optimizer.AdamW(
             learning_rate=lr_val,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
         acc_steps = 2  # accumulated steps for pipeline
python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py
@@ -122,7 +122,7 @@ class TestDistMnist2x2(TestDistRunnerBase):
         opt = fluid.optimizer.Momentum(
             learning_rate=lr_val,
             momentum=0.9,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
         acc_steps = 2  # accumulated steps for pipeline
python/paddle/fluid/tests/unittests/collective/fleet/test_dgc_optimizer.py
@@ -15,10 +15,10 @@
 import unittest

 import paddle
-import paddle.fluid.clip as clip
 import paddle.fluid.framework as framework
 import paddle.fluid.optimizer as optimizer
 import paddle.fluid.regularizer as regularizer
+import paddle.nn.clip as clip

 paddle.enable_static()
@@ -76,7 +76,7 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
             rampup_begin_step=0,
             num_trainers=2,
             regularization=regularization,
-            grad_clip=clip.GradientClipByNorm(1.0),
+            grad_clip=clip.ClipGradByNorm(1.0),
         )
         if use_recompute:
@@ -144,14 +144,14 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
         print("dgc regular_coeff=" + str(coeff))

     def test_tpyeError(self):
-        # the type of DGCMomentumOptimizer(grad_clip=) must be 'GradientClipByNorm'
+        # the type of DGCMomentumOptimizer(grad_clip=) must be 'ClipGradByNorm'
         with self.assertRaises(TypeError):
             dgc_momentum_optimizer = self.MockDGCMomentum(
                 learning_rate=0.01,
                 momentum=0.2,
                 rampup_begin_step=0,
                 num_trainers=2,
-                grad_clip=clip.GradientClipByGlobalNorm(1.0),
+                grad_clip=clip.ClipGradByGlobalNorm(1.0),
             )

     def test_momentum_without_dgc(self):
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py
@@ -354,7 +354,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
         }
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
@@ -552,7 +552,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
         strategy.fuse_grad_merge = True
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
@@ -940,7 +940,7 @@ class TestFleetHybridOptimizerBoundary(TestFleetMetaOptimizer):
         }
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
@@ -1044,7 +1044,7 @@ class TestFleetHybridOptimizerBoundary(TestFleetMetaOptimizer):
         }
         strategy.fuse_all_reduce_ops = True
         strategy.fuse_grad_size_in_MB = 32
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py
@@ -640,7 +640,7 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
         )
         avg_cost, strategy = self.net(train_prog, startup_prog)
         self.set_strategy(strategy, 'sharding')
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
         )
@@ -1309,7 +1309,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
             "micro_batch_size": 2,
             "accumulate_steps": 4,
         }
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
         self.optimizer(
             avg_cost, strategy, train_prog, startup_prog, grad_clip=clip
         )
@@ -1547,7 +1547,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
             "micro_batch_size": 2,
             "accumulate_steps": 4,
         }
-        clip = paddle.fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
         self.optimizer(
             avg_cost,
             strategy,
python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py
@@ -22,8 +22,8 @@ import paddle
 import paddle.distributed.fleet as fleet
 import paddle.fluid.core as core
 from paddle.distributed.fleet.meta_optimizers.common import CollectiveHelper
-from paddle.fluid.clip import ClipGradBase, _clip_by_global_norm_using_mp_type
 from paddle.incubate import DistributedFusedLamb
+from paddle.nn.clip import ClipGradBase, _clip_by_global_norm_using_mp_type
 from paddle.vision.models import resnet18 as resnet
python/paddle/fluid/tests/unittests/dygraph_to_static/test_len.py
@@ -19,6 +19,7 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.jit.dy2static import Call
+from paddle.nn import clip

 SEED = 2020
 np.random.seed(SEED)
@@ -89,11 +90,11 @@ def len_with_selected_rows(place):
             type=fluid.core.VarDesc.VarType.SELECTED_ROWS,
         )
         # y is Variable(SelectedRows)
-        y = fluid.layers.merge_selected_rows(var)
+        y = clip.merge_selected_rows(var)
         y_len = Call(len)(y)

         # z is inner tensor with shape [4, 2]
-        z = fluid.layers.get_tensor_from_selected_rows(y)
+        z = clip.get_tensor_from_selected_rows(y)
         z_len = Call(len)(z)

         # set data for selected_rows
python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py
@@ -22,8 +22,8 @@ from seq2seq_dygraph_model import AttentionModel, BaseModel
 from seq2seq_utils import Seq2SeqModelHyperParams, get_data_iter

 import paddle.fluid as fluid
-from paddle.fluid.clip import GradientClipByGlobalNorm
 from paddle.jit import ProgramTranslator
+from paddle.nn import ClipGradByGlobalNorm

 place = (
     fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
@@ -71,7 +71,7 @@ def train(args, attn_model=False):
         dropout=args.dropout,
     )
-    gloabl_norm_clip = GradientClipByGlobalNorm(args.max_grad_norm)
+    gloabl_norm_clip = ClipGradByGlobalNorm(args.max_grad_norm)
     optimizer = fluid.optimizer.SGD(
         args.learning_rate,
         parameter_list=model.parameters(),
python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py
@@ -127,7 +127,7 @@ class ElementwiseActivationMkldnnFusePassTest_Add_Clip(
 ):
     def set_params(self):
         self.operand = paddle.add
-        self.act = fluid.layers.clip
+        self.act = paddle.clip
         self.act_alpha = 0.0
         self.act_beta = 10.0
@@ -219,7 +219,7 @@ class ElementwiseActivationMkldnnFusePassTest_Sub_Clip(
 ):
     def set_params(self):
         self.operand = paddle.subtract
-        self.act = fluid.layers.clip
+        self.act = paddle.clip
         self.act_alpha = 0.0
         self.act_beta = 10.0
@@ -319,7 +319,7 @@ class ElementwiseActivationMkldnnFusePassTest_Mul_Clip(
 ):
     def set_params(self):
         self.operand = paddle.multiply
-        self.act = fluid.layers.clip
+        self.act = paddle.clip
         self.act_alpha = 0.0
         self.act_beta = 10.0
python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py
@@ -106,7 +106,7 @@ class TensorRTSubgraphPassHardSwishPluginTest(
 class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest):
     def append_act(self, x):
-        return fluid.layers.clip(x, 0, 1)
+        return paddle.clip(x, 0, 1)

 class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest):
python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py
@@ -117,13 +117,13 @@ class TestClipOpError(unittest.TestCase):
             input_data = np.random.random((2, 4)).astype("float32")

             def test_Variable():
-                fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+                paddle.clip(x=input_data, min=-1.0, max=1.0)

             self.assertRaises(TypeError, test_Variable)

             def test_dtype():
                 x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
-                fluid.layers.clip(x=x2, min=-1.0, max=1.0)
+                paddle.clip(x=x2, min=-1.0, max=1.0)

             self.assertRaises(TypeError, test_dtype)
         paddle.disable_static()
python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -686,7 +686,7 @@ class TestAdamOpV2(unittest.TestCase):
             value = np.arange(26).reshape(2, 13).astype("float32")
             a = fluid.dygraph.to_variable(value)
             linear = paddle.nn.Linear(13, 5)
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
             adam = paddle.optimizer.Adam(
                 0.1, parameters=linear.parameters(), grad_clip=clip
             )
python/paddle/fluid/tests/unittests/test_clip_by_norm_op.py
@@ -20,12 +20,13 @@ from op_test import OpTest
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
+from paddle.nn import clip

 class TestClipByNormOp(OpTest):
     def setUp(self):
         self.max_relative_error = 0.006
-        self.python_api = fluid.layers.clip_by_norm
+        self.python_api = clip.clip_by_norm
         self.init_dtype()
         self.initTestCase()
         input = np.random.random(self.shape).astype(self.dtype)
python/paddle/fluid/tests/unittests/test_clip_op.py
@@ -128,15 +128,9 @@ class TestClipOpError(unittest.TestCase):
             input_data = np.random.random((2, 4)).astype("float32")

             def test_Variable():
-                fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+                paddle.clip(x=input_data, min=-1.0, max=1.0)

             self.assertRaises(TypeError, test_Variable)
-
-            def test_dtype():
-                x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
-                fluid.layers.clip(x=x2, min=-1.0, max=1.0)
-
-            self.assertRaises(TypeError, test_dtype)
         paddle.disable_static()
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
@@ -584,7 +584,7 @@ class TestL2Decay(TranspilerTest):
         def filter(param):
             return param.name == "fc_w"

-        clip = fluid.clip.GradientClipByValue(0.1, need_clip=filter)
+        clip = paddle.nn.ClipGradByValue(0.1, need_clip=filter)
         sgd_optimizer.minimize(avg_cost, grad_clip=clip)

     def transpiler_test_impl(self):
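ClipGradByValue keeps the same max/min constructor arguments as the class it replaces. A minimal sketch (the layer and the bounds are illustrative assumptions):

    import paddle

    model = paddle.nn.Linear(4, 4)
    clip = paddle.nn.ClipGradByValue(max=0.2, min=0.1)  # replaces GradientClipByValue
    sgd = paddle.optimizer.SGD(
        learning_rate=0.1, parameters=model.parameters(), grad_clip=clip
    )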
python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
@@ -504,8 +504,8 @@ class PaddingRNNTestBase(unittest.TestCase):
             self.feed_order,
         ) = res_vars

-        fluid.clip.set_gradient_clip(
-            clip=fluid.clip.GradientClipByGlobalNorm(
+        paddle.nn.clip.set_gradient_clip(
+            clip=paddle.nn.ClipGradByGlobalNorm(
                 clip_norm=config.max_grad_norm
             )
         )
python/paddle/fluid/tests/unittests/test_fleet_executor.py
@@ -64,7 +64,7 @@ class TestFleetExecutor(unittest.TestCase):
         )
         opt = paddle.optimizer.AdamW(
             learning_rate=lr_val,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
         opt.minimize(loss)
         # TODO: section_program will be removed in the future
python/paddle/fluid/tests/unittests/test_fleet_executor_origin_scheduler.py
@@ -64,7 +64,7 @@ class TestFleetExecutor(unittest.TestCase):
         )
         opt = paddle.optimizer.AdamW(
             learning_rate=lr_val,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
         opt.minimize(loss)
         # TODO: section_program will be removed in the future
python/paddle/fluid/tests/unittests/test_fleet_executor_with_task_nodes.py
@@ -47,7 +47,7 @@ class TestFleetExecutor(unittest.TestCase):
         )
         opt = paddle.optimizer.AdamW(
             learning_rate=lr_val,
-            grad_clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0),
+            grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0),
         )
         opt.minimize(loss)
         # TODO: section_program will be removed in the future
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py
@@ -20,6 +20,7 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid import Program, program_guard
 from paddle.fluid.op import Operator
+from paddle.nn import clip

 class TestGetTensorFromSelectedRowsError(unittest.TestCase):
@@ -31,12 +32,12 @@ class TestGetTensorFromSelectedRowsError(unittest.TestCase):
         x_data = np.random.random((2, 4)).astype("float32")

         def test_Variable():
-            fluid.layers.get_tensor_from_selected_rows(x=x_data)
+            clip.get_tensor_from_selected_rows(x=x_data)

         self.assertRaises(TypeError, test_Variable)

         def test_SELECTED_ROWS():
-            fluid.layers.get_tensor_from_selected_rows(x=x_var)
+            clip.get_tensor_from_selected_rows(x=x_var)

         self.assertRaises(TypeError, test_SELECTED_ROWS)
python/paddle/fluid/tests/unittests/test_grad_clip_minimize.py
@@ -17,12 +17,8 @@ import unittest
 import numpy as np

 import paddle.fluid as fluid
-from paddle.fluid.clip import (
-    GradientClipByGlobalNorm,
-    GradientClipByNorm,
-    GradientClipByValue,
-)
 from paddle.fluid.dygraph.base import to_variable
+from paddle.nn import ClipGradByGlobalNorm, ClipGradByNorm, ClipGradByValue

 class TestGradClipByGlobalNorm(unittest.TestCase):
@@ -67,7 +63,7 @@ class TestGradClipByGlobalNorm(unittest.TestCase):
     def get_dygrap_global_norm_result(self):
         with fluid.dygraph.guard():
-            gloabl_norm_clip = GradientClipByGlobalNorm(self.max_global_norm)
+            gloabl_norm_clip = ClipGradByGlobalNorm(self.max_global_norm)
             p_g_var = []
             for p, g in self.para_and_grad:
                 new_p = to_variable(p)
@@ -142,7 +138,7 @@ class TestGradClipByNorm(unittest.TestCase):
     def get_dygrap_norm_result(self):
         with fluid.dygraph.guard():
-            norm_clip = GradientClipByNorm(self.max_norm)
+            norm_clip = ClipGradByNorm(self.max_norm)
             p_g_var = []
             for p, g in self.para_and_grad:
                 new_p = to_variable(p)
@@ -212,9 +208,7 @@ class TestGradClipByValue(unittest.TestCase):
     def get_dygrap_clip_result(self):
         with fluid.dygraph.guard():
-            value_clip = GradientClipByValue(
+            value_clip = ClipGradByValue(
                 max=self.max_value, min=self.min_value)
             p_g_var = []
             for p, g in self.para_and_grad:
                 new_p = to_variable(p)
python/paddle/fluid/tests/unittests/test_gradient_clip.py
浏览文件 @
fe0dc40d
...
@@ -20,7 +20,7 @@ from fake_reader import fake_imdb_reader
...
@@ -20,7 +20,7 @@ from fake_reader import fake_imdb_reader
import
paddle
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
from
paddle.
fluid
.clip
import
_allow_pure_fp16_global_norm_clip
from
paddle.
nn
.clip
import
_allow_pure_fp16_global_norm_clip
paddle
.
enable_static
()
paddle
.
enable_static
()
...
@@ -173,9 +173,9 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
...
@@ -173,9 +173,9 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
# test whether the output is right when use 'set_gradient_clip'
# test whether the output is right when use 'set_gradient_clip'
def
test_old_gradient_clip
(
self
):
def
test_old_gradient_clip
(
self
):
def
func
(
params_grads
):
def
func
(
params_grads
):
clip
=
fluid
.
clip
.
GradientClip
ByGlobalNorm
(
clip_norm
=
self
.
clip_norm
)
clip
=
paddle
.
nn
.
ClipGrad
ByGlobalNorm
(
clip_norm
=
self
.
clip_norm
)
fluid
.
clip
.
set_gradient_clip
(
clip
)
paddle
.
nn
.
clip
.
set_gradient_clip
(
clip
)
return
fluid
.
clip
.
append_gradient_clip_ops
(
params_grads
)
return
paddle
.
nn
.
clip
.
append_gradient_clip_ops
(
params_grads
)
self
.
clip_gradient
=
func
self
.
clip_gradient
=
func
self
.
check_gradient_clip
(
fluid
.
CPUPlace
())
self
.
check_gradient_clip
(
fluid
.
CPUPlace
())
...
@@ -183,7 +183,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
...
@@ -183,7 +183,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
# test whether the output is right when use grad_clip
# test whether the output is right when use grad_clip
def
test_new_gradient_clip
(
self
):
def
test_new_gradient_clip
(
self
):
def
func
(
params_grads
):
def
func
(
params_grads
):
clip
=
fluid
.
clip
.
GradientClip
ByGlobalNorm
(
clip_norm
=
self
.
clip_norm
)
clip
=
paddle
.
nn
.
ClipGrad
ByGlobalNorm
(
clip_norm
=
self
.
clip_norm
)
return
clip
(
params_grads
)
return
clip
(
params_grads
)
self
.
clip_gradient
=
func
self
.
clip_gradient
=
func
...
@@ -192,7 +192,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
...
@@ -192,7 +192,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
# test whether the output is right when use grad_clip under float64
# test whether the output is right when use grad_clip under float64
def
test_new_gradient_clip_fp64
(
self
):
def
test_new_gradient_clip_fp64
(
self
):
def
func
(
params_grads
):
def
func
(
params_grads
):
clip
=
fluid
.
clip
.
GradientClip
ByGlobalNorm
(
clip_norm
=
self
.
clip_norm
)
clip
=
paddle
.
nn
.
ClipGrad
ByGlobalNorm
(
clip_norm
=
self
.
clip_norm
)
return
clip
(
params_grads
)
return
clip
(
params_grads
)
self
.
clip_gradient
=
func
self
.
clip_gradient
=
func
...
@@ -201,15 +201,15 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
...
@@ -201,15 +201,15 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
# invoke 'set_gradient_clip' in a wrong order
# invoke 'set_gradient_clip' in a wrong order
def
test_wrong_API_order
(
self
):
def
test_wrong_API_order
(
self
):
def
backward_func
(
cost
):
def
backward_func
(
cost
):
clip
=
fluid
.
clip
.
GradientClip
ByGlobalNorm
(
clip_norm
=
5.0
)
clip
=
paddle
.
nn
.
ClipGrad
ByGlobalNorm
(
clip_norm
=
5.0
)
fluid
.
clip
.
set_gradient_clip
(
clip
)
paddle
.
nn
.
clip
.
set_gradient_clip
(
clip
)
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.01
,
grad_clip
=
clip
learning_rate
=
0.01
,
grad_clip
=
clip
)
)
# if 'set_gradient_clip' and 'optimize(grad_clip)' together, 'set_gradient_clip' will be ineffective
# if 'set_gradient_clip' and 'optimize(grad_clip)' together, 'set_gradient_clip' will be ineffective
sgd_optimizer
.
minimize
(
cost
)
sgd_optimizer
.
minimize
(
cost
)
# 'set_gradient_clip' must before 'minimize', otherwise, 'set_gradient_clip' will be ineffective
# 'set_gradient_clip' must before 'minimize', otherwise, 'set_gradient_clip' will be ineffective
fluid
.
clip
.
set_gradient_clip
(
clip
)
paddle
.
nn
.
clip
.
set_gradient_clip
(
clip
)
self
.
backward_and_optimize
=
backward_func
self
.
backward_and_optimize
=
backward_func
for
place
in
self
.
get_places
():
for
place
in
self
.
get_places
():
...
@@ -269,7 +269,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
...
@@ -269,7 +269,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
with
fluid
.
program_guard
(
with
fluid
.
program_guard
(
main_program
=
prog
,
startup_program
=
startup_program
main_program
=
prog
,
startup_program
=
startup_program
):
):
clip
=
fluid
.
clip
.
GradientClip
ByGlobalNorm(self.clip_norm)
+            clip = paddle.nn.ClipGradByGlobalNorm(self.clip_norm)
         x = (
             fluid.default_main_program()
             .global_block()
...
@@ -313,7 +313,7 @@ class TestGradientClipByNorm(TestGradientClip):
     # test whether the output is right when use grad_clip
     def test_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByNorm(clip_norm=self.clip_norm)
+            clip = paddle.nn.ClipGradByNorm(clip_norm=self.clip_norm)
             return clip(params_grads)

         self.clip_gradient = func
...
@@ -321,7 +321,7 @@ class TestGradientClipByNorm(TestGradientClip):
     # if grad is None or not need clip
     def test_none_grad(self):
-        clip = fluid.clip.GradientClipByNorm(self.clip_norm)
+        clip = paddle.nn.ClipGradByNorm(self.clip_norm)
         x = (
             fluid.default_main_program()
             .global_block()
...
@@ -371,7 +371,7 @@ class TestGradientClipByValue(TestGradientClip):
     # test whether the output is right when use grad_clip
     def test_gradient_clip(self):
         def func(params_grads):
-            clip = fluid.clip.GradientClipByValue(max=self.max, min=self.min)
+            clip = paddle.nn.ClipGradByValue(max=self.max, min=self.min)
             return clip(params_grads)

         self.clip_gradient = func
...
@@ -379,7 +379,7 @@ class TestGradientClipByValue(TestGradientClip):
     # if grad is None or not need clip
     def test_none_grad(self):
-        clip = fluid.clip.GradientClipByValue(self.max, self.min)
+        clip = paddle.nn.ClipGradByValue(self.max, self.min)
         x = (
             fluid.default_main_program()
             .global_block()
...
@@ -419,7 +419,7 @@ class TestDygraphGradientClip(unittest.TestCase):
             sgd_optimizer = fluid.optimizer.SGD(
                 learning_rate=0.0,
                 parameter_list=linear.parameters(),
-                grad_clip=fluid.clip.GradientClipByGlobalNorm(0.1),
+                grad_clip=paddle.nn.ClipGradByGlobalNorm(0.1),
             )
             self.check_clip_result(loss, sgd_optimizer)
...
@@ -430,12 +430,8 @@ class TestDygraphGradientClip(unittest.TestCase):
 class TestDygraphGradientClipByGlobalNorm(TestDygraphGradientClip):
     def setUp(self):
         self.clip_norm = 0.8
-        self.clip1 = fluid.clip.GradientClipByGlobalNorm(
-            clip_norm=self.clip_norm
-        )
-        self.clip2 = fluid.clip.GradientClipByGlobalNorm(
-            clip_norm=self.clip_norm
-        )
+        self.clip1 = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)
+        self.clip2 = paddle.nn.ClipGradByGlobalNorm(clip_norm=self.clip_norm)

     def check_clip_result(self, loss, optimizer):
         # if grad is None
...
@@ -476,7 +472,7 @@ class TestDygraphGradientClipByGlobalNorm(TestDygraphGradientClip):
 class TestDygraphGradientClipByNorm(TestDygraphGradientClip):
     def setUp(self):
         self.clip_norm = 0.8
-        self.clip = fluid.clip.GradientClipByNorm(clip_norm=self.clip_norm)
+        self.clip = paddle.nn.ClipGradByNorm(clip_norm=self.clip_norm)

     def check_clip_result(self, loss, optimizer):
         # if grad is None
...
@@ -506,7 +502,7 @@ class TestDygraphGradientClipByValue(TestDygraphGradientClip):
     def setUp(self):
         self.max = 0.2
         self.min = 0.1
-        self.clip = fluid.clip.GradientClipByValue(max=self.max, min=self.min)
+        self.clip = paddle.nn.ClipGradByValue(max=self.max, min=self.min)

     def check_clip_result(self, loss, optimizer):
         # if grad is None
...
@@ -572,7 +568,7 @@ class TestDygraphGradientClipFP16(unittest.TestCase):
                 params_grads.append((param, param._grad_ivar()))
             _, grads = zip(*params_grads)
             # clip grads
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=0.8)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.8)
             params_grads = clip(params_grads)
             _, grads_clip = zip(*params_grads)
             # param update
...
@@ -616,7 +612,7 @@ class TestDygraphGradientClipFP64(unittest.TestCase):
                 params_grads.append((param, param._grad_ivar()))
             _, grads = zip(*params_grads)
             # clip grads
-            clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=0.1)
+            clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.1)
             params_grads = clip(params_grads)
             _, grads_clip = zip(*params_grads)
...
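Throughout these tests the migration is mechanical: fluid.clip.GradientClipBy{GlobalNorm,Norm,Value} becomes paddle.nn.ClipGradBy{GlobalNorm,Norm,Value} with unchanged call semantics. A minimal sketch of how the replacement classes are exercised, assuming Paddle 2.x in dygraph mode (the layer and input below are illustrative, not taken from the test):

    import paddle

    linear = paddle.nn.Linear(10, 10)
    loss = linear(paddle.rand([4, 10])).sum()
    loss.backward()

    # collect (parameter, gradient) pairs, then clip them by global norm
    params_grads = [(p, p.grad) for p in linear.parameters() if p.grad is not None]
    clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=0.8)
    params_grads = clip(params_grads)  # returns the clipped (parameter, gradient) pairs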
python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
...
@@ -361,7 +361,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
         place = fluid.CPUPlace()
         with fluid.dygraph.guard(place):
             model = MyLayer(size, vocab_size, size)
-            grad_clip = fluid.clip.GradientClipByGlobalNorm(0.001)
+            grad_clip = paddle.nn.ClipGradByGlobalNorm(0.001)
             optimizer = fluid.optimizer.AdamOptimizer(
                 0.001, parameter_list=model.parameters(), grad_clip=grad_clip
             )
...
@@ -380,7 +380,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
         with fluid.dygraph.guard(place):
             model = MyLayer2(size, vocab_size, size)
-            grad_clip = fluid.clip.GradientClipByGlobalNorm(0.001)
+            grad_clip = paddle.nn.ClipGradByGlobalNorm(0.001)
             optimizer = fluid.optimizer.AdamOptimizer(
                 0.001, parameter_list=model.parameters(), grad_clip=grad_clip
             )
...
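The clip object is simply handed to the optimizer through grad_clip and applied automatically during the update. A sketch with the Paddle 2.x optimizer API (paddle.optimizer.Adam is used here for illustration; the test itself keeps fluid.optimizer.AdamOptimizer):

    import paddle

    model = paddle.nn.Linear(10, 10)  # stand-in for MyLayer
    grad_clip = paddle.nn.ClipGradByGlobalNorm(0.001)
    optimizer = paddle.optimizer.Adam(
        learning_rate=0.001,
        parameters=model.parameters(),
        grad_clip=grad_clip,  # gradients are clipped inside optimizer.step()
    )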
python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
...
@@ -52,7 +52,7 @@ class TestSimpleNet(unittest.TestCase):
                 fluid.set_flags(
                     {'FLAGS_sort_sum_gradient': sort_sum_gradient}
                 )
-                # grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)
+                # grad_clip = paddle.nn.ClipGradByGlobalNorm(5.0)
                 input_word = np.array([[1, 2], [2, 1]]).astype('int64')
                 input = paddle.to_tensor(input_word)
...
@@ -91,7 +91,7 @@ class TestSimpleNet(unittest.TestCase):
                 fluid.set_flags(
                     {'FLAGS_sort_sum_gradient': sort_sum_gradient}
                 )
-                grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)
+                grad_clip = paddle.nn.ClipGradByGlobalNorm(5.0)
                 input_word = np.array([[1, 2], [2, 1]]).astype('int64')
                 input = to_variable(input_word)
...
python/paddle/fluid/tests/unittests/xpu/test_clip_op_xpu.py
...
@@ -131,13 +131,13 @@ class TestClipOpError(unittest.TestCase):
            input_data = np.random.random((2, 4)).astype("float32")

            def test_Variable():
-               fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+               paddle.clip(x=input_data, min=-1.0, max=1.0)

            self.assertRaises(TypeError, test_Variable)

            def test_dtype():
                x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
-               fluid.layers.clip(x=x2, min=-1.0, max=1.0)
+               paddle.clip(x=x2, min=-1.0, max=1.0)

            self.assertRaises(TypeError, test_dtype)

        paddle.disable_static()
...
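paddle.clip is the drop-in replacement for fluid.layers.clip and performs element-wise clamping; a minimal sketch (values illustrative), independent of the error cases tested above:

    import paddle

    x = paddle.to_tensor([[-2.0, 0.5], [1.7, 3.0]])
    y = paddle.clip(x, min=-1.0, max=1.0)  # every element clamped to [-1.0, 1.0]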
python/paddle/hapi/model.py
...
@@ -1535,7 +1535,7 @@ class Model:
                 assert isinstance(
                     self._optimizer._grad_clip,
                     (paddle.nn.ClipGradByGlobalNorm, paddle.nn.ClipGradByNorm),
-                ), "Only GradientClipByNorm and GradientClipByGlobalNorm are supported in amp training with level=O2 currently."
+                ), "Only ClipGradByNorm and ClipGradByGlobalNorm are supported in amp training with level=O2 currently."
             self._adapter._amp_custom_lists = {}
             self._adapter._amp_configs = {}
...
python/paddle/incubate/distributed/models/moe/grad_clip.py
...
@@ -15,13 +15,14 @@
 import paddle
 import paddle.distributed as dist
 from paddle.fluid import core, layers
-from paddle.fluid.clip import ClipGradBase, _squared_l2_norm
 from paddle.fluid.dygraph import base as imperative_base
+from paddle.nn import clip
+from paddle.nn.clip import ClipGradBase, _squared_l2_norm


 class ClipGradForMOEByGlobalNorm(ClipGradBase):
     r"""
-    The Algrithm is the same as paddle.fluid.clip.ClipGradByGlobalNorm
+    The Algrithm is the same as paddle.nn.ClipGradByGlobalNorm
     Given a list of Tensor :math:`t\_list` , calculate the global norm for the elements of all tensors in
     :math:`t\_list` , and limit it to ``clip_norm`` .
...
@@ -113,8 +114,8 @@ class ClipGradForMOEByGlobalNorm(ClipGradBase):
                     continue
                 merge_grad = g
                 if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                    merge_grad = layers.merge_selected_rows(g)
-                    merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
+                    merge_grad = clip.merge_selected_rows(g)
+                    merge_grad = clip.get_tensor_from_selected_rows(merge_grad)
                 sum_square = _squared_l2_norm(merge_grad)
                 if sum_square.dtype == core.VarDesc.VarType.FP16:
                     sum_square_list_fp16.append(sum_square)
...
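For reference, clipping by global norm scales every gradient by clip_norm / max(global_norm, clip_norm), where global_norm is the l2 norm over all gradients taken together. A minimal sketch of that rule (not the MoE-aware variant implemented above; the helper name is illustrative):

    import paddle

    def clip_by_global_norm(grads, clip_norm):
        # global_norm = sqrt(sum_i ||g_i||_2^2)
        global_norm = paddle.sqrt(paddle.add_n([paddle.sum(paddle.square(g)) for g in grads]))
        limit = paddle.to_tensor(clip_norm, dtype=global_norm.dtype)
        scale = limit / paddle.maximum(global_norm, limit)
        return [g * scale for g in grads]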
python/paddle/incubate/optimizer/distributed_fused_lamb.py
...
@@ -16,11 +16,11 @@ import os
 import paddle
 from paddle.fluid import core, framework, unique_name
-from paddle.fluid.clip import ClipGradByGlobalNorm
 from paddle.fluid.executor import global_scope
 from paddle.fluid.framework import Variable, name_scope
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.optimizer import Optimizer
+from paddle.nn import ClipGradByGlobalNorm


 def init_communicator(block, rank, ranks, ring_id):
...
python/paddle/nn/clip.py
This diff is collapsed.
python/paddle/optimizer/adamw.py
...
@@ -20,10 +20,10 @@ import paddle
 from .. import _C_ops
 from ..fluid import core, framework, unique_name
-from ..fluid.clip import GradientClipBase
 from ..fluid.dygraph import base as imperative_base
 from ..fluid.framework import Parameter, Variable
 from ..fluid.layer_helper import LayerHelper
+from ..nn.clip import GradientClipBase
 from .lr import LRScheduler
 from .optimizer import Optimizer
...
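The AdamW optimizer keeps accepting any of the clip classes through grad_clip after this import change; a short usage sketch with the public 2.x API (model and hyperparameters illustrative):

    import paddle

    model = paddle.nn.Linear(10, 10)
    opt = paddle.optimizer.AdamW(
        learning_rate=1e-3,
        parameters=model.parameters(),
        weight_decay=0.01,
        grad_clip=paddle.nn.ClipGradByNorm(clip_norm=1.0),
    )
    loss = model(paddle.rand([4, 10])).sum()
    loss.backward()
    opt.step()        # gradients are clipped before the parameter update
    opt.clear_grad()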
python/paddle/optimizer/optimizer.py
...
@@ -18,6 +18,7 @@ from collections import defaultdict
 import numpy as np

 import paddle
+import paddle.autograd as imperative_base
 from paddle import _C_ops
 from paddle.fluid import core
 from paddle.fluid.framework import (
...
@@ -32,12 +33,6 @@ from paddle.fluid.framework import (
 from ..fluid import framework, unique_name
 from ..fluid.backward import _get_no_grad_set_name, append_backward
-from ..fluid.clip import (
-    GradientClipBase,
-    append_gradient_clip_ops,
-    error_clip_callback,
-)
-from ..fluid.dygraph import base as imperative_base
 from ..fluid.framework import Parameter, program_guard
 from ..fluid.initializer import Constant
 from ..fluid.layer_helper import LayerHelper
...
@@ -168,7 +163,7 @@ class Optimizer:
     """

-    @imperative_base.no_grad
+    @imperative_base.no_grad()
     def __init__(
         self,
         learning_rate,
...
@@ -225,7 +220,7 @@ class Optimizer:
                 % type(learning_rate)
             )
         if grad_clip is not None:
-            if not isinstance(grad_clip, GradientClipBase):
+            if not isinstance(grad_clip, paddle.nn.clip.GradientClipBase):
                 raise TypeError(
                     "'grad_clip' should be an instance of GradientClipBase's derived class"
                 )
...
@@ -1042,7 +1037,7 @@ class Optimizer:
                 params_grads.append((parameter_list[index], grad))
         else:
             if callbacks is None:
-                callbacks = [error_clip_callback]
+                callbacks = [paddle.nn.clip.error_clip_callback]
             else:
                 assert isinstance(callbacks, list)
             program = loss.block.program
...
@@ -1103,7 +1098,7 @@ class Optimizer:
             params_grads = self._grad_clip(params_grads)
         else:
-            params_grads = append_gradient_clip_ops(params_grads)
+            params_grads = paddle.nn.clip.append_gradient_clip_ops(params_grads)

         # Add regularization if any
         params_grads = self.append_regularization_ops(
...
@@ -1317,7 +1312,7 @@ class Optimizer:
         else:
             core.clear_gradients(param_list, set_to_zero)

-    @imperative_base.no_grad
+    @imperative_base.no_grad()
     def minimize(
         self, loss, startup_program=None, parameters=None, no_grad_set=None
     ):
...
@@ -1380,7 +1375,7 @@ class Optimizer:
         return optimize_ops, params_grads

-    @imperative_base.no_grad
+    @imperative_base.no_grad()
     @framework.dygraph_only
     def step(self):
         """
...
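The decorator change (@imperative_base.no_grad to @imperative_base.no_grad()) follows imperative_base now being paddle.autograd, whose no_grad is instantiated before use. A sketch of the equivalent public usage, assuming Paddle 2.x (the decorated function is illustrative):

    import paddle

    @paddle.no_grad()  # instantiated, mirroring @imperative_base.no_grad() above
    def evaluate(layer, x):
        return layer(x)

    layer = paddle.nn.Linear(4, 4)
    out = evaluate(layer, paddle.rand([2, 4]))  # forward pass without gradient tracking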