Commit 6c9fa665 (unverified)
Authored Dec 25, 2022 by wanghuancoder; committed via GitHub on Dec 25, 2022.
delete legacy dygraph code in python/paddle/optimizer (#49308)
Parent: 983ae1d7
Showing 9 changed files with 348 additions and 600 deletions (+348, -600).
python/paddle/optimizer/adadelta.py   +23  -23
python/paddle/optimizer/adam.py       +64  -134
python/paddle/optimizer/adamax.py     +32  -51
python/paddle/optimizer/adamw.py      +83  -119
python/paddle/optimizer/lamb.py       +41  -69
python/paddle/optimizer/lr.py         +1   -10
python/paddle/optimizer/momentum.py   +51  -105
python/paddle/optimizer/optimizer.py  +27  -53
python/paddle/optimizer/sgd.py        +26  -36
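All nine files apply the same refactor: each optimizer's update path used to dispatch three ways (eager dygraph via _C_ops, legacy dygraph via _legacy_C_ops, and static graph via block.append_op), and this commit deletes the legacy branch so only the eager and static paths remain. The sketch below restates that control-flow change only; it is illustrative, not Paddle source, and every name in it (eager_kernel, legacy_kernel, append_static_op, the stubbed mode checks) is a stand-in.

# Illustrative sketch of the dispatch change applied across the nine files below.
# All names are hypothetical stand-ins, stubbed so the sketch runs on its own.

def in_dygraph_mode() -> bool:          # stand-in for framework.in_dygraph_mode()
    return True

def _in_legacy_dygraph() -> bool:       # stand-in for the removed legacy check
    return False

def eager_kernel():                     # stand-in for a _C_ops.*_ call
    print("eager dygraph kernel")

def legacy_kernel():                    # stand-in for a _legacy_C_ops.* call
    print("legacy dygraph kernel")

def append_static_op():                 # stand-in for block.append_op(...)
    print("static-graph op appended")

def step_before():
    # Old shape: three branches, two of them early-returning.
    if in_dygraph_mode():
        eager_kernel()
        return None
    if _in_legacy_dygraph():            # this branch is what the commit deletes
        legacy_kernel()
        return None
    append_static_op()

def step_after():
    # New shape: eager dygraph or static graph, nothing in between.
    if in_dygraph_mode():
        eager_kernel()
        return None
    else:
        append_static_op()

if __name__ == "__main__":
    step_before()
    step_after()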
python/paddle/optimizer/adadelta.py

@@ -170,7 +170,7 @@ class Adadelta(Optimizer):
                    self._epsilon,
                )
            )
            return None
        else:
            if not isinstance(block, framework.Block):
                raise TypeError("block is not instance of framework.Block.")
python/paddle/optimizer/adam.py

@@ -16,7 +16,7 @@ import warnings
 from collections import defaultdict
 import paddle
-from paddle import _C_ops, _legacy_C_ops
+from paddle import _C_ops
 from ..fluid import core, framework, unique_name
 from ..fluid.dygraph import base as imperative_base

@@ -393,50 +393,7 @@ class Adam(Optimizer):
            )
            return None
-        if framework._in_legacy_dygraph():
-            _beta1 = (
-                self._beta1
-                if not isinstance(self._beta1, Variable)
-                else self._beta1.numpy().item(0)
-            )
-            _beta2 = (
-                self._beta2
-                if not isinstance(self._beta2, Variable)
-                else self._beta2.numpy().item(0)
-            )
-            _, _, _, _, _, _ = _legacy_C_ops.adam(
-                param_and_grad[0],
-                param_and_grad[1],
-                lr,
-                moment1,
-                moment2,
-                beta1_pow_acc,
-                beta2_pow_acc,
-                master_weight,
-                param_and_grad[0],
-                moment1,
-                moment2,
-                beta1_pow_acc,
-                beta2_pow_acc,
-                master_weight,
-                'epsilon',
-                self._epsilon,
-                'lazy_mode',
-                self._lazy_mode,
-                'min_row_size_to_use_multithread',
-                1000,
-                'beta1',
-                _beta1,
-                'beta2',
-                _beta2,
-                'multi_precision',
-                find_master,
-            )
-            return None
+        else:
            inputs = {
                "Param": [param_and_grad[0]],
                "Grad": [param_and_grad[1]],

@@ -729,15 +686,13 @@ class Adam(Optimizer):
                    else self._beta2.numpy().item(0)
                )
-            if framework._non_static_mode():
+            if framework.in_dygraph_mode():
                master_weight = self._master_weight_dict[key]
                master_weight = (
                    master_weight[param_group_idx]
                    if master_weight is not None
                    else None
                )
-                if in_dygraph_mode():
                _, _, _, _, _, _ = _C_ops.merged_adam_(
                    self._param_dict[key][param_group_idx],
                    grad_dict[key],

@@ -753,31 +708,6 @@ class Adam(Optimizer):
                    find_master,
                    False,
                )
-                else:
-                    _, _, _, _, _, _ = _legacy_C_ops.merged_adam(
-                        self._param_dict[key][param_group_idx],
-                        grad_dict[key],
-                        lr_dict[key],
-                        self._moment1_dict[key][param_group_idx],
-                        self._moment2_dict[key][param_group_idx],
-                        self._beta1_pow_acc_dict[key][param_group_idx],
-                        self._beta2_pow_acc_dict[key][param_group_idx],
-                        master_weight,
-                        self._param_dict[key][param_group_idx],
-                        self._moment1_dict[key][param_group_idx],
-                        self._moment2_dict[key][param_group_idx],
-                        self._beta1_pow_acc_dict[key][param_group_idx],
-                        self._beta2_pow_acc_dict[key][param_group_idx],
-                        master_weight,
-                        'epsilon',
-                        self._epsilon,
-                        'beta1',
-                        _beta1,
-                        'beta2',
-                        _beta2,
-                        'multi_precision',
-                        find_master,
-                    )
            else:
                inputs = {
                    "Param": self._param_dict[key][param_group_idx],
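One piece of the kept adam.py code worth spelling out is the beta1/beta2 normalization visible in the hunks above: a beta hyperparameter may be stored either as a plain Python float or as a 1-element tensor, and it is flattened to a float before being handed to the eager kernel. The sketch below mirrors that check with a hypothetical FakeTensor standing in for paddle's Variable; it is an approximation, not Paddle source.

# Hedged sketch of the `beta if not isinstance(beta, Variable) else beta.numpy().item(0)`
# pattern kept in adam.py. FakeTensor is a hypothetical stand-in for a 1-element tensor.

import numpy as np

class FakeTensor:
    def __init__(self, value):
        self._value = np.asarray([value])

    def numpy(self):
        return self._value

def to_scalar(beta):
    # Plain floats pass through; tensor-valued betas are flattened to a float.
    return beta if not isinstance(beta, FakeTensor) else beta.numpy().item(0)

print(to_scalar(0.9))                # -> 0.9
print(to_scalar(FakeTensor(0.999)))  # -> 0.999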
python/paddle/optimizer/adamax.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from paddle import _C_ops, _legacy_C_ops
+from paddle import _C_ops
 from ..fluid import framework
 from ..fluid.dygraph import no_grad

@@ -210,24 +210,6 @@ class Adamax(Optimizer):
                self._beta2,
                self._epsilon,
            )
-        elif framework._in_legacy_dygraph():
-            _legacy_C_ops.adamax(
-                param_and_grad[0],
-                param_and_grad[1],
-                self._create_param_lr(param_and_grad),
-                moment,
-                inf_norm,
-                beta1_pow_acc,
-                param_and_grad[0],
-                moment,
-                inf_norm,
-                "beta1",
-                self._beta1,
-                "beta2",
-                self._beta2,
-                "epsilon",
-                self._epsilon,
-            )
        else:
            # create the adamax optimize op
            adamax_op = block.append_op(

@@ -271,7 +253,7 @@ class Adamax(Optimizer):
                        beta1_pow_acc, self._beta1, 0.0, True
                    )
                    beta1_pow_acc.copy_(tmp, False)
                    continue
                else:
                    with param.block.program._optimized_guard(
                        [param, grad]
                    ), name_scope('adamax'):

@@ -301,8 +283,7 @@ class Adamax(Optimizer):
                        beta1_pow_acc, self._beta1, 0.0, True
                    )
                    beta1_pow_acc.copy_(tmp, False)
                    continue
                else:
                    with param.block.program._optimized_guard(
                        [param, grad]
                    ), name_scope('adamax'):
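For reference, the quantities passed in the deleted adamax call above (moment, inf_norm, beta1_pow_acc) are the first-moment accumulator, the infinity-norm accumulator, and the running beta1**t used to unbias the step; the kept hunks apparently scale beta1_pow_acc by beta1 each step and copy it back. The sketch below is textbook Adamax math on numpy arrays, hedged as an approximation and not Paddle's kernel.

# Hedged sketch of an Adamax step; numpy stand-ins only, not Paddle's implementation.

import numpy as np

def adamax_step(param, grad, lr, moment, inf_norm, beta1_pow_acc,
                beta1=0.9, beta2=0.999, epsilon=1e-8):
    moment[:] = beta1 * moment + (1 - beta1) * grad          # first moment
    inf_norm[:] = np.maximum(beta2 * inf_norm, np.abs(grad))  # infinity norm
    param -= (lr / (1 - beta1_pow_acc)) * moment / (inf_norm + epsilon)

p = np.array([1.0])
m = np.zeros(1)
u = np.zeros(1)
adamax_step(p, np.array([0.5]), lr=0.01, moment=m, inf_norm=u,
            beta1_pow_acc=0.9)   # beta1_pow_acc = beta1**1 after the first step
print(p, m, u)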
python/paddle/optimizer/adamw.py

@@ -18,7 +18,7 @@ from collections.abc import Callable
 import paddle
-from .. import _C_ops, _legacy_C_ops
+from .. import _C_ops
 from ..fluid import core, framework, unique_name
 from ..fluid.clip import GradientClipBase
 from ..fluid.dygraph import base as imperative_base

@@ -473,7 +473,7 @@ class AdamW(Optimizer):
            lr = self._create_param_lr(param_and_grad)
            # create the adamw optimize op
-            if framework._non_static_mode():
+            if framework.in_dygraph_mode():
                lr_ratio_ = (
                    1.0
                    if self._lr_ratio is None

@@ -491,7 +491,6 @@ class AdamW(Optimizer):
                    else self._beta2.numpy().item(0)
                )
-                if framework.in_dygraph_mode():
                found_inf = self._get_auxiliary_var('found_inf')
                _, _, _, _, _, _ = _C_ops.adamw_(
                    param_and_grad[0],

@@ -514,43 +513,8 @@ class AdamW(Optimizer):
                    find_master,
                    False,
                )
-                else:
-                    _, _, _, _, _, _ = _legacy_C_ops.adamw(
-                        param_and_grad[0],
-                        param_and_grad[1],
-                        lr,
-                        moment1,
-                        moment2,
-                        beta1_pow_acc,
-                        beta2_pow_acc,
-                        master_weight,
-                        param_and_grad[0],
-                        moment1,
-                        moment2,
-                        beta1_pow_acc,
-                        beta2_pow_acc,
-                        master_weight,
-                        'epsilon',
-                        self._epsilon,
-                        'lazy_mode',
-                        self._lazy_mode,
-                        'min_row_size_to_use_multithread',
-                        1000,
-                        'beta1',
-                        _beta1,
-                        'beta2',
-                        _beta2,
-                        "with_decay",
-                        with_decay,
-                        'coeff',
-                        self._weight_decay,
-                        'multi_precision',
-                        find_master,
-                        'lr_ratio',
-                        lr_ratio_,
-                    )
                return None
+            else:
                inputs = {
                    "Param": [param_and_grad[0]],
                    "Grad": [param_and_grad[1]],
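The 'with_decay', 'coeff', and 'lr_ratio' attributes in the deleted legacy call (and kept by the eager _C_ops.adamw_ path) control AdamW's decoupled weight decay: the parameter is shrunk directly by the (possibly ratio-adjusted) learning rate times the decay coefficient, separately from the Adam moment update. The sketch below isolates only that decay piece; it is a hedged numpy approximation, not Paddle's kernel.

# Hedged sketch of AdamW's decoupled decay term only; numpy stand-in.

import numpy as np

def adamw_decay_only(param, lr, coeff, with_decay=True, lr_ratio=1.0):
    # param *= (1 - lr * lr_ratio * coeff), applied before/alongside the Adam update.
    if with_decay:
        param *= 1.0 - lr * lr_ratio * coeff
    return param

w = np.array([1.0, -2.0])
print(adamw_decay_only(w, lr=0.001, coeff=0.01))   # a tiny shrink toward zero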
python/paddle/optimizer/lamb.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 import paddle
-from paddle import _C_ops, _legacy_C_ops
+from paddle import _C_ops
 from paddle.fluid.executor import global_scope
 from ..fluid import core, framework, unique_name

@@ -313,35 +313,7 @@ class Lamb(Optimizer):
                find_master,
            )
            return None
-        if framework._non_static_mode():
-            _legacy_C_ops.lamb(
-                param_and_grad[0],
-                param_and_grad[1],
-                lr,
-                moment1,
-                moment2,
-                beta1_pow_acc,
-                beta2_pow_acc,
-                master_weight,
-                param_and_grad[0],
-                moment1,
-                moment2,
-                beta1_pow_acc,
-                beta2_pow_acc,
-                master_weight,
-                'beta1',
-                self._beta1,
-                'beta2',
-                self._beta2,
-                'epsilon',
-                self._epsilon,
-                'weight_decay',
-                weight_decay,
-                'multi_precision',
-                find_master,
-            )
-            return None
+        else:
            # create the lamb optimize op
            inputs = {
                "Param": param_and_grad[0],
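The 'weight_decay' attribute in the deleted lamb call feeds LAMB's layerwise update: an Adam-like direction plus weight decay is rescaled by a per-layer trust ratio ||param|| / ||update||. The sketch below shows only that math with numpy; it is a hedged approximation, the deleted code itself just dispatched to the fused legacy kernel.

# Hedged sketch of the LAMB direction and trust ratio; numpy stand-ins only.

import numpy as np

def lamb_direction_and_ratio(param, m_hat, v_hat, weight_decay, epsilon=1e-6):
    update = m_hat / (np.sqrt(v_hat) + epsilon) + weight_decay * param
    p_norm, u_norm = np.linalg.norm(param), np.linalg.norm(update)
    trust_ratio = p_norm / u_norm if p_norm > 0 and u_norm > 0 else 1.0
    return update, trust_ratio

p = np.array([1.0, -1.0])
upd, ratio = lamb_direction_and_ratio(p, m_hat=np.array([0.1, 0.1]),
                                      v_hat=np.array([0.01, 0.01]),
                                      weight_decay=0.01)
print(upd, ratio)   # the parameter would then move by lr * ratio * upd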
python/paddle/optimizer/lr.py

@@ -20,8 +20,6 @@ import numpy
 import paddle.fluid.core as core
 from paddle import Tensor
-from ..fluid.framework import _in_legacy_dygraph

 __all__ = [  # noqa
     'LRScheduler',
     'NoamDecay',

@@ -1395,15 +1393,8 @@ class ReduceOnPlateau(LRScheduler):
        else:
            self.last_epoch = epoch

-        if not _in_legacy_dygraph():
-            tmp = core.eager.Tensor
-        else:
-            # need to declarate explicitly
-            from paddle.framework import VarBase as Tensor
-
-            tmp = Tensor
        # loss must be float, numpy.ndarray or 1-D Tensor with shape [1]
-        if isinstance(metrics, (tmp, numpy.ndarray)):
+        if isinstance(metrics, (core.eager.Tensor, numpy.ndarray)):
            assert len(metrics.shape) == 1 and metrics.shape[0] == 1, (
                "the metrics.shape "
                "should be (1L,), but the current metrics.shape is {}. Maybe that "
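The surviving ReduceOnPlateau check above accepts a float, a numpy.ndarray, or a 1-element eager Tensor, and array-like inputs must have shape (1,). The sketch below approximates only that validation using numpy in place of core.eager.Tensor; it is not the Paddle implementation.

# Hedged sketch of the metrics validation kept in ReduceOnPlateau.step.

import numpy

def check_metrics(metrics):
    if isinstance(metrics, numpy.ndarray):   # the real code also allows core.eager.Tensor
        assert len(metrics.shape) == 1 and metrics.shape[0] == 1, (
            "the metrics.shape should be (1,), "
            f"but the current metrics.shape is {metrics.shape}."
        )
    return float(numpy.asarray(metrics).reshape(-1)[0])

print(check_metrics(0.25))                    # a plain float is accepted
print(check_metrics(numpy.array([0.125])))    # a shape-(1,) array is accepted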
python/paddle/optimizer/momentum.py

@@ -15,8 +15,8 @@
 import warnings
 import paddle
-from paddle import _C_ops, _legacy_C_ops
+from paddle import _C_ops
-from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode
+from paddle.fluid.framework import in_dygraph_mode
 from paddle.fluid.regularizer import L2DecayRegularizer
 from ..fluid import core, framework, unique_name

@@ -333,30 +333,6 @@ class Momentum(Optimizer):
            else None
        )
-        if _in_legacy_dygraph():
-            if isinstance(param_and_grad, dict):
-                self._update_regularization(param_and_grad['weight_decay'])
-            _, _, _ = _legacy_C_ops.momentum(
-                param_and_grad[0],
-                param_and_grad[1],
-                velocity_acc,
-                lr,
-                master_weight,
-                param_and_grad[0],
-                velocity_acc,
-                master_weight,
-                'mu',
-                self._momentum,
-                'use_nesterov',
-                self._use_nesterov,
-                'regularization_method',
-                regularization_method,
-                'regularization_coeff',
-                regularization_coeff,
-                'multi_precision',
-                find_master,
-            )
-            return None
        if in_dygraph_mode():
            if isinstance(param_and_grad, dict):
                self._update_regularization(param_and_grad['weight_decay'])

@@ -373,7 +349,7 @@ class Momentum(Optimizer):
                find_master,
                self._rescale_grad,
            )
        else:
            attrs = {
                "mu": self._momentum,
                "use_nesterov": self._use_nesterov,

@@ -553,7 +529,6 @@ class Momentum(Optimizer):
                else None
            )
-            if framework._non_static_mode():
                if in_dygraph_mode():
                    _, _, _ = _C_ops.merged_momentum_(
                        self._param_dict[key][param_group_idx],

@@ -563,40 +538,11 @@ class Momentum(Optimizer):
                        master_weight,
                        self._momentum,
                        self._use_nesterov,
                        self._regularization_method_dict[key][param_group_idx],
                        self._regularization_coeff_dict[key][param_group_idx],
                        find_master,
                        self._rescale_grad,
                    )
-                else:
-                    _, _, _ = _legacy_C_ops.merged_momentum(
-                        self._param_dict[key][param_group_idx],
-                        grad_dict[key],
-                        self._velocity_dict[key][param_group_idx],
-                        lr_dict[key],
-                        master_weight,
-                        self._param_dict[key][param_group_idx],
-                        self._velocity_dict[key][param_group_idx],
-                        master_weight,
-                        'mu',
-                        self._momentum,
-                        'use_nesterov',
-                        self._use_nesterov,
-                        'regularization_method',
-                        self._regularization_method_dict[key][param_group_idx],
-                        'regularization_coeff',
-                        self._regularization_coeff_dict[key][param_group_idx],
-                        'multi_precision',
-                        find_master,
-                    )
            else:
                inputs = {
                    "Param": self._param_dict[key][param_group_idx],
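The kept _C_ops.merged_momentum_ path above fuses one update over a whole group of parameters and velocities instead of issuing one call per parameter. The sketch below shows what such a "merged" momentum update does conceptually, using plain SGD-with-momentum math on numpy arrays; it is a hedged illustration, none of it is Paddle's implementation.

# Hedged sketch of a merged momentum update over a list of parameters; numpy only.

import numpy as np

def merged_momentum(params, grads, velocities, lr, mu=0.9, use_nesterov=False):
    # Updates every (param, velocity) pair in place, mirroring the fused call's outputs.
    for p, g, v in zip(params, grads, velocities):
        v *= mu
        v += g
        if use_nesterov:
            p -= lr * (g + mu * v)
        else:
            p -= lr * v

params = [np.ones(3), np.ones(2)]
grads = [np.full(3, 0.1), np.full(2, 0.2)]
velocities = [np.zeros(3), np.zeros(2)]
merged_momentum(params, grads, velocities, lr=0.01)
print(params[0], velocities[0])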
python/paddle/optimizer/optimizer.py

@@ -18,13 +18,12 @@ from collections import defaultdict
 import numpy as np
 import paddle
-from paddle import _C_ops, _legacy_C_ops
+from paddle import _C_ops
 from paddle.fluid import core
 from paddle.fluid.framework import (
     Variable,
     _current_expected_place,
     _in_eager_without_dygraph_check,
-    _in_legacy_dygraph,
     default_main_program,
     device_guard,
     in_dygraph_mode,

@@ -534,17 +533,6 @@ class Optimizer:
                        current_lr.dtype,
                        place,
                    )
-                elif _in_legacy_dygraph():
-                    _legacy_C_ops.fill_constant(
-                        current_lr,
-                        'value',
-                        float(value),
-                        'dtype',
-                        current_lr.dtype,
-                        'shape',
-                        list(current_lr.shape),
-                    )
                else:
                    global_block = framework.default_main_program().global_block()
                    global_block.append_op(

@@ -1042,10 +1030,9 @@ class Optimizer:
        if self._dtype is None:
            self._dtype = loss.dtype
-        if framework._non_static_mode():
+        if framework.in_dygraph_mode():
            parameter_list = parameters if parameters else self._parameter_list
-            if framework.in_dygraph_mode():
            # It is very time-consuming to call c++ functions in a loop on the python side.
            # We put this part of the code on the c++ side to improve the speed in eager mode.
            params_grads = []

@@ -1053,17 +1040,6 @@ class Optimizer:
            for index, grad in enumerate(grads):
                if grad is not None:
                    params_grads.append((parameter_list[index], grad))
-            else:
-                # Keep the original code to support legacy mode.
-                # Delete the else branch when the legacy mode exits.
-                params_grads = []
-                for param in parameter_list:
-                    if param.stop_gradient:
-                        continue
-                    if param._grad_ivar() is not None:
-                        # create gradient tensor
-                        grad_var = param._grad_ivar()
-                        params_grads.append((param, grad_var))
        else:
            if callbacks is None:
                callbacks = [error_clip_callback]

@@ -1207,9 +1183,7 @@ class Optimizer:
        if framework.in_dygraph_mode():
            return _C_ops.add_n([grad, regularization_term])
-        elif framework._in_legacy_dygraph():
-            return _legacy_C_ops.sum([grad, regularization_term])
+        else:
            new_grad = grad
            if grad.type == core.VarDesc.VarType.SELECTED_ROWS:
                # FIXME(zcd): If the grad is SELECTED_ROWS, after regularization,
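The gradient-pairing loop kept in the eager path above is worth restating: after backward, gradients come back as a list aligned with parameter_list, and only non-None entries are kept as (parameter, grad) pairs; the deleted else-branch that walked param._grad_ivar() needed no replacement because only the eager path remains. A minimal sketch with plain Python stand-ins:

# Hedged sketch of the (parameter, grad) pairing kept in Optimizer.backward's eager path.

parameter_list = ["weight", "bias", "frozen"]
grads = [0.1, 0.2, None]        # a None grad means the parameter received no gradient

params_grads = []
for index, grad in enumerate(grads):
    if grad is not None:
        params_grads.append((parameter_list[index], grad))

print(params_grads)             # [('weight', 0.1), ('bias', 0.2)]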
python/paddle/optimizer/sgd.py

@@ -15,11 +15,11 @@
 import warnings
 import paddle
-from paddle import _C_ops, _legacy_C_ops
+from paddle import _C_ops
 from ..fluid import core, framework, unique_name
 from ..fluid.dygraph import no_grad
-from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode
+from ..fluid.framework import in_dygraph_mode
 from ..fluid.layer_helper import LayerHelper
 from .optimizer import Optimizer

@@ -166,17 +166,7 @@ class SGD(Optimizer):
                find_master,
            )
            return None
-        if _in_legacy_dygraph():
-            _legacy_C_ops.sgd(
-                param_and_grad[0],
-                lr,
-                param_and_grad[1],
-                master_weight,
-                param_and_grad[0],
-                master_weight,
-            )
-            return None
+        else:
            assert isinstance(block, framework.Block)
            # create the optimize op
            inputs = {
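The deleted _legacy_C_ops.sgd call, like the kept eager kernel, performs param <- param - lr * grad, optionally maintaining a float32 "master weight" copy when the parameter itself is stored in lower precision. The sketch below is a hedged numpy approximation of that update, not Paddle's kernel.

# Hedged sketch of an SGD step with an optional fp32 master weight; numpy stand-ins.

import numpy as np

def sgd_step(param, lr, grad, master_weight=None):
    if master_weight is not None:
        master_weight -= lr * grad                   # update the fp32 master copy
        param[:] = master_weight.astype(param.dtype)  # cast back into the parameter
    else:
        param -= lr * grad

w = np.array([1.0, 2.0], dtype=np.float32)
sgd_step(w, lr=0.1, grad=np.array([0.5, 0.5], dtype=np.float32))
print(w)    # [0.95 1.95]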