Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
62fd3209
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
62fd3209
编写于
3月 09, 2020
作者:
W
WangXi
提交者:
GitHub
3月 09, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix dgc param regularizer, test=develop (#22888)
上级
07e13b84
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
36 addition
and
13 deletion
+36
-13
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+21
-12
python/paddle/fluid/tests/unittests/test_dgc_optimizer.py
python/paddle/fluid/tests/unittests/test_dgc_optimizer.py
+15
-1
未找到文件。
python/paddle/fluid/optimizer.py
浏览文件 @
62fd3209
...
@@ -1149,21 +1149,23 @@ class DGCMomentumOptimizer(Optimizer):
...
@@ -1149,21 +1149,23 @@ class DGCMomentumOptimizer(Optimizer):
self
.
_num_trainers
=
num_trainers
self
.
_num_trainers
=
num_trainers
self
.
_clip_norm
=
local_grad_clip_norm
*
(
num_trainers
**-
0.5
)
self
.
_clip_norm
=
local_grad_clip_norm
*
(
num_trainers
**-
0.5
)
self
.
_get_dgc_regularization_param
()
self
.
regular_type
,
self
.
regular_coeff
=
self
.
_get_regularization_param
(
self
.
regularization
)
def
_get_
dgc_regularization_param
(
self
):
def
_get_
regularization_param
(
self
,
regularization
):
self
.
regular_coeff
=
0.
0
regular_type
=
0
self
.
regular_type
=
0
regular_coeff
=
0.
0
if
self
.
regularization
is
not
None
:
if
regularization
is
not
None
:
self
.
regular_coeff
=
self
.
regularization
.
_regularization_coeff
regular_coeff
=
regularization
.
_regularization_coeff
from
.regularizer
import
L1Decay
,
L2Decay
from
.regularizer
import
L1Decay
,
L2Decay
if
isinstance
(
self
.
regularization
,
L1Decay
):
if
isinstance
(
regularization
,
L1Decay
):
self
.
regular_type
=
1
regular_type
=
1
elif
isinstance
(
self
.
regularization
,
L2Decay
):
elif
isinstance
(
regularization
,
L2Decay
):
self
.
regular_type
=
2
regular_type
=
2
else
:
else
:
assert
False
,
'regularization must be None|L1Decay|L2Deacy'
assert
False
,
'regularization must be None|L1Decay|L2Deacy'
return
regular_type
,
regular_coeff
def
_is_use_dgc
(
self
,
param_var
,
grad_var
):
def
_is_use_dgc
(
self
,
param_var
,
grad_var
):
var_numel
=
abs
(
reduce
(
lambda
x
,
y
:
x
*
y
,
param_var
.
shape
))
var_numel
=
abs
(
reduce
(
lambda
x
,
y
:
x
*
y
,
param_var
.
shape
))
...
@@ -1364,6 +1366,13 @@ class DGCMomentumOptimizer(Optimizer):
...
@@ -1364,6 +1366,13 @@ class DGCMomentumOptimizer(Optimizer):
block
=
framework
.
default_main_program
().
global_block
()
block
=
framework
.
default_main_program
().
global_block
()
op_maker
=
core
.
op_proto_and_checker_maker
op_maker
=
core
.
op_proto_and_checker_maker
regular_type
=
self
.
regular_type
regular_coeff
=
self
.
regular_coeff
# The regularizer of the Parameters have higher priority
if
param_var
.
regularizer
is
not
None
:
regular_type
,
regular_coeff
=
self
.
_get_regularization_param
(
param_var
.
regularizer
)
dgc_op
=
block
.
append_op
(
dgc_op
=
block
.
append_op
(
type
=
"dgc"
,
type
=
"dgc"
,
inputs
=
{
inputs
=
{
...
@@ -1388,8 +1397,8 @@ class DGCMomentumOptimizer(Optimizer):
...
@@ -1388,8 +1397,8 @@ class DGCMomentumOptimizer(Optimizer):
"use_nesterov"
:
self
.
_use_nesterov
,
"use_nesterov"
:
self
.
_use_nesterov
,
"rampup_begin_step"
:
float
(
self
.
_rampup_begin_step
),
"rampup_begin_step"
:
float
(
self
.
_rampup_begin_step
),
"rampup_step"
:
float
(
self
.
_rampup_step
),
"rampup_step"
:
float
(
self
.
_rampup_step
),
"regular_coeff"
:
float
(
self
.
regular_coeff
),
"regular_coeff"
:
float
(
regular_coeff
),
"regular_type"
:
int
(
self
.
regular_type
),
"regular_type"
:
int
(
regular_type
),
},
},
stop_gradient
=
True
)
stop_gradient
=
True
)
...
...
python/paddle/fluid/tests/unittests/test_dgc_optimizer.py
浏览文件 @
62fd3209
...
@@ -44,7 +44,9 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
...
@@ -44,7 +44,9 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
shape
=
[
dims
[
0
],
dims
[
1
]],
shape
=
[
dims
[
0
],
dims
[
1
]],
lod_level
=
0
,
lod_level
=
0
,
name
=
"mul.x"
,
name
=
"mul.x"
,
optimize_attr
=
{
'learning_rate'
:
1.1
})
optimize_attr
=
{
'learning_rate'
:
1.1
},
regularizer
=
None
if
regularization
is
not
None
else
regularizer
.
L2DecayRegularizer
(
2e-4
))
mul_y
=
block
.
create_var
(
mul_y
=
block
.
create_var
(
dtype
=
"float32"
,
dtype
=
"float32"
,
shape
=
[
dims
[
1
],
dims
[
2
]],
shape
=
[
dims
[
1
],
dims
[
2
]],
...
@@ -102,6 +104,14 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
...
@@ -102,6 +104,14 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
self
.
assertEqual
(
init_ops
[
0
].
type
,
"fill_constant"
)
self
.
assertEqual
(
init_ops
[
0
].
type
,
"fill_constant"
)
self
.
assertAlmostEqual
(
init_ops
[
0
].
attr
(
'value'
),
learning_rate
)
self
.
assertAlmostEqual
(
init_ops
[
0
].
attr
(
'value'
),
learning_rate
)
# check dgc op regularization coeff
train_ops
=
program
.
global_block
().
ops
for
op
in
train_ops
:
if
op
.
type
==
"dgc"
:
coeff
=
2e-4
if
regularization
is
None
else
1e-4
self
.
assertAlmostEqual
(
op
.
attr
(
'regular_coeff'
),
coeff
)
print
(
"dgc regular_coeff="
+
str
(
coeff
))
with
open
(
"test_dgc_optimizer_"
+
name
+
".log"
,
"w"
)
as
f
:
with
open
(
"test_dgc_optimizer_"
+
name
+
".log"
,
"w"
)
as
f
:
program_to_code
(
program
,
fout
=
f
)
program_to_code
(
program
,
fout
=
f
)
...
@@ -116,6 +126,10 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
...
@@ -116,6 +126,10 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
name
=
"dgc_momentum"
,
name
=
"dgc_momentum"
,
regularization
=
regularizer
.
L2Decay
(
1e-4
))
regularization
=
regularizer
.
L2Decay
(
1e-4
))
# check param.regularizer in dgc
self
.
check_dgc_momentum_optimizer
(
dims
=
[
16
,
1024
,
8
],
name
=
"dgc_momentum"
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录