Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
d8d73ff3
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d8d73ff3
编写于
3月 31, 2019
作者:
Q
Qiyang Min
提交者:
GitHub
3月 31, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #15584 from velconia/imperative_lr_scheduler
Support imperative learning rate scheduler
上级
1ebd7434
64b09294
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
773 addition
and
230 deletion
+773
-230
python/paddle/fluid/dygraph/__init__.py
python/paddle/fluid/dygraph/__init__.py
+4
-0
python/paddle/fluid/dygraph/learning_rate_scheduler.py
python/paddle/fluid/dygraph/learning_rate_scheduler.py
+224
-0
python/paddle/fluid/layers/learning_rate_scheduler.py
python/paddle/fluid/layers/learning_rate_scheduler.py
+111
-71
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+57
-20
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+2
-2
python/paddle/fluid/tests/unittests/test_imperative_mnist.py
python/paddle/fluid/tests/unittests/test_imperative_mnist.py
+217
-0
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
...paddle/fluid/tests/unittests/test_imperative_optimizer.py
+158
-137
未找到文件。
python/paddle/fluid/dygraph/__init__.py
浏览文件 @
d8d73ff3
...
...
@@ -32,6 +32,9 @@ from .profiler import *
from
.
import
checkpoint
from
.checkpoint
import
*
from
.
import
learning_rate_scheduler
from
.learning_rate_scheduler
import
*
__all__
=
[]
__all__
+=
layers
.
__all__
__all__
+=
base
.
__all__
...
...
@@ -39,3 +42,4 @@ __all__ += nn.__all__
__all__
+=
tracer
.
__all__
__all__
+=
profiler
.
__all__
__all__
+=
checkpoint
.
__all__
__all__
+=
learning_rate_scheduler
.
__all__
python/paddle/fluid/dygraph/learning_rate_scheduler.py
0 → 100644
浏览文件 @
d8d73ff3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
math
from
..
import
unique_name
# Names exported by ``from .learning_rate_scheduler import *``.
# Only the concrete schedules are listed; the ``LearningRateDecay`` base
# class defined below is not part of this list.
__all__ = [
    'NoamDecay', 'PiecewiseDecay', 'NaturalExpDecay', 'ExponentialDecay',
    'InverseTimeDecay', 'PolynomialDecay', 'CosineDecay'
]
class LearningRateDecay(object):
    """
    Base class of learning rate decay.

    An instance keeps a step counter. Calling the instance asks ``step()``
    for the learning rate at the current counter value, then advances the
    counter by ``step_size``. Subclasses are expected to override ``step()``.

    Args:
        begin: initial value of the step counter. Default 0.
        step: amount added to the counter after every call. Default 1.
        dtype: dtype passed on when a learning rate variable is created.
            Default 'float32'.
    """

    def __init__(self, begin=0, step=1, dtype='float32'):
        self.dtype = dtype
        self.step_size = step
        self.step_num = begin

    def __call__(self):
        """
        Return the learning rate for the current step and advance the counter.

        A plain Python ``float`` returned by ``step()`` is wrapped with
        ``create_lr_var``; any other return value is handed back unchanged.
        """
        current = self.step()
        if isinstance(current, float):
            result = self.create_lr_var(current)
        else:
            result = current
        # The counter only moves forward after the value has been produced.
        self.step_num += self.step_size
        return result

    def create_lr_var(self, lr):
        """
        Create a persistable global variable of shape ``[1]`` holding
        ``float(lr)``, using ``self.dtype`` and a freshly generated
        "learning_rate" name.
        """
        # Imported lazily inside the method rather than at module level.
        from .. import layers
        return layers.create_global_var(
            name=unique_name.generate("learning_rate"),
            shape=[1],
            value=float(lr),
            dtype=self.dtype,
            persistable=True)

    def step(self):
        """Compute the learning rate for ``self.step_num``; must be overridden."""
        raise NotImplementedError()
class PiecewiseDecay(LearningRateDecay):
    """
    Piecewise-constant learning rate schedule.

    While the step counter is smaller than ``boundaries[i]`` the i-th value
    is used; once the counter has reached every boundary, the last value is
    used.

    Args:
        boundaries: sequence of step thresholds, checked in order.
        values: sequence of learning rates. One variable is created per
            entry. This class does not validate the lengths; presumably
            ``len(values) == len(boundaries) + 1`` is expected -- confirm
            against the caller.
        begin: initial value of the step counter. Default 0, in line with
            the other schedules in this module.
        step: amount added to the counter after every call. Default 1.
        dtype: dtype of the created learning rate variables.
            Default 'float32'.
    """

    def __init__(self, boundaries, values, begin=0, step=1, dtype='float32'):
        super(PiecewiseDecay, self).__init__(begin, step, dtype)
        self.boundaries = boundaries
        self.values = values

        # Variables are created once here so that step() only selects among
        # existing variables instead of creating a new one on every call.
        self.vars = [self.create_lr_var(value) for value in values]

    def step(self):
        """Return the variable of the first segment the current step falls in."""
        for idx, boundary in enumerate(self.boundaries):
            if self.step_num < boundary:
                return self.vars[idx]
        # Past every boundary: the final value applies.
        return self.vars[-1]
class NaturalExpDecay(LearningRateDecay):
    """
    Natural exponential decay:

        lr = learning_rate * exp(-decay_rate * (step_num / decay_steps))

    With ``staircase=True`` the ratio ``step_num / decay_steps`` is floored
    before it is used.

    Args:
        learning_rate: initial learning rate.
        decay_steps: number of steps the ratio is normalised by.
        decay_rate: decay rate multiplied with the ratio.
        staircase: floor the ratio when True. Default False.
        begin: initial value of the step counter. Default 0.
        step: amount added to the counter after every call. Default 1.
        dtype: dtype of the created variables. Default 'float32'.
    """

    def __init__(self,
                 learning_rate,
                 decay_steps,
                 decay_rate,
                 staircase=False,
                 begin=0,
                 step=1,
                 dtype='float32'):
        super(NaturalExpDecay, self).__init__(begin, step, dtype)
        self.learning_rate = learning_rate
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.staircase = staircase

    def step(self):
        """Return the decayed learning rate for the current step."""
        from .. import layers
        # float() forces true division. This module does not import
        # ``division`` from __future__, so int / int would truncate on
        # Python 2 and make the schedule behave as if staircase were set.
        div_res = self.create_lr_var(self.step_num / float(self.decay_steps))
        if self.staircase:
            div_res = layers.floor(div_res)
        decayed_lr = self.learning_rate * layers.exp(-1 * self.decay_rate *
                                                     div_res)

        return decayed_lr
class ExponentialDecay(LearningRateDecay):
    """
    Exponential decay:

        lr = learning_rate * decay_rate ^ (step_num / decay_steps)

    With ``staircase=True`` the ratio ``step_num / decay_steps`` is floored
    before it is used as the exponent.

    Args:
        learning_rate: initial learning rate.
        decay_steps: number of steps the ratio is normalised by.
        decay_rate: base of the exponentiation.
        staircase: floor the ratio when True. Default False.
        begin: initial value of the step counter. Default 0.
        step: amount added to the counter after every call. Default 1.
        dtype: dtype of the created variables. Default 'float32'.
    """

    def __init__(self,
                 learning_rate,
                 decay_steps,
                 decay_rate,
                 staircase=False,
                 begin=0,
                 step=1,
                 dtype='float32'):
        super(ExponentialDecay, self).__init__(begin, step, dtype)
        self.learning_rate = learning_rate
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.staircase = staircase

    def step(self):
        """Return the decayed learning rate for the current step."""
        from .. import layers
        # float() forces true division. This module does not import
        # ``division`` from __future__, so int / int would truncate on
        # Python 2 and make the schedule behave as if staircase were set.
        div_res = self.create_lr_var(self.step_num / float(self.decay_steps))
        if self.staircase:
            div_res = layers.floor(div_res)

        decayed_lr = self.learning_rate * (self.decay_rate**div_res)

        return decayed_lr
class InverseTimeDecay(LearningRateDecay):
    """
    Inverse time decay:

        lr = learning_rate / (1 + decay_rate * (step_num / decay_steps))

    With ``staircase=True`` the ratio ``step_num / decay_steps`` is floored
    before it is used.

    Args:
        learning_rate: initial learning rate.
        decay_steps: number of steps the ratio is normalised by.
        decay_rate: factor multiplied with the ratio in the denominator.
        staircase: floor the ratio when True. Default False.
        begin: initial value of the step counter. Default 0.
        step: amount added to the counter after every call. Default 1.
        dtype: dtype of the created variables. Default 'float32'.
    """

    def __init__(self,
                 learning_rate,
                 decay_steps,
                 decay_rate,
                 staircase=False,
                 begin=0,
                 step=1,
                 dtype='float32'):
        super(InverseTimeDecay, self).__init__(begin, step, dtype)
        self.learning_rate = learning_rate
        self.decay_steps = decay_steps
        self.decay_rate = decay_rate
        self.staircase = staircase

    def step(self):
        """Return the decayed learning rate for the current step."""
        from .. import layers
        # float() forces true division. This module does not import
        # ``division`` from __future__, so int / int would truncate on
        # Python 2 and make the schedule behave as if staircase were set.
        div_res = self.create_lr_var(self.step_num / float(self.decay_steps))
        if self.staircase:
            div_res = layers.floor(div_res)

        decayed_lr = self.learning_rate / (1 + self.decay_rate * div_res)

        return decayed_lr
class PolynomialDecay(LearningRateDecay):
    """
    Polynomial decay from ``learning_rate`` towards ``end_learning_rate``:

        lr = (learning_rate - end_learning_rate)
             * (1 - step / decay_steps) ^ power + end_learning_rate

    Without ``cycle`` the step used in the formula is capped at
    ``decay_steps``. With ``cycle`` the step is left uncapped and
    ``decay_steps`` is instead multiplied by ``ceil(step / decay_steps)``
    (taken as 1 when the step counter is 0).

    Args:
        learning_rate: initial learning rate.
        decay_steps: number of steps over which the rate decays.
        end_learning_rate: final learning rate. Default 0.0001.
        power: exponent of the polynomial. Default 1.0.
        cycle: stretch the decay horizon instead of capping the step.
            Default False.
        begin: initial value of the step counter. Default 0.
        step: amount added to the counter after every call. Default 1.
        dtype: dtype of the created variables. Default 'float32'.
    """

    def __init__(self,
                 learning_rate,
                 decay_steps,
                 end_learning_rate=0.0001,
                 power=1.0,
                 cycle=False,
                 begin=0,
                 step=1,
                 dtype='float32'):
        super(PolynomialDecay, self).__init__(begin, step, dtype)
        self.cycle = cycle
        self.power = power
        self.end_learning_rate = end_learning_rate
        self.decay_steps = decay_steps
        self.learning_rate = learning_rate

    def step(self):
        """Return the decayed learning rate for the current step."""
        from .. import layers
        cur_step = self.step_num
        horizon = self.decay_steps

        if not self.cycle:
            # Cap the step at decay_steps so the rate stays at the end value.
            if cur_step < self.decay_steps:
                capped = cur_step
            else:
                capped = self.decay_steps
            cur_step = self.create_lr_var(capped)
        else:
            ratio = cur_step / float(self.decay_steps)
            periods = layers.ceil(self.create_lr_var(ratio))
            # ceil(0) would give a zero-length horizon at the very first step.
            if cur_step == 0:
                periods = self.create_lr_var(1.0)
            horizon = self.decay_steps * periods

        lr_range = self.learning_rate - self.end_learning_rate
        remaining = 1 - cur_step / horizon
        decayed_lr = lr_range * (remaining**self.power) + \
            self.end_learning_rate
        return decayed_lr
class CosineDecay(LearningRateDecay):
    """
    Cosine decay, evaluated once per epoch:

        lr = learning_rate * 0.5 * (cos(epoch * pi / epochs) + 1)

    where ``epoch = floor(step_num / step_each_epoch)``.

    Args:
        learning_rate: initial learning rate.
        step_each_epoch: number of steps that make up one epoch.
        epochs: total number of epochs used in the cosine argument.
        begin: initial value of the step counter. Default 0.
        step: amount added to the counter after every call. Default 1.
        dtype: dtype of the created variables. Default 'float32'.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 begin=0,
                 step=1,
                 dtype='float32'):
        super(CosineDecay, self).__init__(begin, step, dtype)
        self.epochs = epochs
        self.step_each_epoch = step_each_epoch
        self.learning_rate = learning_rate

    def step(self):
        """Return the decayed learning rate for the current step."""
        from .. import layers
        epoch_ratio = self.step_num / self.step_each_epoch
        cur_epoch = layers.floor(self.create_lr_var(epoch_ratio))
        half_lr = self.learning_rate * 0.5
        return half_lr * (layers.cos(cur_epoch * math.pi / self.epochs) + 1)
class NoamDecay(LearningRateDecay):
    """
    Noam decay:

        lr = d_model^-0.5 * min(step_num^-0.5, warmup_steps^-1.5 * step_num)

    Args:
        d_model: value whose inverse square root scales the result.
        warmup_steps: value used in the ``warmup_steps^-1.5 * step_num`` term.
        begin: initial value of the step counter. Default 1.
        step: amount added to the counter after every call. Default 1.
        dtype: dtype of the created variables. Default 'float32'.
    """

    def __init__(self, d_model, warmup_steps, begin=1, step=1,
                 dtype='float32'):
        super(NoamDecay, self).__init__(begin, step, dtype)
        self.warmup_steps = warmup_steps
        self.d_model = d_model

    def step(self):
        """Return the learning rate for the current step."""
        from .. import layers
        inv_sqrt_step = self.create_lr_var(self.step_num**-0.5)
        warmup_term = self.create_lr_var(
            (self.warmup_steps**-1.5) * self.step_num)
        model_scale = self.d_model**-0.5
        return model_scale * layers.elementwise_min(inv_sqrt_step, warmup_term)
python/paddle/fluid/layers/learning_rate_scheduler.py
浏览文件 @
d8d73ff3
...
...
@@ -22,13 +22,16 @@ strategy according to this module.
from
__future__
import
print_function
import
math
from
.
import
control_flow
from
.
import
nn
from
.
import
ops
from
.
import
tensor
from
..initializer
import
init_on_cpu
from
..framework
import
default_main_program
,
Parameter
,
unique_name
,
name_scope
import
math
from
..dygraph
import
base
as
imperative_base
from
..dygraph
import
learning_rate_scheduler
as
imperate_lr
__all__
=
[
'exponential_decay'
,
'natural_exp_decay'
,
'inverse_time_decay'
,
...
...
@@ -66,13 +69,17 @@ def noam_decay(d_model, warmup_steps):
The decayed learning rate.
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
(
1
)
if
imperative_base
.
enabled
():
decay
=
imperate_lr
.
NoamDecay
(
d_model
,
warmup_steps
)
return
decay
else
:
global_step
=
_decay_step_counter
(
1
)
a
=
global_step
**-
0.5
b
=
(
warmup_steps
**-
1.5
)
*
global_step
lr_value
=
(
d_model
**-
0.5
)
*
nn
.
elementwise_min
(
a
,
b
)
a
=
global_step
**-
0.5
b
=
(
warmup_steps
**-
1.5
)
*
global_step
lr_value
=
(
d_model
**-
0.5
)
*
nn
.
elementwise_min
(
a
,
b
)
return
lr_value
return
lr_value
def
exponential_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
...
@@ -112,14 +119,19 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
if
imperative_base
.
enabled
():
decay
=
imperate_lr
.
ExponentialDecay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
)
return
decay
else
:
global_step
=
_decay_step_counter
()
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
return
decayed_lr
return
decayed_lr
def
natural_exp_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
...
@@ -141,14 +153,19 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
The decayed learning rate
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
if
imperative_base
.
enabled
():
decay
=
imperate_lr
.
NaturalExpDecay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
)
return
decay
else
:
global_step
=
_decay_step_counter
()
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
return
decayed_lr
return
decayed_lr
def
inverse_time_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
...
@@ -187,15 +204,20 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
sgd_optimizer.minimize(avg_cost)
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
if
imperative_base
.
enabled
():
decay
=
imperate_lr
.
InverseTimeDecay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
)
return
decay
else
:
global_step
=
_decay_step_counter
()
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
return
decayed_lr
return
decayed_lr
def
polynomial_decay
(
learning_rate
,
...
...
@@ -227,27 +249,33 @@ def polynomial_decay(learning_rate,
Variable: The decayed learning rate
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
if
cycle
:
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
zero_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
)
one_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
1.0
)
with
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
global_step
==
zero_var
):
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
decay_steps
=
decay_steps
*
div_res
if
imperative_base
.
enabled
():
decay
=
imperate_lr
.
PolynomialDecay
(
learning_rate
,
decay_steps
,
end_learning_rate
,
power
,
cycle
)
return
decay
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
decay_steps
))
global_step
=
nn
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
global_step
=
_decay_step_counter
()
if
cycle
:
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
zero_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
)
one_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
1.0
)
with
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
global_step
==
zero_var
):
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
decay_steps
=
decay_steps
*
div_res
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
decay_steps
))
global_step
=
nn
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
return
decayed_lr
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
return
decayed_lr
def
piecewise_decay
(
boundaries
,
values
):
...
...
@@ -279,34 +307,38 @@ def piecewise_decay(boundaries, values):
if
len
(
values
)
-
len
(
boundaries
)
!=
1
:
raise
ValueError
(
"len(values) - len(boundaries) should be 1"
)
global_step
=
_decay_step_counter
()
if
imperative_base
.
enabled
():
decay
=
imperate_lr
.
PiecewiseDecay
(
boundaries
,
values
,
0
)
return
decay
else
:
global_step
=
_decay_step_counter
()
lr
=
tensor
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
True
,
name
=
"learning_rate"
)
lr
=
tensor
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
True
,
name
=
"learning_rate"
)
with
control_flow
.
Switch
()
as
switch
:
for
i
in
range
(
len
(
boundaries
)):
boundary_val
=
tensor
.
fill_constant
(
with
control_flow
.
Switch
()
as
switch
:
for
i
in
range
(
len
(
boundaries
)):
boundary_val
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
boundaries
[
i
]),
force_cpu
=
True
)
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
boundaries
[
i
]),
force_cpu
=
True
)
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
with
switch
.
default
():
tensor
.
assign
(
last_value_var
,
lr
)
value
=
float
(
values
[
len
(
values
)
-
1
]))
with
switch
.
default
():
tensor
.
assign
(
last_value_var
,
lr
)
return
lr
return
lr
def
cosine_decay
(
learning_rate
,
step_each_epoch
,
epochs
):
...
...
@@ -336,12 +368,17 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
learning_rate = base_lr, step_each_epoch=10000, epochs=120)
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
if
imperative_base
.
enabled
():
decay
=
imperate_lr
.
CosineDecay
(
learning_rate
,
step_each_epoch
,
epochs
)
return
decay
else
:
global_step
=
_decay_step_counter
()
cur_epoch
=
ops
.
floor
(
global_step
/
step_each_epoch
)
decayed_lr
=
learning_rate
*
0.5
*
(
ops
.
cos
(
cur_epoch
*
math
.
pi
/
epochs
)
+
1
)
return
decayed_lr
cur_epoch
=
ops
.
floor
(
global_step
/
step_each_epoch
)
decayed_lr
=
learning_rate
*
0.5
*
(
ops
.
cos
(
cur_epoch
*
math
.
pi
/
epochs
)
+
1
)
return
decayed_lr
def
append_LARS
(
params_grads
,
learning_rate
,
weight_decay
):
...
...
@@ -363,6 +400,9 @@ def append_LARS(params_grads, learning_rate, weight_decay):
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
"""
assert
not
imperative_base
.
enabled
(
),
"append_LARS is NOT supported in dygraph mode now"
def
_balanced_weight
(
param_norm
,
grad_norm
):
if
weight_decay
==
1.0
:
return
grad_norm
+
param_norm
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
d8d73ff3
...
...
@@ -30,6 +30,8 @@ from .initializer import Constant
from
.layer_helper
import
LayerHelper
from
.layers
import
ops
from
.regularizer
import
append_regularization_ops
from
.dygraph
import
base
as
imperative_base
from
.dygraph.learning_rate_scheduler
import
LearningRateDecay
from
paddle.fluid
import
core
from
paddle.fluid.layers
import
tensor
from
functools
import
reduce
...
...
@@ -53,9 +55,19 @@ class Optimizer(object):
"""
def
__init__
(
self
,
learning_rate
,
regularization
=
None
,
name
=
None
):
if
not
isinstance
(
learning_rate
,
float
)
and
\
not
isinstance
(
learning_rate
,
framework
.
Variable
):
raise
TypeError
(
"learning rate should be float or Variable"
)
if
framework
.
_in_dygraph_mode
():
if
not
isinstance
(
learning_rate
,
float
)
and
\
not
isinstance
(
learning_rate
,
LearningRateDecay
):
raise
TypeError
(
"learning rate should be float or LearningRateDecay, got %s here"
%
type
(
learning_rate
))
else
:
if
not
isinstance
(
learning_rate
,
float
)
and
\
not
isinstance
(
learning_rate
,
framework
.
Variable
):
raise
TypeError
(
"learning rate should be float or Variable, got %s here"
%
type
(
learning_rate
))
self
.
_name
=
name
self
.
regularization
=
regularization
self
.
_learning_rate
=
learning_rate
...
...
@@ -79,24 +91,49 @@ class Optimizer(object):
return
self
.
_opti_name_list
def
_create_global_learning_rate
(
self
):
lr
=
self
.
_global_learning_rate
()
if
imperative_base
.
enabled
():
# create learning rate Variable
if
isinstance
(
self
.
_learning_rate
,
float
):
lr
=
self
.
_global_learning_rate
()
if
isinstance
(
lr
,
framework
.
Variable
):
return
else
:
if
not
isinstance
(
self
.
_learning_rate
,
float
):
if
isinstance
(
lr
,
framework
.
Variable
):
return
else
:
self
.
_learning_rate_map
[
framework
.
default_main_program
(
)]
=
layers
.
create_global_var
(
name
=
unique_name
.
generate
(
"learning_rate"
),
shape
=
[
1
],
value
=
float
(
self
.
_learning_rate
),
dtype
=
'float32'
if
self
.
_dtype
is
None
else
self
.
_dtype
,
persistable
=
True
)
# get learning rate Variable from LearningRateDecay
elif
isinstance
(
self
.
_learning_rate
,
LearningRateDecay
):
self
.
_learning_rate_map
[
framework
.
default_main_program
(
)]
=
self
.
_learning_rate
()
else
:
raise
TypeError
(
"learning rate variable is create outside optimizer,"
"can not create new learning rate variable for new program"
)
"optimizer's learning rate must be float or LearningRateDecay"
)
else
:
lr
=
self
.
_global_learning_rate
()
# create learning rate in the current main program
self
.
_learning_rate_map
[
framework
.
default_main_program
(
)]
=
layers
.
create_global_var
(
name
=
unique_name
.
generate
(
"learning_rate"
),
shape
=
[
1
],
value
=
float
(
self
.
_learning_rate
),
dtype
=
'float32'
if
self
.
_dtype
is
None
else
self
.
_dtype
,
persistable
=
True
)
if
isinstance
(
lr
,
framework
.
Variable
):
return
else
:
if
not
isinstance
(
self
.
_learning_rate
,
float
):
raise
TypeError
(
"learning rate variable is create outside optimizer,"
"can not create new learning rate variable for new program"
)
# create learning rate in the current main program
self
.
_learning_rate_map
[
framework
.
default_main_program
(
)]
=
layers
.
create_global_var
(
name
=
unique_name
.
generate
(
"learning_rate"
),
shape
=
[
1
],
value
=
float
(
self
.
_learning_rate
),
dtype
=
'float32'
if
self
.
_dtype
is
None
else
self
.
_dtype
,
persistable
=
True
)
def
_global_learning_rate
(
self
,
program
=
None
):
"""
...
...
@@ -605,10 +642,10 @@ class DGCMomentumOptimizer(MomentumOptimizer):
DGC also uses momentum factor masking and warmup training to overcome the staleness problem caused by reduced communication.
This optimizer will do two things:
1. Compress the gradient by get TopK import value from tensor
\
and use it for allreduce to reduce network bandwidth.
2. Call momentum to optimize on the cost.
Args:
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
d8d73ff3
...
...
@@ -78,7 +78,7 @@ list(REMOVE_ITEM TEST_OPS test_image_classification_resnet)
list
(
REMOVE_ITEM TEST_OPS test_bilinear_interp_op
)
list
(
REMOVE_ITEM TEST_OPS test_nearest_interp_op
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_resnet
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_
optimizer
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_
mnist
)
list
(
REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
...
...
@@ -89,7 +89,7 @@ py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op SERIAL)
py_test_modules
(
test_nearest_interp_op MODULES test_nearest_interp_op SERIAL
)
py_test_modules
(
test_imperative_resnet MODULES test_imperative_resnet ENVS
FLAGS_cudnn_deterministic=1
)
py_test_modules
(
test_imperative_
optimizer MODULES test_imperative_optimizer
ENVS
py_test_modules
(
test_imperative_
mnist MODULES test_imperative_mnist
ENVS
FLAGS_cudnn_deterministic=1
)
if
(
WITH_DISTRIBUTE
)
py_test_modules
(
test_dist_train MODULES test_dist_train SERIAL
)
...
...
python/paddle/fluid/tests/unittests/test_imperative_mnist.py
0 → 100644
浏览文件 @
d8d73ff3
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
contextlib
import
unittest
import
numpy
as
np
import
six
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.dygraph.nn
import
Conv2D
,
Pool2D
,
FC
from
paddle.fluid.dygraph.base
import
to_variable
from
test_imperative_base
import
new_program_scope
class SimpleImgConvPool(fluid.dygraph.Layer):
    """
    A Conv2D layer followed by a Pool2D layer.

    Args:
        name_scope: name scope handed to the parent ``Layer``.
        num_channels, num_filters, filter_size: forwarded to ``Conv2D``.
        pool_size, pool_stride, pool_padding, pool_type, global_pooling:
            forwarded to ``Pool2D``.
        conv_stride, conv_padding, conv_dilation, conv_groups: forwarded to
            ``Conv2D`` as ``stride``, ``padding``, ``dilation``, ``groups``.
        act: accepted but not used by this class.
        use_cudnn: forwarded to both ``Conv2D`` and ``Pool2D``.
        param_attr, bias_attr: forwarded to ``Conv2D``.
    """

    def __init__(self,
                 name_scope,
                 num_channels,
                 num_filters,
                 filter_size,
                 pool_size,
                 pool_stride,
                 pool_padding=0,
                 pool_type='max',
                 global_pooling=False,
                 conv_stride=1,
                 conv_padding=0,
                 conv_dilation=1,
                 conv_groups=1,
                 act=None,
                 use_cudnn=False,
                 param_attr=None,
                 bias_attr=None):
        super(SimpleImgConvPool, self).__init__(name_scope)

        # NOTE(review): ``act`` is never forwarded to Conv2D, so callers
        # passing act="relu" get no activation. Left as-is because wiring it
        # in would change the numbers this test compares -- confirm intent.
        self._conv2d = Conv2D(
            self.full_name(),
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=conv_stride,
            padding=conv_padding,
            dilation=conv_dilation,
            groups=conv_groups,
            # Forward the caller's attributes instead of a hard-coded None;
            # both default to None, so existing callers are unaffected.
            param_attr=param_attr,
            bias_attr=bias_attr,
            use_cudnn=use_cudnn)

        self._pool2d = Pool2D(
            self.full_name(),
            pool_size=pool_size,
            pool_type=pool_type,
            pool_stride=pool_stride,
            pool_padding=pool_padding,
            global_pooling=global_pooling,
            use_cudnn=use_cudnn)

    def forward(self, inputs):
        """Apply the convolution and then the pooling to ``inputs``."""
        x = self._conv2d(inputs)
        x = self._pool2d(x)
        return x
class MNIST(fluid.dygraph.Layer):
    """
    Two ``SimpleImgConvPool`` stages followed by a 10-way softmax ``FC``
    layer whose weights use a normal initializer.
    """

    def __init__(self, name_scope):
        super(MNIST, self).__init__(name_scope)

        # Sub-layers are created in the same order as they are applied.
        self._simple_img_conv_pool_1 = SimpleImgConvPool(
            self.full_name(), 1, 20, 5, 2, 2, act="relu")
        self._simple_img_conv_pool_2 = SimpleImgConvPool(
            self.full_name(), 20, 50, 5, 2, 2, act="relu")

        # Standard deviation of the FC weight initializer.
        flat_features = 50 * 4 * 4
        num_classes = 10
        init_scale = (2.0 / (flat_features**2 * num_classes))**0.5
        fc_initializer = fluid.initializer.NormalInitializer(
            loc=0.0, scale=init_scale)
        self._fc = FC(self.full_name(),
                      10,
                      param_attr=fluid.param_attr.ParamAttr(
                          initializer=fc_initializer),
                      act="softmax")

    def forward(self, inputs):
        """Run ``inputs`` through both conv/pool stages and the FC layer."""
        stage_1 = self._simple_img_conv_pool_1(inputs)
        stage_2 = self._simple_img_conv_pool_2(stage_1)
        return self._fc(stage_2)
class TestImperativeMnist(unittest.TestCase):
    """
    Trains the same MNIST model once in dygraph mode and once as a static
    program, then checks that inputs, initial parameters, the last loss and
    the final parameters agree between the two runs.
    """

    def test_mnist_float32(self):
        seed = 90
        epoch_num = 1

        # ---- dygraph run ----
        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            mnist = MNIST("mnist")
            sgd = SGDOptimizer(learning_rate=1e-3)
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

            dy_param_init_value = {}
            for epoch in range(epoch_num):
                for batch_id, data in enumerate(train_reader()):
                    dy_x_data = np.array(
                        [x[0].reshape(1, 28, 28)
                         for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(128, 1)

                    img = to_variable(dy_x_data)
                    label = to_variable(y_data)
                    label._stop_gradient = True

                    cost = mnist(img)
                    loss = fluid.layers.cross_entropy(cost, label)
                    avg_loss = fluid.layers.mean(loss)

                    dy_out = avg_loss._numpy()

                    # Snapshot the parameters once, before the first update.
                    if epoch == 0 and batch_id == 0:
                        for param in mnist.parameters():
                            dy_param_init_value[param.name] = param._numpy()

                    avg_loss._backward()
                    sgd.minimize(avg_loss)
                    mnist.clear_gradients()

                    # Overwritten every batch; the last batch's values are
                    # the ones compared below.
                    dy_param_value = {
                        param.name: param._numpy()
                        for param in mnist.parameters()
                    }

        # ---- static graph run ----
        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

            mnist = MNIST("mnist")
            sgd = SGDOptimizer(learning_rate=1e-3)
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

            img = fluid.layers.data(
                name='pixel', shape=[1, 28, 28], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            cost = mnist(img)
            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)
            sgd.minimize(avg_loss)

            # initialize params and fetch them
            static_param_name_list = [
                param.name for param in mnist.parameters()
            ]

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            static_param_init_value = dict(zip(static_param_name_list, out))

            for epoch in range(epoch_num):
                for batch_id, data in enumerate(train_reader()):
                    static_x_data = np.array(
                        [x[0].reshape(1, 28, 28)
                         for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape([128, 1])

                    fetch_list = [avg_loss.name]
                    fetch_list.extend(static_param_name_list)
                    out = exe.run(
                        fluid.default_main_program(),
                        feed={"pixel": static_x_data,
                              "label": y_data},
                        fetch_list=fetch_list)

                    # out[0] is the loss; the rest are the parameters in the
                    # order of static_param_name_list.
                    static_out = out[0]
                    static_param_value = dict(
                        zip(static_param_name_list, out[1:]))

        # Compare the arrays themselves. Calling .all() on each side first
        # would reduce both to a single boolean and make the check vacuous.
        self.assertTrue(np.allclose(dy_x_data, static_x_data))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

        self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
浏览文件 @
d8d73ff3
...
...
@@ -22,131 +22,71 @@ import six
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.optimizer
import
SGDOptimizer
from
paddle.fluid.dygraph.nn
import
Conv2D
,
Pool2D
,
FC
from
paddle.fluid.optimizer
import
SGDOptimizer
,
Adam
from
paddle.fluid.dygraph.nn
import
FC
from
paddle.fluid.dygraph.base
import
to_variable
from
test_imperative_base
import
new_program_scope
class
SimpleImgConvPool
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
num_channels
,
num_filters
,
filter_size
,
pool_size
,
pool_stride
,
pool_padding
=
0
,
pool_type
=
'max'
,
global_pooling
=
False
,
conv_stride
=
1
,
conv_padding
=
0
,
conv_dilation
=
1
,
conv_groups
=
1
,
act
=
None
,
use_cudnn
=
False
,
param_attr
=
None
,
bias_attr
=
None
):
super
(
SimpleImgConvPool
,
self
).
__init__
(
name_scope
)
self
.
_conv2d
=
Conv2D
(
self
.
full_name
(),
num_channels
=
num_channels
,
num_filters
=
num_filters
,
filter_size
=
filter_size
,
stride
=
conv_stride
,
padding
=
conv_padding
,
dilation
=
conv_dilation
,
groups
=
conv_groups
,
param_attr
=
None
,
bias_attr
=
None
,
use_cudnn
=
use_cudnn
)
self
.
_pool2d
=
Pool2D
(
self
.
full_name
(),
pool_size
=
pool_size
,
pool_type
=
pool_type
,
pool_stride
=
pool_stride
,
pool_padding
=
pool_padding
,
global_pooling
=
global_pooling
,
use_cudnn
=
use_cudnn
)
class
MLP
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
,
param_attr
=
None
,
bias_attr
=
None
):
super
(
MLP
,
self
).
__init__
(
name_scope
)
def
forward
(
self
,
inputs
):
x
=
self
.
_conv2d
(
inputs
)
x
=
self
.
_pool2d
(
x
)
return
x
class
MNIST
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
name_scope
):
super
(
MNIST
,
self
).
__init__
(
name_scope
)
self
.
_fc1
=
FC
(
self
.
full_name
(),
10
)
self
.
_fc2
=
FC
(
self
.
full_name
(),
10
)
self
.
_simple_img_conv_pool_1
=
SimpleImgConvPool
(
self
.
full_name
(),
1
,
20
,
5
,
2
,
2
,
act
=
"relu"
)
self
.
_simple_img_conv_pool_2
=
SimpleImgConvPool
(
self
.
full_name
(),
20
,
50
,
5
,
2
,
2
,
act
=
"relu"
)
def
forward
(
self
,
inputs
):
y
=
self
.
_fc1
(
inputs
)
y
=
self
.
_fc2
(
y
)
return
y
pool_2_shape
=
50
*
4
*
4
SIZE
=
10
scale
=
(
2.0
/
(
pool_2_shape
**
2
*
SIZE
))
**
0.5
self
.
_fc
=
FC
(
self
.
full_name
(),
10
,
param_attr
=
fluid
.
param_attr
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
NormalInitializer
(
loc
=
0.0
,
scale
=
scale
)),
act
=
"softmax"
)
def
forward
(
self
,
inputs
):
x
=
self
.
_simple_img_conv_pool_1
(
inputs
)
x
=
self
.
_simple_img_conv_pool_2
(
x
)
x
=
self
.
_fc
(
x
)
return
x
class
TestImperativeOptimizerBase
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
batch_num
=
20
def
get_optimizer
(
self
):
raise
NotImplementedError
()
class
TestDygraphMnist
(
unittest
.
TestCase
):
def
test_mnist_float32
(
self
):
def
_check_mlp
(
self
):
seed
=
90
epoch_num
=
1
with
fluid
.
dygraph
.
guard
():
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
m
nist
=
MNIST
(
"mnist"
)
sgd
=
SGDOptimizer
(
learning_rate
=
1e-3
)
m
lp
=
MLP
(
'mlp'
)
optimizer
=
self
.
get_optimizer
(
)
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
train
(),
batch_size
=
128
,
drop_last
=
True
)
dy_param_init_value
=
{}
for
epoch
in
range
(
epoch_num
):
for
batch_id
,
data
in
enumerate
(
train_reader
()):
dy_x_data
=
np
.
array
(
[
x
[
0
].
reshape
(
1
,
28
,
28
)
for
x
in
data
]).
astype
(
'float32'
)
y_data
=
np
.
array
(
[
x
[
1
]
for
x
in
data
]).
astype
(
'int64'
).
reshape
(
128
,
1
)
img
=
to_variable
(
dy_x_data
)
label
=
to_variable
(
y_data
)
label
.
_stop_gradient
=
True
cost
=
mnist
(
img
)
loss
=
fluid
.
layers
.
cross_entropy
(
cost
,
label
)
avg_loss
=
fluid
.
layers
.
mean
(
loss
)
dy_out
=
avg_loss
.
_numpy
()
if
epoch
==
0
and
batch_id
==
0
:
for
param
in
mnist
.
parameters
():
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
avg_loss
.
_backward
()
sgd
.
minimize
(
avg_loss
)
mnist
.
clear_gradients
()
dy_param_value
=
{}
for
param
in
mnist
.
parameters
():
dy_param_value
[
param
.
name
]
=
param
.
_numpy
()
for
batch_id
,
data
in
enumerate
(
train_reader
()):
if
batch_id
>=
self
.
batch_num
:
break
dy_x_data
=
np
.
array
(
[
x
[
0
].
reshape
(
1
,
28
,
28
)
for
x
in
data
]).
astype
(
'float32'
)
y_data
=
np
.
array
([
x
[
1
]
for
x
in
data
]).
astype
(
'int64'
).
reshape
(
128
,
1
)
img
=
to_variable
(
dy_x_data
)
label
=
to_variable
(
y_data
)
label
.
_stop_gradient
=
True
cost
=
mlp
(
img
)
avg_loss
=
fluid
.
layers
.
reduce_mean
(
cost
)
dy_out
=
avg_loss
.
_numpy
()
if
batch_id
==
0
:
for
param
in
mlp
.
parameters
():
dy_param_init_value
[
param
.
name
]
=
param
.
_numpy
()
avg_loss
.
_backward
()
optimizer
.
minimize
(
avg_loss
)
mlp
.
clear_gradients
()
dy_param_value
=
{}
for
param
in
mlp
.
parameters
():
dy_param_value
[
param
.
name
]
=
param
.
_numpy
()
with
new_program_scope
():
fluid
.
default_startup_program
().
random_seed
=
seed
...
...
@@ -155,23 +95,22 @@ class TestDygraphMnist(unittest.TestCase):
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
(
)
if
not
core
.
is_compiled_with_cuda
()
else
fluid
.
CUDAPlace
(
0
))
m
nist
=
MNIST
(
"mnist"
)
sgd
=
SGDOptimizer
(
learning_rate
=
1e-3
)
m
lp
=
MLP
(
'mlp'
)
optimizer
=
self
.
get_optimizer
(
)
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
train
(),
batch_size
=
128
,
drop_last
=
True
)
img
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
cost
=
mnist
(
img
)
loss
=
fluid
.
layers
.
cross_entropy
(
cost
,
label
)
avg_loss
=
fluid
.
layers
.
mean
(
loss
)
sgd
.
minimize
(
avg_loss
)
cost
=
mlp
(
img
)
avg_loss
=
fluid
.
layers
.
reduce_mean
(
cost
)
optimizer
.
minimize
(
avg_loss
)
# initialize params and fetch them
static_param_init_value
=
{}
static_param_name_list
=
[]
for
param
in
m
nist
.
parameters
():
for
param
in
m
lp
.
parameters
():
static_param_name_list
.
append
(
param
.
name
)
out
=
exe
.
run
(
fluid
.
default_startup_program
(),
...
...
@@ -180,29 +119,26 @@ class TestDygraphMnist(unittest.TestCase):
for
i
in
range
(
len
(
static_param_name_list
)):
static_param_init_value
[
static_param_name_list
[
i
]]
=
out
[
i
]
for
epoch
in
range
(
epoch_num
):
for
batch_id
,
data
in
enumerate
(
train_reader
()):
static_x_data
=
np
.
array
(
[
x
[
0
].
reshape
(
1
,
28
,
28
)
for
x
in
data
]).
astype
(
'float32'
)
y_data
=
np
.
array
(
[
x
[
1
]
for
x
in
data
]).
astype
(
'int64'
).
reshape
([
128
,
1
])
fetch_list
=
[
avg_loss
.
name
]
fetch_list
.
extend
(
static_param_name_list
)
out
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
{
"pixel"
:
static_x_data
,
"label"
:
y_data
},
fetch_list
=
fetch_list
)
static_param_value
=
{}
static_out
=
out
[
0
]
for
i
in
range
(
1
,
len
(
out
)):
static_param_value
[
static_param_name_list
[
i
-
1
]]
=
out
[
i
]
self
.
assertTrue
(
np
.
allclose
(
dy_x_data
.
all
(),
static_x_data
.
all
()))
for
batch_id
,
data
in
enumerate
(
train_reader
()):
if
batch_id
>=
self
.
batch_num
:
break
static_x_data
=
np
.
array
(
[
x
[
0
].
reshape
(
1
,
28
,
28
)
for
x
in
data
]).
astype
(
'float32'
)
y_data
=
np
.
array
([
x
[
1
]
for
x
in
data
]).
astype
(
'int64'
).
reshape
(
[
128
,
1
])
fetch_list
=
[
avg_loss
.
name
]
fetch_list
.
extend
(
static_param_name_list
)
out
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
{
"pixel"
:
static_x_data
,
"label"
:
y_data
},
fetch_list
=
fetch_list
)
static_param_value
=
{}
static_out
=
out
[
0
]
for
i
in
range
(
1
,
len
(
out
)):
static_param_value
[
static_param_name_list
[
i
-
1
]]
=
out
[
i
]
for
key
,
value
in
six
.
iteritems
(
static_param_init_value
):
self
.
assertTrue
(
np
.
allclose
(
value
,
dy_param_init_value
[
key
]))
...
...
@@ -210,7 +146,92 @@ class TestDygraphMnist(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
static_out
,
dy_out
))
for
key
,
value
in
six
.
iteritems
(
static_param_value
):
self
.
assertTrue
(
np
.
allclose
(
value
,
dy_param_value
[
key
],
atol
=
1e-5
))
self
.
assertTrue
(
np
.
allclose
(
value
,
dy_param_value
[
key
]))
class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
    """Run the shared MLP check with SGD driven by a piecewise-decay schedule."""

    def get_optimizer(self):
        """Return an SGDOptimizer whose learning rate is a piecewise decay.

        The schedule has boundaries at steps 3, 6 and 9, with one value per
        interval (``len(boundaries) + 1`` values), each 0.1x the previous.
        """
        boundaries = [3, 6, 9]
        values = [0.1 * (0.1**i) for i in range(len(boundaries) + 1)]
        schedule = fluid.layers.piecewise_decay(
            boundaries=boundaries, values=values)
        return SGDOptimizer(learning_rate=schedule)

    def test_sgd(self):
        """Exercise the inherited ``_check_mlp`` with this optimizer."""
        self._check_mlp()
class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
    """Run the shared MLP check with SGD driven by natural-exp decay."""

    def get_optimizer(self):
        """Return an SGDOptimizer using ``fluid.layers.natural_exp_decay``."""
        schedule = fluid.layers.natural_exp_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True)
        return SGDOptimizer(learning_rate=schedule)

    def test_sgd(self):
        """Exercise the inherited ``_check_mlp`` with this optimizer."""
        self._check_mlp()
class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
    """Run the shared MLP check with SGD driven by exponential decay."""

    def get_optimizer(self):
        """Return an SGDOptimizer using ``fluid.layers.exponential_decay``."""
        schedule = fluid.layers.exponential_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True)
        return SGDOptimizer(learning_rate=schedule)

    def test_sgd(self):
        """Exercise the inherited ``_check_mlp`` with this optimizer."""
        self._check_mlp()
class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
    """Run the shared MLP check with Adam driven by inverse-time decay."""

    def get_optimizer(self):
        """Return an Adam optimizer using ``fluid.layers.inverse_time_decay``."""
        schedule = fluid.layers.inverse_time_decay(
            learning_rate=0.1,
            decay_steps=10000,
            decay_rate=0.5,
            staircase=True)
        return Adam(learning_rate=schedule)

    def test_adam(self):
        """Exercise the inherited ``_check_mlp`` with this optimizer."""
        self._check_mlp()
class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
    """Run the shared MLP check with SGD driven by polynomial decay.

    The ``cycle`` flag passed to ``fluid.layers.polynomial_decay`` is read
    from ``self.cycle``; each test method sets it before running the check so
    both the cycling and non-cycling schedules are covered.
    """

    # Class-level default so get_optimizer() does not raise AttributeError
    # when it is called before a test method has assigned self.cycle.
    cycle = False

    def get_optimizer(self):
        """Return an SGDOptimizer using ``fluid.layers.polynomial_decay``.

        Uses ``self.cycle`` (False unless a test method overrides it) for the
        schedule's ``cycle`` argument.
        """
        optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
            learning_rate=0.1, decay_steps=5, cycle=self.cycle))
        return optimizer

    def test_sgd_cycle(self):
        """Run the inherited ``_check_mlp`` with ``cycle=True``."""
        self.cycle = True
        self._check_mlp()

    def test_sgd(self):
        """Run the inherited ``_check_mlp`` with ``cycle=False``."""
        self.cycle = False
        self._check_mlp()
class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
    """Run the shared MLP check with SGD driven by cosine decay."""

    def get_optimizer(self):
        """Return an SGDOptimizer using ``fluid.layers.cosine_decay``."""
        schedule = fluid.layers.cosine_decay(
            learning_rate=0.1, step_each_epoch=10000, epochs=120)
        return SGDOptimizer(learning_rate=schedule)

    def test_sgd(self):
        """Exercise the inherited ``_check_mlp`` with this optimizer."""
        self._check_mlp()
class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
    """Run the shared MLP check with SGD driven by Noam decay."""

    def get_optimizer(self):
        """Return an SGDOptimizer using ``fluid.layers.noam_decay``."""
        schedule = fluid.layers.noam_decay(d_model=512, warmup_steps=8000)
        return SGDOptimizer(learning_rate=schedule)

    def test_sgd(self):
        """Exercise the inherited ``_check_mlp`` with this optimizer."""
        self._check_mlp()
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录