Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
0e1e098c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0e1e098c
编写于
9月 28, 2020
作者:
W
WangXi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add lams lamb, test=develop
上级
8dd3d4b6
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
157 addition
and
174 deletion
+157
-174
python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py
...paddle/distributed/fleet/meta_optimizers/amp_optimizer.py
+1
-1
python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py
...addle/distributed/fleet/meta_optimizers/lamb_optimizer.py
+4
-0
python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py
...addle/distributed/fleet/meta_optimizers/lars_optimizer.py
+4
-0
python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py
...paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py
+43
-55
python/paddle/fluid/tests/unittests/test_dgc_optimizer.py
python/paddle/fluid/tests/unittests/test_dgc_optimizer.py
+13
-4
python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py
...le/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py
+21
-32
python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py
...le/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py
+27
-53
python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py
...id/tests/unittests/test_fleet_recompute_meta_optimizer.py
+44
-29
未找到文件。
python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py
浏览文件 @
0e1e098c
...
...
@@ -46,7 +46,7 @@ class AMPOptimizer(MetaOptimizerBase):
custom_white_list
=
set
(
config
[
'custom_white_list'
])
custom_black_list
=
set
(
config
[
'custom_black_list'
])
custom_black_varnames
=
set
(
config
[
'custom_black_varnames'
])
self
.
amp_lists
=
mixed_precision
.
AutoMixedPrecisionLists
(
amp_lists
=
mixed_precision
.
AutoMixedPrecisionLists
(
custom_white_list
,
custom_black_list
,
custom_black_varnames
)
self
.
wrapped_opt
=
mixed_precision
.
decorate
(
...
...
python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py
浏览文件 @
0e1e098c
...
...
@@ -98,6 +98,10 @@ class LambOptimizer(MetaOptimizerBase):
def
apply_gradients
(
self
,
params_grads
):
return
self
.
lamb_opt
.
apply_gradients
(
params_grads
=
params_grads
)
def
apply_optimize
(
self
,
loss
,
startup_program
,
params_grads
):
return
self
.
lamb_opt
.
apply_optimize
(
loss
,
startup_program
=
startup_program
,
params_grads
=
params_grads
)
def
minimize_impl
(
self
,
loss
,
startup_program
=
None
,
...
...
python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py
浏览文件 @
0e1e098c
...
...
@@ -85,6 +85,10 @@ class LarsOptimizer(MetaOptimizerBase):
def
apply_gradients
(
self
,
params_grads
):
return
self
.
lars_opt
.
apply_gradients
(
params_grads
=
params_grads
)
def
apply_optimize
(
self
,
loss
,
startup_program
,
params_grads
):
return
self
.
lars_opt
.
apply_optimize
(
loss
,
startup_program
=
startup_program
,
params_grads
=
params_grads
)
def
minimize_impl
(
self
,
loss
,
startup_program
=
None
,
...
...
python/paddle/fluid/tests/unittests/
test_fleet_combine_meta_optimizer
.py
→
python/paddle/fluid/tests/unittests/
fleet_meta_optimizer_base
.py
浏览文件 @
0e1e098c
...
...
@@ -19,10 +19,8 @@ import os
import
paddle.distributed.fleet
as
fleet
import
paddle.distributed.fleet.base.role_maker
as
role_maker
paddle
.
enable_static
()
class
TestFleetCombineOptimizer
(
unittest
.
TestCase
):
class
TestFleetMetaOptimizer
(
unittest
.
TestCase
):
def
setUp
(
self
):
os
.
environ
[
"PADDLE_TRAINER_ID"
]
=
"1"
os
.
environ
[
...
...
@@ -50,19 +48,21 @@ class TestFleetCombineOptimizer(unittest.TestCase):
avg_cost
=
paddle
.
fluid
.
layers
.
mean
(
x
=
cost
)
strategy
=
paddle
.
distributed
.
fleet
.
DistributedStrategy
()
strategy
.
dgc
=
True
strategy
.
dgc_configs
=
{
"rampup_begin_step"
:
128
,
"rampup_step"
:
100
,
"sparsity"
:
[
0.996
,
0.999
]
}
return
avg_cost
,
strategy
def
optimizer
(
self
,
loss
,
strategy
,
train_prog
,
startup_prog
):
def
optimizer
(
self
,
loss
,
strategy
,
train_prog
,
startup_prog
,
name
=
'momentum'
):
with
fluid
.
program_guard
(
train_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
if
name
==
'momentum'
:
optimizer
=
paddle
.
fluid
.
optimizer
.
Momentum
(
learning_rate
=
0.01
,
momentum
=
0.9
)
elif
name
==
'adam'
:
optimizer
=
paddle
.
fluid
.
optimizer
.
Adam
(
learning_rate
=
0.01
)
optimizer
=
fleet
.
distributed_optimizer
(
optimizer
,
strategy
=
strategy
)
optimizer
.
minimize
(
loss
)
...
...
@@ -70,53 +70,41 @@ class TestFleetCombineOptimizer(unittest.TestCase):
def
set_strategy
(
self
,
strategy
,
name
):
if
name
==
'amp'
:
strategy
.
amp
=
True
strategy
.
amp_configs
=
{
"init_loss_scaling"
:
32768
,
"decr_every_n_nan_or_inf"
:
2
,
"incr_every_n_steps"
:
1000
,
"incr_ratio"
:
2.0
,
"use_dynamic_loss_scaling"
:
True
,
"decr_ratio"
:
0.5
,
"custom_white_list"
:
[
'softmax'
],
"custom_black_list"
:
[
'tanh'
],
}
elif
name
==
'dgc'
:
strategy
.
dgc
=
True
strategy
.
dgc_configs
=
{
"rampup_begin_step"
:
128
,
"rampup_step"
:
100
,
"sparsity"
:
[
0.996
,
0.999
]
}
elif
name
==
'recompute'
:
strategy
.
recompute
=
True
strategy
.
recompute_configs
=
{
"checkpoints"
:
[
"fc_0.tmp_2"
,
"fc_1.tmp_2"
]
}
def
test_dgc_recompute_optimizer
(
self
):
train_prog
=
fluid
.
Program
()
startup_prog
=
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
self
.
set_strategy
(
strategy
,
'dgc'
)
self
.
set_strategy
(
strategy
,
'recompute'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
)
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
outs
=
[
op
.
output
(
'Out'
)[
0
]
for
op
in
avg_cost
.
block
.
ops
if
op
.
type
==
'mul'
]
self
.
assertIn
(
'dgc'
,
ops
)
self
.
assertIn
(
'dgc_momentum'
,
ops
)
self
.
assertIn
(
'subprog'
,
''
.
join
(
outs
))
def
test_amp_recompute_optimizer
(
self
):
train_prog
=
fluid
.
Program
()
startup_prog
=
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
self
.
set_strategy
(
strategy
,
'amp'
)
self
.
set_strategy
(
strategy
,
'recompute'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
)
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
outs
=
[
op
.
output
(
'Out'
)[
0
]
for
op
in
avg_cost
.
block
.
ops
if
op
.
type
==
'mul'
]
print
(
train_prog
)
self
.
assertIn
(
'cast'
,
ops
)
self
.
assertIn
(
'check_finite_and_unscale'
,
ops
)
self
.
assertIn
(
'subprog'
,
''
.
join
(
outs
))
if
__name__
==
"__main__"
:
unittest
.
main
()
elif
name
==
'lars'
:
strategy
.
lars
=
True
strategy
.
lars_configs
=
{
"lars_coeff"
:
0.001
,
"lars_weight_decay"
:
0.0005
,
"epsilon"
:
0
,
"exclude_from_weight_decay"
:
[
"batch_norm"
,
".b"
],
}
elif
name
==
'lamb'
:
strategy
.
lamb
=
True
strategy
.
lamb_configs
=
{
'lamb_weight_decay'
:
0.01
,
'exclude_from_weight_decay'
:
[],
}
else
:
raise
NotImplementedError
()
python/paddle/fluid/tests/unittests/test_dgc_optimizer.py
浏览文件 @
0e1e098c
...
...
@@ -16,12 +16,14 @@ from __future__ import print_function
import
unittest
import
paddle
import
paddle.fluid.framework
as
framework
import
paddle.fluid.optimizer
as
optimizer
import
paddle.fluid.regularizer
as
regularizer
import
paddle.fluid.clip
as
clip
import
paddle.compat
as
cpt
from
paddle.fluid.backward
import
append_backward
paddle
.
enable_static
()
class
TestDGCMomentumOptimizer
(
unittest
.
TestCase
):
...
...
@@ -86,13 +88,17 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
block
.
append_op
(
type
=
"mean"
,
inputs
=
{
"X"
:
mul_out
},
outputs
=
{
"Out"
:
mean_out
})
# params_grads = append_backward(mean_out)
params_grads
=
dgc_momentum_optimizer
.
backward
(
mean_out
)
params_grads
=
dgc_momentum_optimizer
.
backward
(
mean_out
,
startup_program
=
init_program
)
with
framework
.
program_guard
(
program
,
init_program
):
opts
=
dgc_momentum_optimizer
.
apply_gradients
(
params_grads
)
accumulator_count
=
1
if
name
==
"momentum"
else
2
self
.
assertEqual
(
len
(
params_grads
),
1
)
self
.
assertEqual
(
len
(
dgc_momentum_optimizer
.
get_accumulators
()),
accumulator_count
)
with
framework
.
program_guard
(
program
,
init_program
):
opts
=
dgc_momentum_optimizer
.
apply_gradients
(
params_grads
)
self
.
assertEqual
(
len
(
opts
),
2
)
sgd_op
=
opts
[
-
1
]
self
.
assertEqual
([
op
.
type
for
op
in
opts
],
[
"scale"
,
name
])
...
...
@@ -108,8 +114,11 @@ class TestDGCMomentumOptimizer(unittest.TestCase):
self
.
assertTrue
(
mul_x
.
name
in
velocity_acc
)
# Check init_program
# dgc not apply include: lr, dgc(count, nranks, begin step), (u,)
# dgc apply include: lr, dgc(count, nranks, begin_step), (u,v,k,encode,gather)
init_ops_count
=
5
if
name
==
"momentum"
else
9
init_ops
=
init_program
.
global_block
().
ops
self
.
assertEqual
(
len
(
init_ops
),
1
)
self
.
assertEqual
(
len
(
init_ops
),
init_ops_count
)
self
.
assertEqual
(
init_ops
[
0
].
type
,
"fill_constant"
)
self
.
assertAlmostEqual
(
init_ops
[
0
].
attr
(
'value'
),
learning_rate
)
...
...
python/paddle/fluid/tests/unittests/test_fleet_amp_meta_optimizer.py
浏览文件 @
0e1e098c
...
...
@@ -16,53 +16,42 @@ import paddle.distributed.fleet as fleet
import
paddle.distributed.fleet.base.role_maker
as
role_maker
import
unittest
import
paddle
import
paddle.fluid
as
fluid
import
os
from
fleet_meta_optimizer_base
import
TestFleetMetaOptimizer
paddle
.
enable_static
()
class
TestFleetAMPOptimizer
(
unittest
.
TestCase
):
def
setUp
(
self
):
os
.
environ
[
"PADDLE_TRAINER_ID"
]
=
"0"
os
.
environ
[
"PADDLE_TRAINER_ENDPOINTS"
]
=
"127.0.0.1:36001"
class
TestFleetAMPOptimizer
(
TestFleetMetaOptimizer
):
def
test_amp_optimizer
(
self
):
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
fleet
.
init
(
role
)
input_x
=
paddle
.
fluid
.
layers
.
data
(
name
=
"x"
,
shape
=
[
32
],
dtype
=
'float32'
)
input_y
=
paddle
.
fluid
.
layers
.
data
(
name
=
"y"
,
shape
=
[
1
],
dtype
=
'int64'
)
fc_1
=
paddle
.
fluid
.
layers
.
fc
(
input
=
input_x
,
size
=
64
,
act
=
'tanh'
)
fc_2
=
paddle
.
fluid
.
layers
.
fc
(
input
=
fc_1
,
size
=
64
,
act
=
'tanh'
)
prediction
=
paddle
.
fluid
.
layers
.
fc
(
input
=
[
fc_2
],
size
=
2
,
act
=
'softmax'
)
cost
=
paddle
.
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
input_y
)
avg_cost
=
paddle
.
fluid
.
layers
.
mean
(
x
=
cost
)
train_prog
,
startup_prog
=
fluid
.
Program
(),
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
self
.
set_strategy
(
strategy
,
'amp'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
)
strategy
=
paddle
.
distributed
.
fleet
.
DistributedStrategy
()
strategy
.
amp
=
True
strategy
.
amp_configs
=
{
"init_loss_scaling"
:
32768
,
"decr_every_n_nan_or_inf"
:
2
,
"incr_every_n_steps"
:
1000
,
"incr_ratio"
:
2.0
,
"use_dynamic_loss_scaling"
:
True
,
"decr_ratio"
:
0.5
,
"custom_white_list"
:
[
'softmax'
],
"custom_black_list"
:
[
'tanh'
],
}
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
self
.
assertIn
(
'cast'
,
ops
)
self
.
assertIn
(
'check_finite_and_unscale'
,
ops
)
optimizer
=
paddle
.
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.01
)
optimizer
=
fleet
.
distributed_optimizer
(
optimizer
,
strategy
=
strategy
)
optimizer
.
minimize
(
avg_cost
)
def
test_amp_recompute_optimizer
(
self
):
train_prog
,
startup_prog
=
fluid
.
Program
(),
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
self
.
set_strategy
(
strategy
,
'amp'
)
self
.
set_strategy
(
strategy
,
'recompute'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
)
strategy
=
fleet
.
_final_strategy
()
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
outs
=
[
op
.
output
(
'Out'
)[
0
]
for
op
in
avg_cost
.
block
.
ops
if
op
.
type
==
'mul'
]
self
.
assertIn
(
'cast'
,
ops
)
self
.
assertIn
(
'check_finite_and_unscale'
,
ops
)
self
.
assertIn
(
'subprog'
,
''
.
join
(
outs
))
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py
浏览文件 @
0e1e098c
...
...
@@ -18,66 +18,27 @@ from paddle import fluid
import
os
import
paddle.distributed.fleet
as
fleet
import
paddle.distributed.fleet.base.role_maker
as
role_maker
from
fleet_meta_optimizer_base
import
TestFleetMetaOptimizer
paddle
.
enable_static
()
class
TestFleetDGCOptimizer
(
unittest
.
TestCase
):
def
setUp
(
self
):
os
.
environ
[
"PADDLE_TRAINER_ID"
]
=
"1"
os
.
environ
[
"PADDLE_TRAINER_ENDPOINTS"
]
=
"127.0.0.1:36001,127.0.0.1:36002"
def
net
(
self
,
main_prog
,
startup_prog
):
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
fleet
.
init
(
role
)
input_x
=
paddle
.
fluid
.
layers
.
data
(
name
=
"x"
,
shape
=
[
32
],
dtype
=
'float32'
)
input_y
=
paddle
.
fluid
.
layers
.
data
(
name
=
"y"
,
shape
=
[
1
],
dtype
=
'int64'
)
fc_1
=
paddle
.
fluid
.
layers
.
fc
(
input
=
input_x
,
size
=
64
,
act
=
'tanh'
)
fc_2
=
paddle
.
fluid
.
layers
.
fc
(
input
=
fc_1
,
size
=
256
,
act
=
'tanh'
)
prediction
=
paddle
.
fluid
.
layers
.
fc
(
input
=
[
fc_2
],
size
=
2
,
act
=
'softmax'
)
cost
=
paddle
.
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
input_y
)
avg_cost
=
paddle
.
fluid
.
layers
.
mean
(
x
=
cost
)
strategy
=
paddle
.
distributed
.
fleet
.
DistributedStrategy
()
strategy
.
dgc
=
True
strategy
.
dgc_configs
=
{
"rampup_begin_step"
:
128
,
"rampup_step"
:
100
,
"sparsity"
:
[
0.996
,
0.999
]
}
return
avg_cost
,
strategy
class
TestFleetDGCOptimizer
(
TestFleetMetaOptimizer
):
def
test_dgc_optimizer
(
self
):
startup_prog
=
fluid
.
Program
()
train_prog
=
fluid
.
Program
()
train_prog
,
startup_prog
=
fluid
.
Program
(),
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
optimizer
=
paddle
.
fluid
.
optimizer
.
Momentum
(
learning_rate
=
0.01
,
momentum
=
0.9
)
optimizer
=
fleet
.
distributed_optimizer
(
optimizer
,
strategy
=
strategy
)
optimizer
.
minimize
(
avg_cost
)
self
.
set_strategy
(
strategy
,
'dgc'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
)
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
self
.
assertIn
(
'dgc'
,
ops
)
self
.
assertIn
(
'dgc_momentum'
,
ops
)
def
test_dgc_not_apply_with_adam
(
self
):
startup_prog
=
fluid
.
Program
()
train_prog
=
fluid
.
Program
()
train_prog
,
startup_prog
=
fluid
.
Program
(),
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
optimizer
=
paddle
.
fluid
.
optimizer
.
Adam
(
learning_rate
=
0.01
)
optimizer
=
fleet
.
distributed_optimizer
(
optimizer
,
strategy
=
strategy
)
optimizer
.
minimize
(
avg_cost
)
self
.
set_strategy
(
strategy
,
'dgc'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
,
'adam'
)
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
self
.
assertNotIn
(
'dgc'
,
ops
)
...
...
@@ -87,18 +48,31 @@ class TestFleetDGCOptimizer(unittest.TestCase):
os
.
environ
[
"PADDLE_TRAINER_ID"
]
=
"0"
os
.
environ
[
"PADDLE_TRAINER_ENDPOINTS"
]
=
"127.0.0.1:36001"
startup_prog
=
fluid
.
Program
()
train_prog
=
fluid
.
Program
()
train_prog
,
startup_prog
=
fluid
.
Program
(),
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
optimizer
=
paddle
.
fluid
.
optimizer
.
Momentum
(
learning_rate
=
0.01
,
momentum
=
0.9
)
optimizer
=
fleet
.
distributed_optimizer
(
optimizer
,
strategy
=
strategy
)
optimizer
.
minimize
(
avg_cost
)
self
.
set_strategy
(
strategy
,
'dgc'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
)
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
self
.
assertNotIn
(
'dgc'
,
ops
)
self
.
assertNotIn
(
'dgc_momentum'
,
ops
)
def
test_dgc_recompute_optimizer
(
self
):
train_prog
,
startup_prog
=
fluid
.
Program
(),
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
self
.
set_strategy
(
strategy
,
'dgc'
)
self
.
set_strategy
(
strategy
,
'recompute'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
)
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
outs
=
[
op
.
output
(
'Out'
)[
0
]
for
op
in
avg_cost
.
block
.
ops
if
op
.
type
==
'mul'
]
self
.
assertIn
(
'dgc'
,
ops
)
self
.
assertIn
(
'dgc_momentum'
,
ops
)
self
.
assertIn
(
'subprog'
,
''
.
join
(
outs
))
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_fleet_recompute_meta_optimizer.py
浏览文件 @
0e1e098c
...
...
@@ -14,40 +14,55 @@
import
unittest
import
paddle
import
paddle.fluid
as
fluid
import
os
from
fleet_meta_optimizer_base
import
TestFleetMetaOptimizer
paddle
.
enable_static
()
class
TestFleetRecomputeMetaOptimizer
(
unittest
.
TestCase
):
def
setUp
(
self
):
os
.
environ
[
"POD_IP"
]
=
"127.0.0.1"
os
.
environ
[
"PADDLE_TRAINER_ENDPOINTS"
]
=
"127.0.0.1:36001"
os
.
environ
[
"PADDLE_TRAINERS_NUM"
]
=
"2"
os
.
environ
[
"PADDLE_PSERVERS_IP_PORT_LIST"
]
=
\
"127.0.0.1:36001,127.0.0.2:36001"
class
TestFleetRecomputeMetaOptimizer
(
TestFleetMetaOptimizer
):
def
test_recompute_optimizer
(
self
):
import
paddle.distributed.fleet
as
fleet
import
paddle.distributed.fleet.base.role_maker
as
role_maker
role
=
role_maker
.
PaddleCloudRoleMaker
(
is_collective
=
True
)
fleet
.
init
(
role
)
input_x
=
paddle
.
fluid
.
layers
.
data
(
name
=
"x"
,
shape
=
[
32
],
dtype
=
'float32'
)
input_y
=
paddle
.
fluid
.
layers
.
data
(
name
=
"y"
,
shape
=
[
1
],
dtype
=
'int64'
)
fc_1
=
paddle
.
fluid
.
layers
.
fc
(
input
=
input_x
,
size
=
64
,
act
=
'tanh'
)
fc_2
=
paddle
.
fluid
.
layers
.
fc
(
input
=
fc_1
,
size
=
64
,
act
=
'tanh'
)
prediction
=
paddle
.
fluid
.
layers
.
fc
(
input
=
[
fc_2
],
size
=
2
,
act
=
'softmax'
)
cost
=
paddle
.
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
input_y
)
avg_cost
=
paddle
.
fluid
.
layers
.
mean
(
x
=
cost
)
strategy
=
paddle
.
distributed
.
fleet
.
DistributedStrategy
()
strategy
.
recompute
=
True
strategy
.
recompute_configs
=
{
"checkpoints"
:
[
"fc_1.tmp_0"
]}
optimizer
=
paddle
.
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.01
)
optimizer
=
fleet
.
distributed_optimizer
(
optimizer
,
strategy
=
strategy
)
optimizer
.
minimize
(
avg_cost
)
train_prog
,
startup_prog
=
fluid
.
Program
(),
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
self
.
set_strategy
(
strategy
,
'recompute'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
)
outs
=
[
op
.
output
(
'Out'
)[
0
]
for
op
in
avg_cost
.
block
.
ops
if
op
.
type
==
'mul'
]
self
.
assertIn
(
'subprog'
,
''
.
join
(
outs
))
def
test_recompute_lars_optimizer
(
self
):
train_prog
,
startup_prog
=
fluid
.
Program
(),
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
self
.
set_strategy
(
strategy
,
'recompute'
)
self
.
set_strategy
(
strategy
,
'lars'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
)
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
outs
=
[
op
.
output
(
'Out'
)[
0
]
for
op
in
avg_cost
.
block
.
ops
if
op
.
type
==
'mul'
]
self
.
assertIn
(
'lars_momentum'
,
ops
)
self
.
assertIn
(
'subprog'
,
''
.
join
(
outs
))
def
test_recompute_lamb_optimizer
(
self
):
train_prog
,
startup_prog
=
fluid
.
Program
(),
fluid
.
Program
()
avg_cost
,
strategy
=
self
.
net
(
train_prog
,
startup_prog
)
self
.
set_strategy
(
strategy
,
'recompute'
)
self
.
set_strategy
(
strategy
,
'lamb'
)
self
.
optimizer
(
avg_cost
,
strategy
,
train_prog
,
startup_prog
,
'adam'
)
ops
=
[
op
.
type
for
op
in
avg_cost
.
block
.
ops
]
outs
=
[
op
.
output
(
'Out'
)[
0
]
for
op
in
avg_cost
.
block
.
ops
if
op
.
type
==
'mul'
]
self
.
assertIn
(
'lamb'
,
ops
)
self
.
assertIn
(
'subprog'
,
''
.
join
(
outs
))
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录