机器未来 / Paddle (fork of PaddlePaddle / Paddle)

Unverified commit 2c9d0f3c
Authored by WangXi on Aug 05, 2020; committed via GitHub on Aug 05, 2020
Parent: 4adac0e3

【paddle.fleet】Add dgc to fleet meta optimizer (#25738)

Add dgc to fleet meta optimizer, remove dgc from the optimizer __all__ list.
Showing 10 changed files with 206 additions and 6 deletions (+206 −6)
paddle/fluid/framework/distributed_strategy.proto                       +7  −0
python/paddle/fleet/base/distributed_strategy.py                        +9  −0
python/paddle/fleet/base/meta_optimizer_factory.py                      +2  −0
python/paddle/fleet/meta_optimizers/__init__.py                         +2  −0
python/paddle/fleet/meta_optimizers/dgc_optimizer.py                    +94 −0
python/paddle/fleet/meta_optimizers/meta_optimizer_base.py              +2  −1
python/paddle/fluid/optimizer.py                                        +2  −3
python/paddle/fluid/tests/unittests/CMakeLists.txt                      +2  −0
python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py         +0  −2
python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py    +86 −0
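With this change, DGC is driven from fleet's DistributedStrategy rather than by constructing DGCMomentumOptimizer by hand. A minimal usage sketch, mirroring the unit test added in this commit (test_fleet_dgc_meta_optimizer.py); the learning rate, momentum, and dgc_configs values are simply the ones used in that test, and building the loss variable is elided:

    import paddle
    import paddle.fleet as fleet
    import paddle.fluid.incubate.fleet.base.role_maker as role_maker

    # Collective role maker; trainer id/endpoints come from the environment,
    # as set up in the test's setUp() below.
    fleet.init(role_maker.PaddleCloudRoleMaker(is_collective=True))

    strategy = paddle.fleet.DistributedStrategy()
    strategy.dgc = True
    strategy.dgc_configs = {
        "rampup_begin_step": 128,   # keys match the new DGCConfig proto message
        "rampup_step": 100,
        "sparsity": [0.996, 0.999],
    }

    # DGC only applies when the inner optimizer is Momentum and worker_num() > 1;
    # otherwise DGCOptimizer._can_apply() returns False and the plain optimizer is used.
    optimizer = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
    optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
    # optimizer.minimize(avg_cost)  # avg_cost: a loss variable built as in the test's net()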
paddle/fluid/framework/distributed_strategy.proto

@@ -43,6 +43,12 @@ message GradientMergeConfig {
   optional bool avg = 2 [ default = true ];
 }
 
+message DGCConfig {
+  optional int32 rampup_begin_step = 1 [ default = 0 ];
+  optional int32 rampup_step = 2 [ default = 1 ];
+  repeated float sparsity = 3;
+}
+
 message LarsConfig {
   optional float lars_coeff = 1 [ default = 0.001 ];
   optional float lars_weight_decay = 2 [ default = 0.0005 ];

@@ -114,6 +120,7 @@ message DistributedStrategy {
   optional AMPConfig amp_configs = 102;
   optional LocalSGDConfig localsgd_configs = 103;
   optional GradientMergeConfig gradient_merge_configs = 104;
+  optional DGCConfig dgc_configs = 105;
   optional PipelineConfig pipeline_configs = 106;
   optional AsyncConfig a_sync_configs = 107;
   optional LarsConfig lars_configs = 108;
python/paddle/fleet/base/distributed_strategy.py

@@ -604,6 +604,15 @@ class DistributedStrategy(object):
         else:
             print("WARNING: lars should have value of bool type")
 
+    @property
+    def lars_configs(self):
+        return get_msg_dict(self.strategy.lars_configs)
+
+    @lars_configs.setter
+    def lars_configs(self, configs):
+        check_configs_key(self.strategy.lars_configs, configs, "lars_configs")
+        assign_configs_value(self.strategy.lars_configs, configs)
+
     @property
     def lamb(self):
         return self.strategy.lamb
python/paddle/fleet/base/meta_optimizer_factory.py

@@ -19,6 +19,7 @@ from ..meta_optimizers import GraphExecutionOptimizer
 from ..meta_optimizers import PipelineOptimizer
 from ..meta_optimizers import LocalSGDOptimizer
 from ..meta_optimizers import LarsOptimizer
+from ..meta_optimizers import DGCOptimizer
 
 __all__ = ["MetaOptimizerFactory"]

@@ -30,6 +31,7 @@ meta_optimizer_names = [
     "PipelineOptimizer",
     "LocalSGDOptimizer",
     "LarsOptimizer",
+    "DGCOptimizer",
 ]
python/paddle/fleet/meta_optimizers/__init__.py

@@ -18,6 +18,7 @@ from .graph_execution_optimizer import GraphExecutionOptimizer
 from .pipeline_optimizer import PipelineOptimizer
 from .localsgd_optimizer import LocalSGDOptimizer
 from .lars_optimizer import LarsOptimizer
+from .dgc_optimizer import DGCOptimizer
 
 __all__ = [
     'AMPOptimizer',

@@ -26,4 +27,5 @@ __all__ = [
     'PipelineOptimizer',
     'LocalSGDOptimizer',
     'LarsOptimizer',
+    'DGCOptimizer',
 ]
python/paddle/fleet/meta_optimizers/dgc_optimizer.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.fluid.optimizer import Momentum, DGCMomentumOptimizer
from .meta_optimizer_base import MetaOptimizerBase
import logging

__all__ = ["DGCOptimizer"]


class DGCOptimizer(MetaOptimizerBase):
    def __init__(self, optimizer):
        super(DGCOptimizer, self).__init__(optimizer)
        self.inner_opt = optimizer
        self.dgc_opt = None
        # we do not allow meta optimizer to be inner optimizer currently
        self.meta_optimizers_white_list = []

    def _set_basic_info(self, loss, role_maker, user_defined_optimizer,
                        user_defined_strategy):
        super(DGCOptimizer, self)._set_basic_info(
            loss, role_maker, user_defined_optimizer, user_defined_strategy)

        opt = self.inner_opt
        if not isinstance(opt, Momentum):
            return

        configs = self.user_defined_strategy.dgc_configs
        if len(configs['sparsity']) == 0:
            # default is [0.999]
            configs['sparsity'] = [0.999]

        self.dgc_opt = DGCMomentumOptimizer(
            learning_rate=opt._learning_rate,
            momentum=opt._momentum,
            rampup_begin_step=configs['rampup_begin_step'],
            rampup_step=configs['rampup_step'],
            sparsity=configs['sparsity'],
            parameter_list=opt._parameter_list,
            use_nesterov=opt._use_nesterov,
            num_trainers=self.role_maker.worker_num(),
            regularization=opt.regularization,
            grad_clip=opt._grad_clip,
            name=opt._name)

    def _can_apply(self):
        if self.user_defined_strategy.dgc:
            if not isinstance(self.inner_opt, Momentum):
                logging.warn("dgc only works on Momentum optimizer")
                return False
            if self.role_maker.worker_num() <= 1:
                logging.warn("dgc only works on multi cards")
                return False

            return True

        return False

    def _disable_strategy(self, dist_strategy):
        dist_strategy.dgc = False
        dist_strategy.dgc_configs = {
            'rampup_begin_step': 0,
            'rampup_step': 1,
            'sparsity': [0.999]
        }

    def backward(self,
                 loss,
                 startup_program=None,
                 parameter_list=None,
                 no_grad_set=None,
                 callbacks=None):
        return self.dgc_opt.backward(loss, startup_program, parameter_list,
                                     no_grad_set, callbacks)

    def minimize_impl(self,
                      loss,
                      startup_program=None,
                      parameter_list=None,
                      no_grad_set=None):
        optimize_ops, params_grads = \
            self.dgc_opt.minimize(loss, startup_program,
                                  parameter_list, no_grad_set)
        return optimize_ops, params_grads
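For comparison, _set_basic_info above builds roughly the DGCMomentumOptimizer that could previously be constructed directly (this commit also drops it from paddle/fluid/optimizer.py's __all__, see below). A rough sketch of that direct construction; the argument names are taken from the call above, while the concrete values are purely illustrative:

    from paddle.fluid.optimizer import DGCMomentumOptimizer

    # Hand-rolled equivalent of what DGCOptimizer now does internally;
    # the values here are illustrative, not defaults.
    dgc_opt = DGCMomentumOptimizer(
        learning_rate=0.01,
        momentum=0.9,
        rampup_begin_step=128,
        rampup_step=100,
        sparsity=[0.996, 0.999],
        num_trainers=2)
    # dgc_opt.minimize(avg_cost)  # avg_cost: the program's loss variable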
python/paddle/fleet/meta_optimizers/meta_optimizer_base.py

@@ -40,7 +40,8 @@ class MetaOptimizerBase(object):
         return True
 
     def _disable_strategy(self, dist_strategy):
-        raise NotImplementedError("you should implement disable strategy")
+        raise NotImplementedError("you should implement disable strategy in {}".
+                                  format(type(self).__name__))
 
     def minimize_impl(self,
                       loss,
python/paddle/fluid/optimizer.py

@@ -47,9 +47,8 @@ __all__ = [
     'AdamOptimizer', 'AdamaxOptimizer', 'DpsgdOptimizer',
     'DecayedAdagradOptimizer', 'RMSPropOptimizer', 'FtrlOptimizer', 'Adadelta',
     'AdadeltaOptimizer', 'ModelAverage', 'LarsMomentum',
-    'LarsMomentumOptimizer', 'DGCMomentumOptimizer', 'LambOptimizer',
-    'ExponentialMovingAverage', 'PipelineOptimizer', 'LookaheadOptimizer',
-    'RecomputeOptimizer'
+    'LarsMomentumOptimizer', 'LambOptimizer', 'ExponentialMovingAverage',
+    'PipelineOptimizer', 'LookaheadOptimizer', 'RecomputeOptimizer'
 ]
python/paddle/fluid/tests/unittests/CMakeLists.txt

@@ -39,6 +39,7 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_amp_meta_optimizer)
 list(APPEND MIXED_DIST_TEST_OPS test_fleet_gradient_merge_meta_optimizer)
 list(APPEND MIXED_DIST_TEST_OPS test_fleet_localsgd_meta_optimizer)
 list(APPEND MIXED_DIST_TEST_OPS test_fleet_lars_meta_optimizer)
+list(APPEND MIXED_DIST_TEST_OPS test_fleet_dgc_meta_optimizer)
 list(APPEND MIXED_DIST_TEST_OPS test_fleet_private_function)
 list(APPEND MIXED_DIST_TEST_OPS test_fleet_graph_executor)
 foreach(TEST_OP ${MIXED_DIST_TEST_OPS})

@@ -388,6 +389,7 @@ if(WITH_DISTRIBUTE)
         py_test_modules(test_dgc_op MODULES test_dgc_op)
         py_test_modules(test_dgc_momentum_op MODULES test_dgc_momentum_op)
         py_test_modules(test_dgc_optimizer MODULES test_dgc_optimizer)
+        py_test_modules(test_fleet_dgc_meta_optimizer MODULES test_fleet_dgc_meta_optimizer)
     else()
         # if not with dgc, must close all dgc tests
         list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl")
python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py

@@ -39,7 +39,6 @@ class TestDistMnistNCCL2DGC(TestDistBase):
         self._nccl2_mode = True
         self._use_dgc = True
 
-    @unittest.skip(reason="Skip unstable ut")
     def test_dist_train(self):
         import paddle.fluid as fluid
         if fluid.core.is_compiled_with_cuda():

@@ -69,7 +68,6 @@ class TestDistMnistNCCL2DGCMultiCards(TestDistBase):
         self._nccl2_mode = True
         self._use_dgc = True
 
-    @unittest.skip(reason="Skip unstable ut")
     def test_dist_train(self):
         import paddle.fluid as fluid
         if fluid.core.is_compiled_with_cuda():
python/paddle/fluid/tests/unittests/test_fleet_dgc_meta_optimizer.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import paddle
import os
import paddle.fleet as fleet
import paddle.fluid.incubate.fleet.base.role_maker as role_maker


class TestFleetDGCOptimizer(unittest.TestCase):
    def setUp(self):
        os.environ["PADDLE_TRAINER_ID"] = "1"
        os.environ[
            "PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001,127.0.0.1:36002"

    def net(self):
        role = role_maker.PaddleCloudRoleMaker(is_collective=True)
        fleet.init(role)
        input_x = paddle.fluid.layers.data(
            name="x", shape=[32], dtype='float32')
        input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64')

        fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh')
        fc_2 = paddle.fluid.layers.fc(input=fc_1, size=256, act='tanh')
        prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax')
        cost = paddle.fluid.layers.cross_entropy(
            input=prediction, label=input_y)
        avg_cost = paddle.fluid.layers.mean(x=cost)

        strategy = paddle.fleet.DistributedStrategy()
        strategy.dgc = True
        strategy.dgc_configs = {
            "rampup_begin_step": 128,
            "rampup_step": 100,
            "sparsity": [0.996, 0.999]
        }
        return avg_cost, strategy

    def test_dgc_optimizer(self):
        avg_cost, strategy = self.net()
        optimizer = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
        optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
        optimizer.minimize(avg_cost)

        ops = [op.type for op in avg_cost.block.ops]
        self.assertIn('dgc', ops)
        self.assertIn('dgc_momentum', ops)

    def test_dgc_not_apply_with_adam(self):
        avg_cost, strategy = self.net()
        optimizer = paddle.optimizer.Adam(learning_rate=0.01)
        optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
        optimizer.minimize(avg_cost)

        ops = [op.type for op in avg_cost.block.ops]
        self.assertNotIn('dgc', ops)
        self.assertNotIn('dgc_momentum', ops)

    def test_dgc_not_apply_with_one_worker(self):
        os.environ["PADDLE_TRAINER_ID"] = "0"
        os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001"

        avg_cost, strategy = self.net()
        optimizer = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
        optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
        optimizer.minimize(avg_cost)

        ops = [op.type for op in avg_cost.block.ops]
        self.assertNotIn('dgc', ops)
        self.assertNotIn('dgc_momentum', ops)


if __name__ == "__main__":
    unittest.main()