Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
0f741880
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0f741880
编写于
9月 14, 2021
作者:
Z
Zeng Jinle
提交者:
GitHub
9月 14, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix RawProgramOptimizer bug (#35704)
* fix raw optimizer gm * update * update ut
上级
83932715
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
46 addition
and
14 deletion
+46
-14
python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py
...istributed/fleet/meta_optimizers/raw_program_optimizer.py
+9
-9
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+1
-1
python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py
...ests/unittests/dist_mnist_gradient_merge_raw_optimizer.py
+19
-1
python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py
...e/fluid/tests/unittests/test_dist_mnist_gradient_merge.py
+17
-3
未找到文件。
python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py
浏览文件 @
0f741880
...
...
@@ -164,13 +164,13 @@ class RawProgramOptimizer(MetaOptimizerBase):
def
_insert_allreduce_ops_for_gm
(
self
,
gm_block
):
block
=
self
.
main_program
.
global_block
()
last_backward
_op_idx
=
None
for
i
,
op
in
enumerate
(
reversed
(
gm_block
.
ops
)):
if
is_backward_op
(
op
)
and
last_backward
_op_idx
is
None
:
last_backward_idx
=
i
first_optimize
_op_idx
=
None
for
i
,
op
in
reversed
(
list
(
enumerate
(
gm_block
.
ops
)
)):
if
is_backward_op
(
op
)
and
first_optimize
_op_idx
is
None
:
first_optimize_op_idx
=
i
+
1
break
if
last_backward
_op_idx
is
None
:
last_backward
_op_idx
=
0
if
first_optimize
_op_idx
is
None
:
first_optimize
_op_idx
=
0
param_vars
=
[]
grad_vars
=
[]
...
...
@@ -191,7 +191,7 @@ class RawProgramOptimizer(MetaOptimizerBase):
return
gm_block
.
_insert_op
(
last_backward
_op_idx
,
first_optimize
_op_idx
,
type
=
"c_sync_calc_stream"
,
inputs
=
{
'X'
:
grad_vars
[
0
]},
outputs
=
{
'Out'
:
grad_vars
[
0
]},
...
...
@@ -203,7 +203,7 @@ class RawProgramOptimizer(MetaOptimizerBase):
# NOTE: can perform fuse allreduce inside the loop in the future
for
i
,
(
p
,
g
)
in
enumerate
(
zip
(
param_vars
,
grad_vars
)):
gm_block
.
_insert_op
(
last_backward
_op_idx
+
insert_op_num
,
first_optimize
_op_idx
+
insert_op_num
,
type
=
"c_allreduce_sum"
,
inputs
=
{
'X'
:
g
},
outputs
=
{
'Out'
:
g
},
...
...
@@ -214,7 +214,7 @@ class RawProgramOptimizer(MetaOptimizerBase):
insert_op_num
+=
1
gm_block
.
_insert_op
(
last_backward
_op_idx
+
insert_op_num
,
first_optimize
_op_idx
+
insert_op_num
,
type
=
"c_sync_comm_stream"
,
inputs
=
{
'X'
:
grad_vars
[
-
1
]},
outputs
=
{
'Out'
:
grad_vars
[
-
1
]},
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
0f741880
...
...
@@ -789,7 +789,7 @@ endif()
if
(
WITH_DISTRIBUTE AND NOT APPLE
)
if
(
WITH_GPU OR WITH_ROCM
)
set_tests_properties
(
test_c_comm_init_op PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 1
2
0
)
set_tests_properties
(
test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 1
6
0
)
endif
()
endif
()
...
...
python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py
浏览文件 @
0f741880
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
paddle
import
paddle.nn
as
nn
import
paddle.fluid
as
fluid
...
...
@@ -44,9 +45,10 @@ class TestDistMnistGradientMergeRawOptimizer(TestDistRunnerBase):
strategy
.
build_strategy
=
build_strategy
strategy
.
gradient_merge
=
True
avg
=
os
.
environ
[
'enable_gm_avg'
]
==
"True"
strategy
.
gradient_merge_configs
=
{
"k_steps"
:
2
,
"avg"
:
False
,
"avg"
:
avg
,
}
strategy
.
without_graph_optimization
=
True
...
...
@@ -65,9 +67,25 @@ class TestDistMnistGradientMergeRawOptimizer(TestDistRunnerBase):
optimizer
,
k_steps
=
strategy
.
gradient_merge_configs
[
"k_steps"
],
avg
=
strategy
.
gradient_merge_configs
[
"avg"
])
world_size
=
1
else
:
optimizer
=
fleet
.
distributed_optimizer
(
optimizer
)
world_size
=
fleet
.
world_size
()
optimizer
.
minimize
(
cost
)
if
world_size
>
1
:
assert
paddle
.
static
.
default_main_program
().
num_blocks
==
2
gm_block
=
paddle
.
static
.
default_main_program
().
block
(
1
)
start_allreduce_idx
=
None
for
i
,
op
in
enumerate
(
gm_block
.
ops
):
if
op
.
type
==
"c_allreduce_sum"
:
start_allreduce_idx
=
i
break
# the magic number 1 below means skip the c_sync_calc_stream op
if
avg
:
assert
start_allreduce_idx
>
1
else
:
assert
start_allreduce_idx
==
1
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
...
...
python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py
浏览文件 @
0f741880
...
...
@@ -52,21 +52,35 @@ class TestDistMnistGradMergeNoFuse(TestDistBase):
log_name
=
flag_name
+
"_no_fuse"
)
class
TestDistMnistGradMergeRawOptimizer
(
TestDistBase
):
class
TestDistMnistGradMergeRawOptimizer
Base
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_use_reader_alloc
=
False
self
.
_nccl2_mode
=
True
self
.
_use_fleet_api
=
True
self
.
_use_fleet_api_20
=
True
def
enable_avg
(
self
):
return
False
def
test_dist_train
(
self
):
if
fluid
.
core
.
is_compiled_with_cuda
():
avg
=
str
(
self
.
enable_avg
())
log_name
=
flag_name
+
"_raw_optimizer_gm_avg_"
+
avg
self
.
check_with_place
(
"dist_mnist_gradient_merge_raw_optimizer.py"
,
delta
=
1e-5
,
check_error_log
=
True
,
log_name
=
flag_name
+
"_raw_optimizer"
,
need_envs
=
{
'FLAGS_apply_pass_to_program'
:
'1'
})
log_name
=
log_name
,
need_envs
=
{
'FLAGS_apply_pass_to_program'
:
'1'
,
'enable_gm_avg'
:
avg
,
})
class
TestDistMnistGradMergeRawOptimizerAvg
(
TestDistMnistGradMergeRawOptimizerBase
):
def
enable_avg
(
self
):
return
True
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录