BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit f59bcb1c (unverified)
Authored on Jun 01, 2022 by JZ-LIANG
Committed by GitHub on Jun 01, 2022
[AutoParallel & Science] Miscellaneous improvements (#43139)
* adapt for 10 loss
* partitioner support optimizer
Parent: ff1789ca

Showing 4 changed files with 15 additions and 5 deletions (+15 −5)
python/paddle/distributed/auto_parallel/operators/dist_default.py   +5 −1
python/paddle/distributed/auto_parallel/operators/dist_reduce_p.py  +2 −1
python/paddle/distributed/auto_parallel/partitioner.py              +3 −3
python/paddle/distributed/auto_parallel/utils.py                    +5 −0
python/paddle/distributed/auto_parallel/operators/dist_default.py
@@ -363,7 +363,7 @@ class DistributedDefaultImpl0(DistributedOperatorImpl):
                 output_name)
         # replicate op in dist program
-        dist_op_desc = main_block.append_op(type='nop').desc
+        dist_op_desc = main_block.desc.append_op()
         dist_op_desc.copy_from(src_op.desc)
         set_dist_op_desc_original_id(dist_op_desc, src_op.desc, ctx)
         for input_name in src_op.desc.input_names():
@@ -371,6 +371,8 @@ class DistributedDefaultImpl0(DistributedOperatorImpl):
         for output_name in src_op.desc.output_names():
             dist_op_desc.set_output(output_name, kwargs[output_name])
 
+        main_block._sync_with_cpp()
+
         # data parallel synchronization for primtive operators
         from paddle.incubate.autograd import prim_enabled
         if prim_enabled():
@@ -426,6 +428,8 @@ class DistributedDefaultImpl0(DistributedOperatorImpl):
                     op_attr.set_input_dims_mapping(param.name, dims_mapping)
                     ctx.set_op_dist_attr_for_program(new_op, op_attr)
 
+        startup_block._sync_with_cpp()
+
     @staticmethod
     def backward(ctx, *args, **kwargs):
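Note on the dist_default.py hunks above (my reading, not part of the commit): the replicated op desc is now appended directly on the block's C++ desc instead of going through a Python-side placeholder op (append_op(type='nop')), and main_block / startup_block are explicitly re-synced afterwards so the Python op list picks up the new desc. A minimal sketch of the resulting pattern, assuming main_block, src_op, ctx and kwargs are the same objects used in the hunks:

    # sketch of the replication pattern above; not a verbatim copy of the file
    dist_op_desc = main_block.desc.append_op()   # append on the C++ desc, no 'nop' placeholder
    dist_op_desc.copy_from(src_op.desc)          # clone type, inputs/outputs and attrs
    set_dist_op_desc_original_id(dist_op_desc, src_op.desc, ctx)
    for input_name in src_op.desc.input_names():
        dist_op_desc.set_input(input_name, kwargs[input_name])
    for output_name in src_op.desc.output_names():
        dist_op_desc.set_output(output_name, kwargs[output_name])
    main_block._sync_with_cpp()                  # make the Python block aware of the new op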
python/paddle/distributed/auto_parallel/operators/dist_reduce_p.py
@@ -107,13 +107,14 @@ class DistributedReducePrimtiveImpl0(DistributedOperatorImpl):
             output_name)
         # replicate op in dist program
-        dist_op_desc = main_block.append_op(type='nop').desc
+        dist_op_desc = main_block.desc.append_op()
         dist_op_desc.copy_from(src_op.desc)
         set_dist_op_desc_original_id(dist_op_desc, src_op.desc, ctx)
         for input_name in src_op.desc.input_names():
             dist_op_desc.set_input(input_name, kwargs[input_name])
         for output_name in src_op.desc.output_names():
             dist_op_desc.set_output(output_name, kwargs[output_name])
+        main_block._sync_with_cpp()
 
         # batch dimension synchronization
         var_name = src_op.output_arg_names[0]
python/paddle/distributed/auto_parallel/partitioner.py
@@ -25,7 +25,7 @@ from paddle.distributed.auto_parallel.dist_context import DistributedContext, Di
 from .dist_attribute import OperatorDistributedAttribute
 from .process_group import new_process_group
 from .utils import set_dist_op_desc_original_id
-from .utils import print_program_with_dist_attr, is_forward_op, is_backward_op, is_loss_op
+from .utils import print_program_with_dist_attr, is_forward_op, is_backward_op, is_loss_op, is_optimize_op
 from .operators.common import BACKWARD_ONLY_DIST_OPS
 
 __varname_not_in_block__ = ["lod_tensor_blocking_queue_0"]
@@ -263,14 +263,14 @@ class Partitioner(object):
                     dist_op_backward_impl.backward(
                         self._dist_context, **kinputs, **koutputs,
                         **{"grad_var_to_var": grad_var_to_var})
-            elif int(op.attr('op_role')) == 2:
+            elif is_optimize_op(op):
                 kinputs, koutputs = dist_op_context.prepare_context(op)
                 dist_op_impl = get_distributed_operator_impl_container(
                     "default").get_impl(0)
                 dist_op_impl.backward(self._dist_context, **kinputs, **koutputs)
             else:
                 raise NotImplementedError(
-                    "partitioner only support forward op and backward op, but got {}".
+                    "partitioner only support forward and backward, optimize ops, but got {}".
                     format(str(op)))
 
     def _is_valid_annotated_program(self, program):
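My reading of the partitioner.py hunk above: the optimize branch now dispatches through the named helper is_optimize_op instead of comparing op_role against the literal 2, and such ops are partitioned via the default distributed operator implementation. A hedged sketch of why the two checks agree, assuming Paddle's OpRole enum values (Forward = 0, Backward = 1, Optimize = 2, Loss = 0x0100); those numeric values are my assumption, not stated in this diff:

    from paddle.fluid import core

    OpRole = core.op_proto_and_checker_maker.OpRole

    def classify(op_role):
        # mirrors the branch order in the partitioner hunk above (sketch only)
        if op_role & int(OpRole.Backward):
            return "backward"
        if op_role & int(OpRole.Optimize):   # old code: op_role == 2
            return "optimize"
        return "forward"

    print(int(OpRole.Optimize))                               # expected: 2, the old magic number
    print(classify(int(OpRole.Optimize)))                     # optimize
    print(classify(int(OpRole.Backward) | int(OpRole.Loss)))  # backward (loss grad op)

One practical difference: the bitwise test also matches ops whose role combines Optimize with additional flags, which the strict equality against 2 would miss.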
python/paddle/distributed/auto_parallel/utils.py
@@ -1099,6 +1099,11 @@ def is_backward_op(op):
         int(op.all_attrs()[OP_ROLE_KEY]) & int(OpRole.Backward)
 
 
+def is_optimize_op(op):
+    return OP_ROLE_KEY in op.attr_names and \
+        int(op.all_attrs()[OP_ROLE_KEY]) & int(OpRole.Optimize)
+
+
 def is_loss_op(op):
     return OP_ROLE_KEY in op.attr_names and \
         int(op.all_attrs()[OP_ROLE_KEY]) == (int(core.op_proto_and_checker_maker.OpRole.Forward) |
                                              int(core.op_proto_and_checker_maker.OpRole.Loss))
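A hedged usage sketch for the new helper (not part of the commit): classify the ops of a small static-graph program by role using the utils above. The driver program below and the paddle.static / SGD API calls are my assumptions about the surrounding framework, chosen only to produce forward, backward and optimizer ops:

    import paddle
    from paddle.distributed.auto_parallel.utils import (
        is_forward_op, is_backward_op, is_optimize_op, is_loss_op)

    paddle.enable_static()
    main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
    with paddle.static.program_guard(main_prog, startup_prog):
        x = paddle.static.data('x', shape=[4, 8], dtype='float32')
        y = paddle.static.data('y', shape=[4, 1], dtype='float32')
        pred = paddle.static.nn.fc(x, size=1)
        loss = paddle.mean(paddle.nn.functional.square_error_cost(pred, y))
        paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

    for op in main_prog.global_block().ops:
        tags = [name for name, check in (("fwd", is_forward_op), ("bwd", is_backward_op),
                                         ("opt", is_optimize_op), ("loss", is_loss_op))
                if check(op)]
        print(op.type, tags)   # e.g. mul ['fwd'], mean_grad ['bwd'], sgd ['opt']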