Commit 85bb1a85 (unverified)

support auto parallel data shard (#36055)

Authored on Oct 13, 2021 by Guoxia Wang; committed via GitHub on Oct 13, 2021.
Parent: 817f9ef0
Showing 6 changed files with 252 additions and 0 deletions (+252, -0).
python/paddle/distributed/auto_parallel/parallelizer.py    +3   -0
python/paddle/distributed/auto_parallel/utils.py    +37  -0
python/paddle/distributed/fleet/base/fleet_base.py    +1   -0
python/paddle/fluid/tests/unittests/CMakeLists.txt    +3   -0
python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py    +179 -0
python/paddle/fluid/tests/unittests/test_auto_parallel_data_unshard.py    +29  -0
python/paddle/distributed/auto_parallel/parallelizer.py

@@ -20,6 +20,7 @@ from .context import get_default_distributed_context
 from .completion import complete_annotation, complete_backward_annotation
 from .partitioner import Partitioner
 from .process import get_all_process_groups
+from .utils import make_data_unshard
 from .reshard import reshard
@@ -95,6 +96,8 @@ class AutoParallelizer:
         self._remove_distributed_attrs(partitioned_main_prog)
         complete_backward_annotation(partitioned_main_prog, self._dist_context)
+        make_data_unshard(partitioned_main_prog, partitioned_startup_prog)
         reshard(partitioned_main_prog, partitioned_startup_prog, rank,
                 self._dist_context)
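For orientation, the new call sits between backward-annotation completion and reshard. A minimal sketch of that ordering, using only names that appear in the hunk above with explanatory comments added (this is a simplified reading, not the full AutoParallelizer.parallelize body):

    # Simplified pass ordering inside AutoParallelizer (surrounding logic elided).
    complete_backward_annotation(partitioned_main_prog, self._dist_context)
    # New in this commit: rewrite each data var back to its unsharded (global)
    # shape and clear its dims_mapping, so callers can feed full batches.
    make_data_unshard(partitioned_main_prog, partitioned_startup_prog)
    # reshard then inserts the ops that hand each rank its local slice.
    reshard(partitioned_main_prog, partitioned_startup_prog, rank,
            self._dist_context)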
python/paddle/distributed/auto_parallel/utils.py

@@ -277,3 +277,40 @@ def _linear_idx2coordinate(mesh_shape, linear_idx):
     # row major order
     return coordinate
+
+
+def _get_unshard_dist_shape(var, dist_attr):
+    var_shape = var.shape
+    mapping = dist_attr.get_dims_mapping()
+    mesh = dist_attr.get_process_mesh().topology
+    assert len(var_shape) == len(mapping), \
+        "variable shape [{}] and dim_mapping [{}] is NOT match !".format(
+            var_shape, mapping)
+    new_shape = []
+    for idx in range(len(var_shape)):
+        if var_shape[idx] == -1 or mapping[idx] == -1:
+            new_shape.append(var_shape[idx])
+        else:
+            new_shape.append(var_shape[idx] * mesh[mapping[idx]])
+
+    return new_shape
+
+
+def make_data_unshard(dist_main_prog, dist_startup_prog):
+    from .context import get_default_distributed_context
+    dist_context = get_default_distributed_context()
+
+    for var in dist_main_prog.list_vars():
+        if var.is_data:
+            tensor_dist_attr = dist_context.get_tensor_distributed_attr_for_program(
+                var)
+            inverse_shape = _get_unshard_dist_shape(var, tensor_dist_attr)
+            var.desc.set_shape(inverse_shape)
+            dim_mapping = tensor_dist_attr.get_dims_mapping()
+            dim_mapping = [-1] * len(dim_mapping)
+            tensor_dist_attr.set_dims_mapping(dim_mapping)
+            dist_context.set_tensor_distributed_attr_for_program(
+                var, tensor_dist_attr)
+            var._set_attr('dim_mapping' + core.kAutoParallelSuffix(),
+                          dim_mapping)
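To make the shape arithmetic in _get_unshard_dist_shape concrete, here is a small standalone sketch (plain Python, no Paddle dependency; the helper name is illustrative): every dimension mapped onto a process-mesh axis is multiplied by that axis' size, while unmapped (-1) or dynamic (-1) dimensions are kept as-is.

    # Standalone illustration of the unshard-shape rule used above.
    def unshard_shape(var_shape, dims_mapping, mesh_topology):
        new_shape = []
        for size, axis in zip(var_shape, dims_mapping):
            if size == -1 or axis == -1:
                new_shape.append(size)  # dynamic or unsharded dim: keep it
            else:
                new_shape.append(size * mesh_topology[axis])
        return new_shape

    # A per-rank [2, 8] tensor whose first dim is split over a 2-process mesh
    # axis maps back to a global shape of [4, 8]; a dynamic dim stays -1.
    print(unshard_shape([2, 8], [0, -1], [2]))   # [4, 8]
    print(unshard_shape([-1, 8], [0, -1], [2]))  # [-1, 8]

make_data_unshard then writes this inverse shape into the data var's desc and resets its dims_mapping to all -1, which is why the test added below can feed the full global batch on every rank.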
python/paddle/distributed/fleet/base/fleet_base.py

@@ -1423,6 +1423,7 @@ class Fleet(object):
             auto_parallelizer = AutoParallelizer(self)
             optimize_ops, params_grads, dist_startup_prog, dist_main_prog = auto_parallelizer.parallelize(
                 loss, startup_program, parameter_list, no_grad_set)
             return optimize_ops, params_grads, dist_startup_prog, dist_main_prog

         # compile time
python/paddle/fluid/tests/unittests/CMakeLists.txt

@@ -36,6 +36,7 @@ list(APPEND DIST_TEST_OPS test_parallel_dygraph_mp_layers)
 list(APPEND DIST_TEST_OPS test_hybrid_parallel_inference_helper)
 list(APPEND DIST_TEST_OPS test_parallel_class_center_sample)
 list(APPEND DIST_TEST_OPS test_parallel_margin_cross_entropy)
+list(APPEND DIST_TEST_OPS test_auto_parallel_data_unshard)
 set(MIXED_DIST_TEST_OPS ${DIST_TEST_OPS})
 #remove distribute unittests.
 list(APPEND MIXED_DIST_TEST_OPS test_dgc_op)
@@ -233,6 +234,7 @@ if ((NOT WITH_GPU) AND (NOT WITH_ROCM))
     LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard_serial)
     LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard_mppp)
     LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard_dpmppp)
+    LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_data_unshard)
 elseif(WITH_GPU)
     if (${CUDNN_VERSION} VERSION_LESS 7100)
         LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
@@ -1001,6 +1003,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL)
     set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT 120)
     set_tests_properties(test_parallel_class_center_sample PROPERTIES TIMEOUT 120)
     set_tests_properties(test_parallel_margin_cross_entropy PROPERTIES TIMEOUT 120)
+    set_tests_properties(test_auto_parallel_data_unshard PROPERTIES TIMEOUT 120)
     if (${NCCL_VERSION} VERSION_GREATER_EQUAL 2212)
         set_tests_properties(test_parallel_dygraph_sparse_embedding PROPERTIES TIMEOUT 120)
         set_tests_properties(test_parallel_dygraph_transformer PROPERTIES TIMEOUT 120)
python/paddle/fluid/tests/unittests/auto_parallel_data_unshard.py    (new file, mode 100644)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import copy
import numpy as np
import random
import paddle
import paddle.nn as nn
import paddle.fluid.core as core
import paddle.distributed.auto_parallel as auto
import paddle.nn.functional as F
from paddle.distributed import fleet

paddle.enable_static()
paddle.distributed.init_parallel_env()


class TestDataUnshard(unittest.TestCase):
    def test_dp2pp1mp1(self):
        def create_model(train_program, start_program):
            with paddle.static.program_guard(train_program, start_program):
                ROOT_MESH = auto.ProcessMesh([0, 1])
                MESH_0 = auto.ProcessMesh([0, 1], ROOT_MESH)
                input = paddle.static.data(name='input', shape=[2, 8])
                label = paddle.static.data(name='label', shape=[2, 8])

                weight_attr = paddle.ParamAttr(
                    initializer=nn.initializer.Normal(mean=0.0, std=0.02))
                linear0 = nn.Linear(8, 8, weight_attr)
                linear1 = nn.Linear(8, 8, weight_attr)

                auto.shard_tensor(input, MESH_0, dim_mapping=[0, -1])
                auto.shard_tensor(label, MESH_0, dim_mapping=[0, -1])
                auto.shard_tensor(linear0.weight, MESH_0, dim_mapping=[-1, -1])
                auto.shard_tensor(linear1.weight, MESH_0, dim_mapping=[-1, -1])

                linear0_out = linear0(input)
                gelu_out = F.gelu(linear0_out)
                linear1_out = linear1(gelu_out)
                error_cost = paddle.nn.functional.square_error_cost(
                    linear1_out, label)
                loss = paddle.mean(error_cost)
            return train_program, start_program, loss, input, label

        train_program = paddle.static.Program()
        start_program = paddle.static.Program()
        # serial program
        train_program, start_program, loss, input, label = create_model(
            train_program, start_program)

        dist_strategy = fleet.DistributedStrategy()
        dist_strategy.semi_auto = True
        fleet.init(is_collective=True, strategy=dist_strategy)
        optimizer = paddle.fluid.optimizer.AdamOptimizer(
            learning_rate=0.00001,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-08,
            grad_clip=None)
        optimizer = fleet.distributed_optimizer(optimizer)
        _, _, distributed_startup_program, distributed_main_program = optimizer.minimize(
            loss, start_program)

        worker_index = paddle.distributed.get_rank()
        paddle.seed(worker_index + 2021)
        random.seed(worker_index + 2021)
        np.random.seed(worker_index + 2021)

        place = paddle.set_device("gpu")
        exe = paddle.static.Executor(place)
        exe.run(distributed_startup_program)

        input_data = np.array(range(2 * 8)).reshape([2, 8]).astype("float32")
        label_data = np.random.randint(0, 10, [2, 8]).astype("float32")

        fetchs = [loss.name, 'input@RESHARD_0']
        loss_np, shard_data_np = exe.run(distributed_main_program,
                                         feed={"input": input_data,
                                               "label": label_data},
                                         fetch_list=fetchs)
        desired = input_data[worker_index].reshape(shard_data_np.shape)
        np.testing.assert_allclose(shard_data_np, desired)

    def dp1pp1mp2(self):
        def create_model(train_program, start_program):
            with paddle.static.program_guard(train_program, start_program):
                ROOT_MESH = auto.ProcessMesh([0, 1])
                MESH_0 = auto.ProcessMesh([0, 1], ROOT_MESH)
                input = paddle.static.data(name='input', shape=[8, 8])
                label = paddle.static.data(name='label', shape=[8, 8])

                weight_attr = paddle.ParamAttr(
                    initializer=nn.initializer.Normal(mean=0.0, std=0.02))
                linear0 = nn.Linear(8, 8, weight_attr)
                linear1 = nn.Linear(8, 8, weight_attr)

                auto.shard_tensor(input, MESH_0, dim_mapping=[-1, -1])
                auto.shard_tensor(label, MESH_0, dim_mapping=[-1, -1])
                auto.shard_tensor(linear0.weight, MESH_0, dim_mapping=[-1, 0])
                auto.shard_tensor(linear1.weight, MESH_0, dim_mapping=[0, -1])

                linear0_out = linear0(input)
                gelu_out = F.gelu(linear0_out)
                linear1_out = linear1(gelu_out)
                error_cost = paddle.nn.functional.square_error_cost(
                    linear1_out, label)
                loss = paddle.mean(error_cost)
            return train_program, start_program, loss, input, label

        train_program = paddle.static.Program()
        start_program = paddle.static.Program()
        # serial program
        train_program, start_program, loss, input, label = create_model(
            train_program, start_program)

        dist_strategy = fleet.DistributedStrategy()
        dist_strategy.semi_auto = True
        fleet.init(is_collective=True, strategy=dist_strategy)
        optimizer = paddle.fluid.optimizer.AdamOptimizer(
            learning_rate=0.00001,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-08,
            grad_clip=None)
        optimizer = fleet.distributed_optimizer(optimizer)
        _, _, distributed_startup_program, distributed_main_program = optimizer.minimize(
            loss, start_program)

        worker_index = paddle.distributed.get_rank()
        paddle.seed(worker_index + 2021)
        random.seed(worker_index + 2021)
        np.random.seed(worker_index + 2021)

        place = paddle.set_device("gpu")
        exe = paddle.static.Executor(place)
        exe.run(distributed_startup_program)

        input_data = np.array(range(8 * 8)).reshape([8, 8]).astype("float32")
        label_data = np.random.randint(0, 10, [8, 8]).astype("float32")

        fetchs = [loss.name, 'input']
        loss_np, shard_data_np = exe.run(distributed_main_program,
                                         feed={"input": input_data,
                                               "label": label_data},
                                         fetch_list=fetchs)
        desired = input_data.reshape(shard_data_np.shape)
        np.testing.assert_allclose(shard_data_np, desired)


if __name__ == "__main__":
    unittest.main()
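The key assertion in test_dp2pp1mp1 checks that each of the two ranks ends up with its own row of the globally fed batch after resharding. A minimal numpy illustration of that expectation (the 'input@RESHARD_0' variable name comes from the test; the split along dim 0 is assumed from dim_mapping=[0, -1] on a two-process mesh):

    import numpy as np

    # The test feeds the same global [2, 8] batch on both ranks ...
    input_data = np.arange(2 * 8, dtype="float32").reshape([2, 8])
    # ... and expects reshard to split it along dim 0, one row per rank.
    per_rank = np.split(input_data, 2, axis=0)
    for worker_index in range(2):
        desired = input_data[worker_index].reshape(per_rank[worker_index].shape)
        np.testing.assert_allclose(per_rank[worker_index], desired)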
python/paddle/fluid/tests/unittests/test_auto_parallel_data_unshard.py    (new file, mode 100644)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import paddle.fluid as fluid
from test_parallel_dygraph_dataparallel import TestMultipleGpus


class TestAutoParallelDataUnshard(TestMultipleGpus):
    def test_auto_parallel_data_unshard(self):
        self.run_mnist_2gpu('auto_parallel_data_unshard.py')


if __name__ == "__main__":
    unittest.main()