Commit 48d5c36b in BaiXuePrincess / Paddle (fork of PaddlePaddle / Paddle)

add group operators (#48208)

Authored by caozhou on Nov 22, 2022; committed by GitHub on Nov 22, 2022 (signature unverified)
Parent commit: df4dfda0
Showing 3 changed files with 396 additions and 0 deletions (+396, -0):

python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py  (+262, -0)
python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt  (+1, -0)
python/paddle/fluid/tests/unittests/auto_parallel/test_group_operators.py  (+133, -0)
python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py (new file, mode 100644)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class OperatorGroupUtil:
    common_starts = ["layer_norm", "matmul_v2", "matmul"]

    @staticmethod
    def get_ranks(seq):
        """Get rank array of the given seq by doubled algorithm."""
        ordered_seq = sorted(list(set(seq)))
        item_to_rank = {item: idx for idx, item in enumerate(ordered_seq)}
        inter_ranks = [item_to_rank[item] for item in seq]

        length = len(inter_ranks)
        power = 0
        interval = 2**power
        while interval < length:
            for idx, item in enumerate(inter_ranks):
                if idx + interval >= length:
                    inter_ranks[idx] = [item, -1]
                else:
                    inter_ranks[idx] = [item, inter_ranks[idx + interval]]

            tmp = []
            for item in inter_ranks:
                if item not in tmp:
                    tmp.append(item)
            tmp.sort(key=lambda x: (x[0], x[1]))
            item_to_rank = {}
            for idx, val in enumerate(tmp):
                key = ",".join(str(item) for item in val)
                item_to_rank[key] = idx

            inter_ranks = [
                item_to_rank[",".join(str(val) for val in item)]
                for item in inter_ranks
            ]

            power += 1
            interval = 2**power

        return inter_ranks

    @staticmethod
    def get_suffixes(ranks):
        """Get suffix array by the given rank array."""
        suffixes = [0 for idx in range(len(ranks))]
        for idx, item in enumerate(ranks):
            suffixes[item] = idx

        return suffixes

    @staticmethod
    def get_heights(suffixes, seq):
        """Get height array by the suffix array and seq"""
        heights = [-1 for i in range(len(suffixes))]
        for i in range(1, len(seq)):
            x = seq[suffixes[i - 1] :]
            y = seq[suffixes[i] :]
            max_len = len(x) if len(x) > len(y) else len(y)
            same_count = 0
            for j in range(max_len):
                if j >= len(x) or j >= len(y):
                    break
                else:
                    if x[j] == y[j]:
                        same_count += 1
                    else:
                        break
            heights[i] = same_count

        return heights

    @staticmethod
    def get_longest_repeated_sub_seq(suffixes, heights, seq):
        """Get longest repeated sub sequence by suffix array algorithm."""
        length = len(seq)
        if length <= 1:
            return None
        k = length // 2
        height_groups = []
        longest_sub_seq = None
        longest_sub_seqs = []

        while k >= 2:
            height_group = []
            for i in range(1, len(heights)):
                if heights[i] >= k:
                    if i == 1:
                        height_group.append(0)
                    height_group.append(i)
                else:
                    if i == 1:
                        height_groups.append([0])
                        height_group = [i]
                    else:
                        height_groups.append(height_group)
                        height_group = [i]

            if height_group:
                height_groups.append(height_group)

            for height_group in height_groups:
                suffix_group = []
                index_group = []
                for idx in height_group:
                    suffix_group.append(idx)
                    index_group.append(suffixes[idx])

                max_index = max(index_group)
                min_index = min(index_group)
                if max_index - min_index >= k:
                    longest_sub_seq = seq[min_index : min_index + k]
                    if longest_sub_seq[0] in OperatorGroupUtil.common_starts:
                        return longest_sub_seq

            if longest_sub_seq is not None:
                return longest_sub_seq

            k -= 1
            height_groups = []

        return longest_sub_seq

    @staticmethod
    def get_decomposed_sub_seq(seq):
        """Get decomposed sub seq s by seq S such as s * R = S."""
        if not seq:
            return seq

        decomposed_sub_seq = seq
        seq_len = len(seq)
        if seq_len == 1:
            return decomposed_sub_seq
        else:
            for interval in range(2, seq_len + 1):
                if seq_len % interval == 0:
                    repeated_times = seq_len // interval
                    decomposed_sub_seq = seq[0:interval]
                    decomposed = True
                    for j in range(1, repeated_times + 1):
                        sub_seq = seq[interval * (j - 1) : interval * j]
                        if sub_seq != decomposed_sub_seq:
                            decomposed = False
                            break
                    if decomposed:
                        return decomposed_sub_seq

        return decomposed_sub_seq

    @staticmethod
    def replace_by_decomposed_seq(sub_seq, seq):
        """Replace seq by sub seq."""
        if not sub_seq:
            return seq

        result = []
        sub_seq_len = len(sub_seq)
        i = 0
        while i < len(seq):
            if seq[i : i + sub_seq_len] == sub_seq:
                result.append(seq[i : i + sub_seq_len])
                i += sub_seq_len
            else:
                result.append(seq[i])
                i += 1

        return result

    @staticmethod
    def stop_replace(seq):
        for item in seq:
            if not isinstance(item, list):
                return False
        return True


class RuleBasedTuner:
    def __init__(self, dist_context, mode="train"):
        self._dist_context = dist_context
        self._mode = mode

    def group_operators(self, ops):
        """
        Group operators to layers.

        Args:
            ops (list): A operator list.

        Returns:
            List: The list contains the list of operators which belong to the same layer.
        """
        seq = [op.type for op in ops]

        while not OperatorGroupUtil.stop_replace(seq):
            to_replace_seq = []
            to_replace_idxes = []
            has_append = False
            for idx, item in enumerate(seq):
                if not isinstance(item, list):
                    has_append = True
                    to_replace_seq.append(item)
                    to_replace_idxes.append(idx)
                elif isinstance(seq, list) and not has_append:
                    continue
                elif isinstance(seq, list) and has_append:
                    break

            ranks = OperatorGroupUtil.get_ranks(to_replace_seq)
            suffixes = OperatorGroupUtil.get_suffixes(ranks)
            heights = OperatorGroupUtil.get_heights(suffixes, to_replace_seq)
            longest_sub_seq = OperatorGroupUtil.get_longest_repeated_sub_seq(
                suffixes, heights, to_replace_seq
            )
            has_merged = False
            if longest_sub_seq is None:
                for i in range(to_replace_idxes[-1] + 1, len(seq)):
                    if isinstance(seq[i], list):
                        seq[i] = to_replace_seq + seq[i]
                        has_merged = True
                        break
                if not has_merged:
                    for i in range(to_replace_idxes[0] - 1, -1, -1):
                        if isinstance(seq[i], list):
                            seq[i].extend(to_replace_seq)
                            has_merged = True
                            break
                if not has_merged:
                    seq = [to_replace_seq]
                    break

            decomposed_sub_seq = OperatorGroupUtil.get_decomposed_sub_seq(
                longest_sub_seq
            )
            to_replace_seq = OperatorGroupUtil.replace_by_decomposed_seq(
                decomposed_sub_seq, to_replace_seq
            )
            result = seq[: to_replace_idxes[0]]
            if not has_merged:
                result.extend(to_replace_seq)
            result.extend(seq[to_replace_idxes[-1] + 1 :])
            seq = result

        layers = []
        idx = 0
        for groups in seq:
            layer = []
            for op in groups:
                layer.append(ops[idx])
                idx += 1
            layers.append(layer)

        return layers
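
The grouping logic above operates purely on the sequence of operator type strings, so it can be exercised in isolation. The sketch below is illustrative only: the toy operator-type sequence and the expected result are assumptions, not taken from this commit, while the import path and method names are the ones introduced by rule_based_tuner.py.

# A minimal sketch, assuming the module path added in this commit.
from paddle.distributed.auto_parallel.tuner.rule_based_tuner import (
    OperatorGroupUtil,
)

# Toy sequence: the same two-op pattern repeated twice, the way a stack of
# identical transformer blocks repeats its operator types.
seq = ["matmul_v2", "elementwise_add", "matmul_v2", "elementwise_add"]

ranks = OperatorGroupUtil.get_ranks(seq)  # rank of the suffix starting at each position
suffixes = OperatorGroupUtil.get_suffixes(ranks)  # suffix array (start indices in sorted order)
heights = OperatorGroupUtil.get_heights(suffixes, seq)  # LCP of adjacent sorted suffixes

# Longest repeated sub-sequence of length >= 2; for this toy input it should be
# the block ["matmul_v2", "elementwise_add"], which also starts with an op from
# common_starts.
sub_seq = OperatorGroupUtil.get_longest_repeated_sub_seq(suffixes, heights, seq)
sub_seq = OperatorGroupUtil.get_decomposed_sub_seq(sub_seq)

# Grouping the original sequence by that block should give two "layers":
# [['matmul_v2', 'elementwise_add'], ['matmul_v2', 'elementwise_add']]
print(OperatorGroupUtil.replace_by_decomposed_seq(sub_seq, seq))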
python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt
@@ -118,5 +118,6 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
                     test_conditional_block_reshard)
   py_test_modules(test_engine_api_error MODULES test_engine_api_error)
   py_test_modules(test_fp16_assign MODULES test_fp16_assign)
+  py_test_modules(test_group_operators MODULES test_group_operators)
 endif()
python/paddle/fluid/tests/unittests/auto_parallel/test_group_operators.py (new file, mode 100644)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import unittest

import numpy as np

import paddle
import paddle.static as static

sys.path.append("..")
import auto_parallel_gpt_model as modeling
from auto_parallel_gpt_model import (
    GPTModel,
    GPTForPretraining,
    GPTPretrainingCriterion,
)


def get_gpt_model(
    train_program, start_program, place, batch_size, sequence_len, vocab_size
):
    with static.program_guard(train_program, start_program):
        tokens = paddle.static.data(
            name="tokens", shape=[batch_size, sequence_len], dtype='int64'
        )
        position_ids = paddle.static.data(
            name="position_ids", shape=[batch_size, sequence_len], dtype='int64'
        )
        attention_mask = paddle.static.data(
            name="attention_mask",
            shape=[batch_size, 1, sequence_len, sequence_len],
            dtype='float32',
        )
        labels = paddle.static.data(
            name="labels", shape=[batch_size, sequence_len], dtype='int64'
        )
        loss_mask = paddle.static.data(
            name="loss_mask", shape=[batch_size, sequence_len], dtype='float32'
        )

        gpt = GPTModel(
            vocab_size=1000,
            hidden_size=64,
            num_hidden_layers=2,
            num_attention_heads=8,
            intermediate_size=256,
            hidden_act="gelu",
            hidden_dropout_prob=0.0,
            attention_probs_dropout_prob=0.0,
            max_position_embeddings=1024,
            type_vocab_size=1,
            initializer_range=0.02,
            pad_token_id=0,
            eos_token_id=7,
            bos_token_id=0,
            eol_token_id=3,
        )

        model = GPTForPretraining(
            gpt, vocab_size=1000, hidden_size=64, initializer_range=0.02
        )
        preds = model(tokens, position_ids, attention_mask)
        criterion = GPTPretrainingCriterion()
        loss = criterion(preds, labels, loss_mask)

    def gen_data():
        np.random.seed(2021)
        tokens = []
        position_ids = []
        attention_mask = []
        labels = []
        loss_mask = []
        for _ in range(batch_size):
            tokens.append(np.random.randint(vocab_size, size=sequence_len))
            position_ids.append(np.arange(sequence_len))
            attention_mask.append([np.tril(np.ones(sequence_len))])
            labels.append(np.random.randint(vocab_size, size=sequence_len))
            loss_mask.append(np.ones(sequence_len))

        return tokens, position_ids, attention_mask, labels, loss_mask

    return train_program, start_program, loss, gen_data


class TestGroupOperators(unittest.TestCase):
    def test_gpt(self):
        modeling.init_global()
        train_program = static.Program()
        start_program = static.Program()
        place = paddle.set_device("gpu")
        batch_size = 8
        sequence_len = 512
        vocab_size = 1000
        train_program, start_program, loss, gen_data = get_gpt_model(
            train_program,
            start_program,
            place,
            batch_size,
            sequence_len,
            vocab_size,
        )
        from paddle.distributed.auto_parallel.tuner.rule_based_tuner import (
            RuleBasedTuner,
        )
        from paddle.distributed.auto_parallel.dist_context import (
            DistributedContext,
        )

        dist_context = DistributedContext()
        tuner = RuleBasedTuner(dist_context)
        layers = tuner.group_operators(train_program.global_block().ops)
        op_types = []
        for layer in layers:
            tmp = []
            for op in layer:
                tmp.append(op.type)
            op_types.append(tmp)


if __name__ == "__main__":
    unittest.main()
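
The test above runs the tuner end to end on a two-layer GPT model and collects the per-layer op types. As a usage note, the same grouping can also be inspected on any hand-built static Program, independent of the GPT test model. The sketch below is illustrative only: the small two-layer fc network is invented, while the import paths and the RuleBasedTuner/DistributedContext usage mirror the test above.

# A minimal sketch, not part of the commit: group the operators of a small
# hand-built static program and print the per-layer op types.
import paddle
import paddle.static as static

from paddle.distributed.auto_parallel.dist_context import DistributedContext
from paddle.distributed.auto_parallel.tuner.rule_based_tuner import RuleBasedTuner

paddle.enable_static()
main_program, startup_program = static.Program(), static.Program()
with static.program_guard(main_program, startup_program):
    x = paddle.static.data(name="x", shape=[4, 16], dtype="float32")
    # Two identical blocks, so the suffix-array search finds a repeated pattern.
    for _ in range(2):
        x = paddle.static.nn.fc(x, size=16)

tuner = RuleBasedTuner(DistributedContext())
layers = tuner.group_operators(main_program.global_block().ops)
print([[op.type for op in layer] for layer in layers])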