Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
d3fed5b8
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d3fed5b8
编写于
8月 03, 2021
作者:
L
lelelelelez
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
increase parallel tests;notest;test=coverage;test=py3
上级
af886995
变更
3
展开全部
显示空白变更内容
内联
并排
Showing
3 changed file
with
831 addition
and
81 deletion
+831
-81
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+97
-36
python/paddle/fluid/tests/unittests/test_matmul_op.py
python/paddle/fluid/tests/unittests/test_matmul_op.py
+3
-39
tools/parallel_UT_rule.py
tools/parallel_UT_rule.py
+731
-6
未找到文件。
paddle/scripts/paddle_build.sh
浏览文件 @
d3fed5b8
...
...
@@ -1030,6 +1030,7 @@ function get_quickly_disable_ut() {
function
card_test
()
{
set
-m
CTEST_PARALLEL_LEVEL
=
2
case_count
$1
$2
ut_startTime_s
=
`
date
+%s
`
...
...
@@ -1098,10 +1099,8 @@ function card_test() {
ut_endTime_s
=
`
date
+%s
`
if
((
$2
==
-1
))
;
then
echo
"exclusive TestCases Total Time:
$[
$ut_endTime_s
-
$ut_startTime_s
]s"
echo
"ipipe_log_param_Exclusive_TestCases_Total_Time:
$[
$ut_endTime_s
-
$ut_startTime_s
]s"
>>
${
PADDLE_ROOT
}
/build/build_summary.txt
else
echo
"
$2
card TestCases Total Time:
$[
$ut_endTime_s
-
$ut_startTime_s
]s"
echo
"ipipe_log_param_
${
2
}
_Cards_TestCases_Total_Time:
$[
$ut_endTime_s
-
$ut_startTime_s
]s"
>>
${
PADDLE_ROOT
}
/build/build_summary.txt
fi
set
+m
}
...
...
@@ -1153,13 +1152,17 @@ set +x
test_cases
=
$(
ctest
-N
-V
)
# get all test cases
# Note(zhouwei): Parallel runs are relative to 'CTEST_PARALLEL_LEVEL', e.g: '4 job each time' means 4*CTEST_PARALLEL_LEVEL
single_card_tests_high_parallel
=
'^job$'
# cases list which would run the most job each time with single GPU
single_card_tests_two_parallel
=
'^job$'
# cases list which would run 2 job each time with single GPU
single_card_tests_secondary_high_parallel
=
'^job$'
single_card_tests_secondary_high_parallel_1
=
'^job$'
single_card_tests_tetrad_parallel
=
'^job$'
# cases list which would run 2 job each time with single GPU
#single_card_tests_secondary_tetrad_parallel='^job$'
single_card_tests_non_parallel
=
'^job$'
# cases list which would run 1 job each time with single GPU
single_card_tests
=
'^job$'
# all cases list which would take single GPU
multiple_card_tests_two_parallel
=
'^job$'
# cases list which would run 2 job each time with multiple GPUs, most cases would be two GPUs
multiple_card_tests_non_parallel
=
'^job$'
# cases list which would run 1 job each time with multiple GPUs, most cases would be two GPUs
exclusive_tests_high_parallel
=
'^job$'
exclusive_tests_two_parallel
=
'^job$'
# cases list which would run 2 job exclusively(with all GPUs)
exclusive_tests_non_parallel
=
'^job$'
# cases list which would run 1 job exclusively(with all GPUs)
...
...
@@ -1171,9 +1174,12 @@ set +x
UT_list
=
$(
ctest
-N
|
awk
-F
': '
'{print $2}'
|
sed
'/^$/d'
|
sed
'$d'
)
output
=
$(
python
${
PADDLE_ROOT
}
/tools/parallel_UT_rule.py
"
${
UT_list
}
"
)
cpu_parallel_job
=
$(
echo
$output
|
cut
-d
";"
-f
1
)
tetrad_parallel_job
=
$(
echo
$output
|
cut
-d
";"
-f
2
)
two_parallel_job
=
$(
echo
$output
|
cut
-d
";"
-f
3
)
non_parallel_job
=
$(
echo
$output
|
cut
-d
";"
-f
4
)
secondary_cpu_parallel_job
=
$(
echo
$output
|
cut
-d
";"
-f
2
)
secondary_cpu_parallel_job_1
=
$(
echo
$output
|
cut
-d
";"
-f
3
)
tetrad_parallel_job
=
$(
echo
$output
|
cut
-d
";"
-f
4
)
#secondary_tetrad_parallel_job=$(echo $output | cut -d ";" -f 5)
two_parallel_job
=
$(
echo
$output
|
cut
-d
";"
-f
5
)
non_parallel_job
=
$(
echo
$output
|
cut
-d
";"
-f
6
)
while
read
-r
line
;
do
if
[[
"
$line
"
==
""
]]
;
then
continue
...
...
@@ -1215,13 +1221,15 @@ set +x
fi
if
[[
"
$is_exclusive
"
!=
""
]]
;
then
if
[[
$(
echo
$cpu_parallel_job$tetrad_parallel_job$two_parallel_job
|
grep
-o
"
\^
$testcase
\\
$"
)
!=
""
]]
;
then
if
[[
$(
echo
$cpu_parallel_job
|
grep
-o
"
\^
$testcase
\\
$"
)
!=
""
]]
;
then
exclusive_tests_high_parallel
=
"
$exclusive_tests_high_parallel
|^
$testcase
$"
elif
[[
$(
echo
$tetrad_parallel_job$two_parallel_job
|
grep
-o
"
\^
$testcase
\\
$"
)
!=
""
]]
;
then
exclusive_tests_two_parallel
=
"
$exclusive_tests_two_parallel
|^
$testcase
$"
else
exclusive_tests_non_parallel
=
"
$exclusive_tests_non_parallel
|^
$testcase
$"
fi
elif
[[
"
$is_multicard
"
!=
""
]]
;
then
if
[[
$(
echo
$cpu_parallel_job$tetrad_parallel_job
$two_parallel_job
|
grep
-o
"
\^
$testcase
\\
$"
)
!=
""
]]
;
then
if
[[
$(
echo
$cpu_parallel_job$tetrad_parallel_job
|
grep
-o
"
\^
$testcase
\\
$"
)
!=
""
]]
;
then
multiple_card_tests_two_parallel
=
"
$multiple_card_tests_two_parallel
|^
$testcase
$"
else
multiple_card_tests_non_parallel
=
"
$multiple_card_tests_non_parallel
|^
$testcase
$"
...
...
@@ -1229,8 +1237,14 @@ set +x
else
if
[[
$(
echo
$cpu_parallel_job
|
grep
-o
"
\^
$testcase
\\
$"
)
!=
""
]]
;
then
single_card_tests_high_parallel
=
"
$single_card_tests_high_parallel
|^
$testcase
$"
elif
[[
$(
echo
$secondary_cpu_parallel_job
|
grep
-o
"
\^
$testcase
\\
$"
)
!=
""
]]
;
then
single_card_tests_secondary_high_parallel
=
"
$single_card_tests_secondary_high_parallel
|^
$testcase
$"
elif
[[
$(
echo
$secondary_cpu_parallel_job_1
|
grep
-o
"
\^
$testcase
\\
$"
)
!=
""
]]
;
then
single_card_tests_secondary_high_parallel_1
=
"
$single_card_tests_secondary_high_parallel_1
|^
$testcase
$"
elif
[[
$(
echo
$tetrad_parallel_job$two_parallel_job
|
grep
-o
"
\^
$testcase
\\
$"
)
!=
""
]]
;
then
single_card_tests_two_parallel
=
"
$single_card_tests_two_parallel
|^
$testcase
$"
single_card_tests_tetrad_parallel
=
"
$single_card_tests_tetrad_parallel
|^
$testcase
$"
#elif [[ $(echo $secondary_tetrad_parallel_job | grep -o "\^$testcase\\$") != "" ]]; then
# single_card_tests_secondary_tetrad_parallel="$single_card_tests_secondary_tetrad_parallel|^$testcase$"
else
single_card_tests_non_parallel
=
"
$single_card_tests_non_parallel
|^
$testcase
$"
fi
...
...
@@ -1243,23 +1257,43 @@ set +x
testcase
=
''
done
<<<
"
$test_cases
"
;
card_test
"
$single_card_tests_high_parallel
"
1 6
# run cases the most each time with single GPU
card_test
"
$single_card_tests_two_parallel
"
1 2
# run cases 2 job each time with single GPU
card_test
"
$single_card_tests_non_parallel
"
1
# run cases 1 job each time with single GPU
ut_actual_total_startTime_s
=
`
date
+%s
`
single_ut_startTime_s
=
`
date
+%s
`
card_test
"
$single_card_tests_high_parallel
"
1 24
# run cases the most each time with single GPU
card_test
"
$single_card_tests_secondary_high_parallel
"
1 12
card_test
"
$single_card_tests_secondary_high_parallel_1
"
1 15
card_test
"
$single_card_tests_tetrad_parallel
"
1 7
# run cases 2 job each time with single GPU
#####card_test "$single_card_tests_secondary_tetrad_parallel" 1 6
card_test
"
$single_card_tests_non_parallel
"
1 2
# run cases 1 job each time with single GPU
single_ut_endTime_s
=
`
date
+%s
`
multi_ut_startTime_s
=
`
date
+%s
`
card_test
"
$multiple_card_tests_two_parallel
"
2 4
# run cases 2 job each time with two GPUs
card_test
"
$multiple_card_tests_non_parallel
"
2 2
# run cases 1 job each time with two GPUs
multi_ut_endTime_s
=
`
date
+%s
`
exclu_ut_startTime_s
=
`
date
+%s
`
card_test
"
$exclusive_tests_high_parallel
"
-1
5
card_test
"
$exclusive_tests_two_parallel
"
-1
3
# run cases exclusively, in this cases would be run with 2/4/8 GPUs
card_test
"
$exclusive_tests_non_parallel
"
-1
2
# run cases exclusively, in this cases would be run with 2/4/8 GPUs
exclu_ut_endTime_s
=
`
date
+%s
`
card_test
"
$multiple_card_tests_two_parallel
"
2 2
# run cases 2 job each time with two GPUs
card_test
"
$multiple_card_tests_non_parallel
"
2
# run cases 1 job each time with two GPUs
echo
"ipipe_log_param_1_TestCases_Total_Time:
$[
$single_ut_endTime_s
-
$single_ut_startTime_s
]s"
>>
${
PADDLE_ROOT
}
/build/build_summary.txt
echo
"ipipe_log_param_2_TestCases_Total_Time:
$[
$multi_ut_endTime_s
-
$multi_ut_startTime_s
]s"
>>
${
PADDLE_ROOT
}
/build/build_summary.txt
echo
"ipipe_log_param_Exclusive_TestCases_Total_Time:
$[
$exclu_ut_endTime_s
-
$exclu_ut_startTime_s
]s"
>>
${
PADDLE_ROOT
}
/build/build_summary.txt
card_test
"
$exclusive_tests_two_parallel
"
-1
2
# run cases exclusively, in this cases would be run with 2/4/8 GPUs
card_test
"
$exclusive_tests_non_parallel
"
-1
# run cases exclusively, in this cases would be run with 2/4/8 GPUs
collect_failed_tests
rm
-f
$tmp_dir
/
*
exec_times
=
0
retry_unittests_record
=
''
retry_time
=
3
exec_time_array
=(
'first'
'second'
'third'
)
retry_time
=
4
exec_time_array
=(
'first'
'second'
'third'
'fourth'
)
parallel_failed_tests_exec_retry_threshold
=
80
exec_retry_threshold
=
10
is_retry_execuate
=
0
rerun_ut_startTime_s
=
`
date
+%s
`
if
[
-n
"
$failed_test_lists
"
]
;
then
if
[
${
TIMEOUT_DEBUG_HELP
:-
OFF
}
==
"ON"
]
;
then
bash
$PADDLE_ROOT
/tools/timeout_debug_help.sh
"
$failed_test_lists
"
# cat logs for tiemout uts which killed by ctest
...
...
@@ -1268,14 +1302,30 @@ set +x
need_retry_ut_arr
=(
${
need_retry_ut_str
}
)
need_retry_ut_count
=
${#
need_retry_ut_arr
[@]
}
read
retry_unittests
<<<
$(
echo
"
$failed_test_lists
"
|
grep
-oEi
"
\-
.+
\(
.+
\)
"
|
sed
's/(.\+)//'
|
sed
's/- //'
)
if
[
$need_retry_ut_count
-lt
$exec_retry_threshold
]
;
then
while
(
[
$exec_times
-lt
$retry_time
]
)
do
if
[[
"
${
exec_times
}
"
==
"0"
]]
;
then
if
[
$need_retry_ut_count
-lt
$parallel_failed_tests_exec_retry_threshold
]
;
then
is_retry_execuate
=
0
else
is_retry_execuate
=
1
fi
elif
[[
"
${
exec_times
}
"
==
"1"
]]
;
then
read
need_retry_ut_str
<<<
$(
echo
"
$failed_test_lists
"
|
grep
-oEi
"
\-
.+
\(
.+
\)
"
|
sed
's/(.\+)//'
|
sed
's/- //'
)
need_retry_ut_arr
=(
${
need_retry_ut_str
}
)
need_retry_ut_count
=
${#
need_retry_ut_arr
[@]
}
if
[
$need_retry_ut_count
-lt
$exec_retry_threshold
]
;
then
is_retry_execuate
=
0
else
is_retry_execuate
=
1
fi
fi
if
[[
"
$is_retry_execuate
"
==
"0"
]]
;
then
set
+e
retry_unittests_record
=
"
$retry_unittests_record$failed_test_lists
"
failed_test_lists_ult
=
`
echo
"
${
failed_test_lists
}
"
|grep
-Po
'[^ ].*$'
`
set
-e
if
[[
"
${
exec_times
}
"
==
"1"
]]
;
then
if
[[
"
${
exec_times
}
"
==
"1"
]]
||
[[
"
${
exec_times
}
"
==
"3"
]]
;
then
if
[[
"
${
failed_test_lists
}
"
==
""
]]
;
then
break
else
...
...
@@ -1287,10 +1337,8 @@ set +x
echo
"========================================="
echo
"The following unittest will be re-run:"
echo
"
${
retry_unittests
}
"
for
line
in
${
retry_unittests
[@]
}
;
do
read
tmp_one_tmp
<<<
"
$(
echo
$single_card_tests
|
grep
-oEi
$line
)
"
read
tmp_mul_tmp
<<<
"
$(
echo
$multiple_card_tests
|
grep
-oEi
$line
)
"
read
exclusive_tmp
<<<
"
$(
echo
$exclusive_tests
|
grep
-oEi
$line
)
"
...
...
@@ -1318,7 +1366,7 @@ set +x
done
if
[[
"
$one_card_retry
"
!=
""
]]
;
then
card_test
"
$one_card_retry
"
1
card_test
"
$one_card_retry
"
1
4
fi
if
[[
"
$multiple_card_retry
"
!=
""
]]
;
then
...
...
@@ -1328,7 +1376,6 @@ set +x
if
[[
"
$exclusive_retry
"
!=
""
]]
;
then
card_test
"
$exclusive_retry
"
-1
fi
exec_times
=
$[$exec_times
+1]
failed_test_lists
=
''
collect_failed_tests
...
...
@@ -1336,13 +1383,14 @@ set +x
one_card_retry
=
''
multiple_card_retry
=
''
exclusive_retry
=
''
done
else
# There are more than 10 failed unit tests, so no unit test retry
is_retry_execuate
=
1
fi
done
fi
rerun_ut_endTime_s
=
`
date
+%s
`
echo
"ipipe_log_param_Rerun_TestCases_Total_Time:
$[
$rerun_ut_endTime_s
-
$rerun_ut_startTime_s
]s"
>>
${
PADDLE_ROOT
}
/build/build_summary.txt
ut_actual_total_endTime_s
=
`
date
+%s
`
echo
"ipipe_log_param_actual_TestCases_Total_Time:
$[
$ut_actual_total_endTime_s
-
$ut_actual_total_startTime_s
]s"
>>
${
PADDLE_ROOT
}
/build/build_summary.txt
if
[[
"
$EXIT_CODE
"
!=
"0"
]]
;
then
show_ut_retry_result
fi
...
...
@@ -1351,7 +1399,20 @@ set -ex
}
function
show_ut_retry_result
()
{
if
[[
"
$is_retry_execuate
"
!=
"0"
]]
;
then
if
[
"
$SYSTEM
"
==
"Darwin"
]
;
then
exec_retry_threshold_count
=
10
else
exec_retry_threshold_count
=
80
fi
if
[[
"
$is_retry_execuate
"
!=
"0"
]]
&&
[[
"
${
exec_times
}
"
==
"0"
]]
;
then
failed_test_lists_ult
=
`
echo
"
${
failed_test_lists
}
"
|
grep
-Po
'[^ ].*$'
`
echo
"========================================="
echo
"There are more than
${
exec_retry_threshold_count
}
failed unit tests in parallel test, so no unit test retry!!!"
echo
"========================================="
echo
"The following tests FAILED: "
echo
"
${
failed_test_lists_ult
}
"
exit
8
;
elif
[[
"
$is_retry_execuate
"
!=
"0"
]]
&&
[[
"
${
exec_times
}
"
==
"1"
]]
;
then
failed_test_lists_ult
=
`
echo
"
${
failed_test_lists
}
"
|
grep
-Po
'[^ ].*$'
`
echo
"========================================="
echo
"There are more than 10 failed unit tests, so no unit test retry!!!"
...
...
@@ -2291,8 +2352,8 @@ function main() {
cmake_gen_and_build
${
PYTHON_ABI
:-
""
}
${
parallel_number
}
enable_unused_var_check
parallel_test
check_coverage
check_change_of_unittest
${
PYTHON_ABI
:-
""
}
#
check_coverage
#
check_change_of_unittest ${PYTHON_ABI:-""}
;;
cpu_cicheck_coverage
)
check_approvals_of_unittest 1
...
...
python/paddle/fluid/tests/unittests/test_matmul_op.py
浏览文件 @
d3fed5b8
...
...
@@ -206,44 +206,8 @@ for dim_X in (1, 2, 3):
api_test
(
dim_X
,
dim_Y
,
transose_x
,
transose_y
)
# Test case more batch_size and N, M, K
def
generate_compatible_shapes
(
dim_X
,
dim_Y
,
transpose_X
,
transpose_Y
,
batch_size
):
BATCH_SIZE
=
2
M
=
3
N
=
4
K
=
5
if
(
dim_X
==
1
and
transpose_X
)
or
(
dim_Y
==
1
and
transpose_Y
):
K
=
1
if
dim_X
==
1
:
if
transpose_X
:
shape_X
=
[
M
]
else
:
shape_X
=
[
K
]
if
dim_Y
==
1
:
if
transpose_Y
:
shape_Y
=
[
N
]
else
:
shape_Y
=
[
K
]
if
dim_X
>=
2
:
if
transpose_X
:
shape_X
=
[
K
,
M
]
else
:
shape_X
=
[
M
,
K
]
if
dim_X
==
3
:
shape_X
=
[
BATCH_SIZE
]
+
shape_X
if
dim_Y
>=
2
:
if
transpose_Y
:
shape_Y
=
[
N
,
K
]
else
:
shape_Y
=
[
K
,
N
]
if
dim_Y
==
3
:
shape_Y
=
[
BATCH_SIZE
]
+
shape_Y
return
shape_X
,
shape_Y
# Test case n-dim
def
generate_compatible_shapes
(
dim
,
transpose_X
,
transpose_Y
):
def
generate_compatible_shapes
_ndim
(
dim
,
transpose_X
,
transpose_Y
):
M
=
2
N
=
4
K
=
3
...
...
@@ -270,7 +234,7 @@ for dim in [4]:
test_name
=
(
'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'
.
format
(
dim
,
dim
,
transpose_X
,
transpose_Y
))
shape_X
,
shape_Y
=
generate_compatible_shapes
(
dim
,
transpose_X
,
shape_X
,
shape_Y
=
generate_compatible_shapes
_ndim
(
dim
,
transpose_X
,
transpose_Y
)
globals
()[
test_name
]
=
type
(
test_name
,
(
Generator
,
OpTest
),
{
'shape_X'
:
shape_X
,
...
...
tools/parallel_UT_rule.py
浏览文件 @
d3fed5b8
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录