Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
akg
提交
3713f94c
A
akg
项目概览
MindSpore
/
akg
通知
59
Star
7
Fork
7
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
A
akg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3713f94c
编写于
7月 17, 2020
作者:
C
chenlei_autodiff
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix matmul tuning and support all space tuning.
上级
6335daad
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
85 addition
and
29 deletion
+85
-29
tests/fuzz/tune/autotuning/job.py
tests/fuzz/tune/autotuning/job.py
+32
-18
tests/fuzz/tune/autotuning/kernel_compiler.py
tests/fuzz/tune/autotuning/kernel_compiler.py
+2
-1
tests/fuzz/tune/autotuning/runner.py
tests/fuzz/tune/autotuning/runner.py
+16
-1
tests/fuzz/tune/autotuning/space.py
tests/fuzz/tune/autotuning/space.py
+9
-0
tests/fuzz/tune/autotuning/test_data_generators.py
tests/fuzz/tune/autotuning/test_data_generators.py
+4
-4
tests/fuzz/tune/autotuning/tuner.py
tests/fuzz/tune/autotuning/tuner.py
+15
-3
tests/fuzz/tune/autotuning/type_definitions.py
tests/fuzz/tune/autotuning/type_definitions.py
+1
-1
tests/fuzz/tune/test.py
tests/fuzz/tune/test.py
+6
-1
未找到文件。
tests/fuzz/tune/autotuning/job.py
浏览文件 @
3713f94c
...
...
@@ -15,22 +15,23 @@
"""AutoTuning job"""
import
os
import
json
import
time
import
datetime
import
importlib
import
logging
import
subprocess
import
numpy
as
np
from
collections
import
namedtuple
from
akg
import
composite
from
akg.utils
import
kernel_exec
as
utils
from
autotuning.runner
import
KernelRunner
,
error_time_list
,
error_time_string
from
autotuning.tuner
import
ModelBasedTuner
from
autotuning.tuner
import
ModelBasedTuner
,
Tuner
from
autotuning.type_definitions
import
ConvDesc
,
ConvBackpropDesc
,
MatmulCubeDesc
from
autotuning.space_generators
import
get_space
from
autotuning.space
import
ListConfigSpace
from
autotuning.test_data_generators
import
gen_data
logging
.
basicConfig
(
level
=
logging
.
DEBUG
,
format
=
'%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s'
)
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
logger
=
logging
.
getLogger
(
'fuzz.tune.autotuning.job'
)
...
...
@@ -92,11 +93,16 @@ def launch_json(debug_mode: bool = True, save_res: bool = False, json_input_dir=
if
save_res
:
save_tuning_result
(
key
,
"json"
,
None
,
index_table
,
tuner
)
def
jobs
(
op_type
:
str
=
'add'
,
desc
=
None
,
debug_mode
:
bool
=
True
,
save_res
:
bool
=
Fals
e
,
insert_key
=
''
,
conf_of_set_dim
=
""
):
def
jobs
(
op_type
:
str
=
'add'
,
desc
=
None
,
debug_mode
:
bool
=
True
,
save_res
:
bool
=
False
,
all_space
:
bool
=
Tru
e
,
insert_key
=
''
,
conf_of_set_dim
=
""
):
"""AutoTuning jobs"""
iter_times
=
[
3
,
3
,
3
]
if
debug_mode
else
[
80
,
160
,
320
]
time_start_get_space
=
time
.
time
()
index_table
,
space
,
key
,
expect
,
input_for_mod
=
get_space
(
op_type
,
desc
)
if
all_space
:
iter_times
=
[
space
.
length
,
space
.
length
,
space
.
length
]
time_end_get_space
=
time
.
time
()
print
(
"get space time: "
,
time_end_get_space
-
time_start_get_space
)
print
(
'space size:'
,
space
.
length
)
print
(
'index table:'
,
index_table
)
key
=
key
if
insert_key
==
''
else
insert_key
...
...
@@ -121,12 +127,18 @@ def jobs(op_type: str = 'add', desc=None, debug_mode: bool = True,
# available device numbers, normally is 8 or 1
available_device_numbers
=
utils
.
get_available_devices_num
()
time_start_tuning
=
time
.
time
()
if
all_space
:
tuner
=
Tuner
(
runner
,
index_table
,
space
,
n_parallel
=
available_device_numbers
)
else
:
tuner
=
ModelBasedTuner
(
runner
,
index_table
,
space
,
n_parallel
=
available_device_numbers
if
is_truly_profiling
else
1
,
plan_size
=
64
,
pre_model
=
None
)
least_try_times
=
iter_times
[
0
if
space
.
length
<
10
**
4
else
1
if
space
.
length
<
10
**
5
else
2
]
tuner
.
tune
(
least_try_times
,
output_file
=
op_type
+
".log"
)
time_end_tuning
=
time
.
time
()
print
(
"tuning time: "
,
time_end_tuning
-
time_start_tuning
)
print_tuning_result
(
op_type
,
space
,
index_table
,
tuner
,
key
)
if
save_res
:
...
...
@@ -231,46 +243,48 @@ def load_json_configs(op_type):
return
{}
return
{}
def
read_shapes_from_file
(
debug_mode
,
save_res
,
conf_of_set_dim
,
op_type
):
def
read_shapes_from_file
(
debug_mode
,
save_res
,
all_space
,
conf_of_set_dim
,
op_type
):
"""read tuning shapes from file"""
file
=
importlib
.
import_module
(
'autotuning.shapes.'
+
op_type
)
shapes
=
file
.
shapes
for
_
,
shp
in
enumerate
(
shapes
):
do_profiling
(
shp
,
debug_mode
,
save_res
,
op_type
,
conf_of_set_dim
)
do_profiling
(
shp
,
debug_mode
,
save_res
,
all_space
,
op_type
,
conf_of_set_dim
)
def
do_profiling
(
shp
,
debug_mode
,
save_res
,
op_type
,
conf_of_set_dim
=
None
):
def
do_profiling
(
shp
,
debug_mode
,
save_res
,
all_space
,
op_type
,
conf_of_set_dim
=
None
):
"""do profiling"""
# remove undeleted JOB files for previous shapes
subprocess
.
run
(
"rm -rf /var/log/npu/profiling/JOB*"
,
shell
=
True
)
if
op_type
==
'matmul'
:
key
=
shp
[
2
][
0
:
-
1
]
logger
.
debug
(
"start profiling: [%s]"
,
str
(
key
))
desc
=
MatmulCubeDesc
(
*
key
)
jobs
(
op_type
,
desc
,
debug_mode
,
save_res
,
key
.
__str__
(),
conf_of_set_dim
)
jobs
(
op_type
,
desc
,
debug_mode
,
save_res
,
all_space
,
key
.
__str__
(),
conf_of_set_dim
)
logger
.
debug
(
"end profiling: [%s]"
,
str
(
key
))
elif
op_type
.
startswith
(
'conv_backprop'
):
key
=
shp
[
2
]
logger
.
debug
(
"start profiling: [%s]"
,
str
(
key
))
desc
=
ConvBackpropDesc
(
*
key
)
jobs
(
op_type
,
desc
,
debug_mode
,
save_res
,
key
.
__str__
(),
conf_of_set_dim
)
jobs
(
op_type
,
desc
,
debug_mode
,
save_res
,
all_space
,
key
.
__str__
(),
conf_of_set_dim
)
logger
.
debug
(
"end profiling: [%s]"
,
str
(
key
))
elif
op_type
.
startswith
(
'conv'
):
key
=
shp
[
2
]
logger
.
debug
(
"start profiling: [%s]"
,
str
(
key
))
desc
=
ConvDesc
(
*
key
)
jobs
(
op_type
,
desc
,
debug_mode
,
save_res
,
key
.
__str__
(),
conf_of_set_dim
)
jobs
(
op_type
,
desc
,
debug_mode
,
save_res
,
all_space
,
key
.
__str__
(),
conf_of_set_dim
)
logger
.
debug
(
"end profiling: [%s]"
,
str
(
key
))
else
:
key
=
shp
logger
.
debug
(
"start profiling: [%s]"
,
str
(
key
))
desc
=
key
jobs
(
op_type
,
desc
,
debug_mode
,
save_res
,
conf_of_set_dim
=
conf_of_set_dim
)
jobs
(
op_type
,
desc
,
debug_mode
,
save_res
,
all_space
,
conf_of_set_dim
=
conf_of_set_dim
)
logger
.
debug
(
"end profiling: [%s]"
,
str
(
key
))
def
launch
(
op_type
,
debug_mode
,
save_res
=
False
,
desc
=
None
):
def
launch
(
op_type
,
debug_mode
,
save_res
=
False
,
desc
=
None
,
all_space
=
False
):
# get the existed tiling
conf_of_set_dim
=
load_json_configs
(
op_type
)
if
desc
is
None
:
read_shapes_from_file
(
debug_mode
,
save_res
,
conf_of_set_dim
,
op_type
)
read_shapes_from_file
(
debug_mode
,
save_res
,
all_space
,
conf_of_set_dim
,
op_type
)
else
:
shp
=
desc
do_profiling
(
shp
,
debug_mode
,
save_res
,
op_type
)
do_profiling
(
shp
,
debug_mode
,
save_res
,
all_space
,
op_type
)
tests/fuzz/tune/autotuning/kernel_compiler.py
浏览文件 @
3713f94c
...
...
@@ -115,7 +115,8 @@ def gen_kernel_matmul_cube(op_desc: MatmulCubeDesc, _, index_table,
attrs
=
{
'dim'
:
dim_info
,
'bypass'
:
config
.
bypass
}
return
matmul_run
.
matmul_compile
(
op_desc
.
x_shape
,
op_desc
.
y_shape
,
op_desc
.
bias
,
op_desc
.
left_format
,
op_desc
.
right_format
,
op_desc
.
out_format
,
op_desc
.
adj_x
,
op_desc
.
adj_y
,
op_desc
.
dtype
,
op_desc
.
out_dtype
,
kernel_name
,
attrs
,
gen_tiling_spaces
)
op_desc
.
dtype
,
op_desc
.
bias_dtype
,
op_desc
.
out_dtype
,
kernel_name
,
attrs
,
tuning
=
gen_tiling_spaces
)
def
gen_kernel_conv_backprop_input
(
op_desc
:
ConvBackpropDesc
,
_
,
index_table
,
config
:
ConvBackpropInputConfig
=
None
,
...
...
tests/fuzz/tune/autotuning/runner.py
浏览文件 @
3713f94c
...
...
@@ -18,6 +18,7 @@ import multiprocessing
import
logging
import
os
import
subprocess
import
time
from
typing
import
NamedTuple
import
numpy
as
np
from
akg
import
composite
...
...
@@ -86,8 +87,10 @@ class KernelRunner:
def
run_one_kernel
(
self
,
run_times
,
idx
,
config
,
best_time
=
np
.
inf
,
is_auto
=
False
):
"""Compile and execute a config of the operator on device"""
time_one_kernel_start
=
time
.
time
()
logger
.
debug
(
'compile %dth kernel'
,
idx
)
try
:
time_start_build
=
time
.
time
()
if
self
.
op_type
==
"json"
:
if
is_auto
:
mod
=
composite
.
build
(
self
.
op_desc
)
...
...
@@ -105,6 +108,8 @@ class KernelRunner:
else
:
mod
=
compile_kernel
(
self
.
op_type
,
self
.
op_desc
,
self
.
input_shape
,
self
.
_index_table
,
None
if
is_auto
else
config
.
input
,
idx
)
time_end_build
=
time
.
time
()
logger
.
debug
(
"build module time: %f"
,
time_end_build
-
time_start_build
)
logger
.
debug
(
'finished compile %dth kernel'
,
idx
)
except
BaseException
as
e
:
logger
.
debug
(
"Compile Failed: [%s] : %s"
,
"origin"
if
is_auto
else
str
(
config
.
input
),
str
(
e
))
...
...
@@ -127,6 +132,7 @@ class KernelRunner:
for
_
in
range
(
self
.
repeat_times
):
stat_info
=
{}
try
:
time_start_launch
=
time
.
time
()
if
self
.
mod_output_param
is
not
None
:
output
,
stat_info
=
utils
.
mod_launch
(
mod
,
list
(
self
.
input
),
self
.
mod_output_param
,
tuning
=
True
,
device_id
=
device_id
)
...
...
@@ -144,18 +150,24 @@ class KernelRunner:
stat_info
[
'run_time'
]
=
precision_error_time
logger
.
debug
(
"Precision Error: [%s]"
,
"origin"
if
config
is
None
else
str
(
config
.
input
))
time_end_launch
=
time
.
time
()
logger
.
debug
(
"mod launch time: %f"
,
time_end_launch
-
time_start_launch
)
except
BaseException
as
e
:
logger
.
debug
(
"Run Failed: [%s] : %s"
,
str
(
config
.
input
),
str
(
e
))
stat_info
[
'run_time'
]
=
run_failed_time
run_times
[
idx
]
=
np
.
minimum
(
run_times
[
idx
],
stat_info
[
'run_time'
])
finally
:
logger
.
debug
(
'end of %dth kernel'
,
idx
)
time_one_kernel_end
=
time
.
time
()
logger
.
debug
(
'run one kernel time: %f'
,
time_one_kernel_end
-
time_one_kernel_start
)
return
def
run
(
self
,
configs
,
best_time
=
np
.
inf
,
is_auto_set_dim
=
False
):
def
run
(
self
,
configs
,
best_time
=
np
.
inf
,
is_auto_set_dim
=
False
,
all_space
=
False
):
"""Compile and execute a batch config of the operator on device"""
start
=
time
.
time
()
logger
.
setLevel
(
logging
.
DEBUG
)
logger
.
debug
(
"gen cce kernels batch: %d kernels"
,
len
(
configs
))
subprocess
.
run
(
"rm -rf ./jobs/JOB*"
,
shell
=
True
)
process_jobs
=
[]
run_times
=
multiprocessing
.
Manager
().
list
(
np
.
full
((
len
(
configs
),),
compile_fail_time
))
for
idx
,
config
in
enumerate
(
configs
):
...
...
@@ -173,6 +185,8 @@ class KernelRunner:
run_times
[
idx
]
=
timeout_time
p
.
terminate
()
process_end
=
time
.
time
()
logger
.
debug
(
"process time: %f"
,
process_end
-
start
)
# clean the profiling directory
tune_device
=
int
(
os
.
environ
[
'DEVICE_ID'
])
tune_num
=
int
(
os
.
environ
[
'DEVICE_TOTAL_NUM'
])
...
...
@@ -206,6 +220,7 @@ class KernelRunner:
job_file
=
p
[
0
].
decode
(
'utf8'
).
strip
().
split
(
'/'
)[
-
2
]
subprocess
.
run
(
"rm -rf ./jobs/%s"
%
job_file
,
shell
=
True
)
end
=
time
.
time
()
logger
.
debug
(
"run kernels time: %f"
,
end
-
start
)
self
.
run_kernel_time
+=
end
-
start
for
idx
,
config
in
enumerate
(
configs
):
...
...
tests/fuzz/tune/autotuning/space.py
浏览文件 @
3713f94c
...
...
@@ -161,6 +161,9 @@ class ListConfigSpace(ConfigSpace):
"""reset fetch state"""
self
.
__fetch_pool
=
[
i
for
i
in
range
(
len
(
self
.
_configs
))]
def
fetch_scope
(
self
,
start
,
end
):
self
.
__fetch_pool
=
[
i
for
i
in
range
(
start
,
end
)]
def
has_next
(
self
)
->
bool
:
return
len
(
self
.
__fetch_pool
)
>
0
...
...
@@ -172,6 +175,12 @@ class ListConfigSpace(ConfigSpace):
self
.
__fetch_pool
.
pop
()
return
ret
def
fetch_next_index
(
self
)
->
int
:
"""fetch next index of config"""
idx
=
len
(
self
.
__fetch_pool
)
-
1
+
self
.
__fetch_pool
[
0
]
self
.
__fetch_pool
.
pop
()
return
idx
def
fetch_config
(
self
)
->
ConfigEntity
:
"""fetch a random config"""
return
self
.
get
(
self
.
fetch_index
())
...
...
tests/fuzz/tune/autotuning/test_data_generators.py
浏览文件 @
3713f94c
...
...
@@ -107,10 +107,10 @@ def _gen_data_matmul_cube(op_desc: MatmulCubeDesc):
_
,
_
,
_
,
out_shape
,
k
=
matmul_run
.
get_converted_shapes
(
m
,
n
,
k
,
batch_tuple
,
op_desc
.
adj_x
,
op_desc
.
adj_y
,
op_desc
.
bias
,
op_desc
.
left_format
,
op_desc
.
right_format
,
op_desc
.
out_format
)
m_x
,
m_y
,
bench_mark
,
bias_data
=
matmul_run
.
matmul_data
(
batch_tuple
,
m
,
k
,
n
,
op_desc
.
dtype
,
op_desc
.
out
_dtype
,
op_desc
.
bias
,
op_desc
.
adj_x
,
op_desc
.
adj_y
,
op_desc
.
left_format
,
op_desc
.
righ
t_format
,
op_desc
.
out_format
)
m_x
,
m_y
,
bench_mark
,
bias_data
=
matmul_run
.
matmul_data
(
batch_tuple
,
m
,
k
,
n
,
op_desc
.
dtype
,
op_desc
.
bias
_dtype
,
op_desc
.
out_dtype
,
op_desc
.
bias
,
op_desc
.
adj_x
,
op_desc
.
adj_y
,
op_desc
.
lef
t_format
,
op_desc
.
right_format
,
op_desc
.
out_format
)
out_data
=
np
.
full
(
out_shape
,
np
.
nan
,
op_desc
.
out_dtype
)
...
...
tests/fuzz/tune/autotuning/tuner.py
浏览文件 @
3713f94c
...
...
@@ -93,7 +93,7 @@ class Tuner:
print
(
'tuning time:'
,
self
.
_tuning_time
,
'secs'
)
def
next_batch
(
self
,
batch_size
:
int
,
is_add_visited
=
True
):
"""extract next batch"""
"""extract next batch
with xgboost model
"""
ret
=
[]
counter
=
0
if
not
is_add_visited
:
...
...
@@ -116,6 +116,17 @@ class Tuner:
counter
+=
1
return
ret
def
next_config
(
self
,
batch_size
:
int
):
"""extract next config orderly"""
ret
=
[]
counter
=
0
while
counter
<
batch_size
and
self
.
_space
.
has_next
():
index
=
self
.
_space
.
fetch_next_index
()
ret
.
append
(
self
.
_space
.
get
(
index
))
self
.
_visited
.
add
(
index
)
counter
+=
1
return
ret
def
export_configs
(
self
,
configs
:
list
,
output_file
:
str
,
append
:
bool
=
True
,
desc
=
""
):
"""export configs"""
mode
=
"a"
if
append
else
"w"
...
...
@@ -158,13 +169,13 @@ class Tuner:
while
i
<
least_try_times
:
if
not
self
.
_space
.
has_next
():
break
configs
=
self
.
next_
batch
(
min
(
self
.
_n_parallel
,
least_try_times
-
i
))
configs
=
self
.
next_
config
(
min
(
self
.
_n_parallel
,
least_try_times
-
i
))
run_times
=
self
.
_runner
.
run
(
configs
,
self
.
_best_time
)
results
=
[]
for
idx
,
conf
in
enumerate
(
configs
):
results
.
append
((
conf
.
input_id
,
run_times
[
idx
]))
# keep best config
if
self
.
best_time
<
run_times
[
idx
]:
if
self
.
best_time
>
run_times
[
idx
]:
self
.
_best_time
=
run_times
[
idx
]
self
.
_best_iter
=
i
+
idx
self
.
_best_config
=
conf
...
...
@@ -224,6 +235,7 @@ class ModelBasedTuner(Tuner):
self
.
__least_try_times
=
least_try_times
self
.
__early_stopping
=
early_stopping
logger
.
setLevel
(
logging
.
DEBUG
)
old_level
=
logger
.
level
i
=
0
error_ct
=
0
...
...
tests/fuzz/tune/autotuning/type_definitions.py
浏览文件 @
3713f94c
...
...
@@ -21,7 +21,7 @@ ConvDesc = namedtuple("ConvDesc", ['fmap_shape', 'filter_shape', 'pad', 'stride'
ConvBackpropDesc
=
namedtuple
(
"ConvBackpropDesc"
,
[
'fmap_shape'
,
'filter_shape'
,
'pad'
,
'stride'
,
'dilation'
])
MatmulCubeDesc
=
namedtuple
(
"MatmulCubeDesc"
,
[
"x_shape"
,
"y_shape"
,
"bias"
,
"left_format"
,
"right_format"
,
"out_format"
,
"adj_x"
,
"adj_y"
,
"dtype"
,
"out_dtype"
])
"out_format"
,
"adj_x"
,
"adj_y"
,
"dtype"
,
"
bias_dtype"
,
"
out_dtype"
])
# config param definitions
ConvConfig
=
namedtuple
(
'ConvConfig'
,
[
'tile_h'
,
'tile_co'
,
'tile_m'
,
'tile_k'
,
'tile_n'
,
'tile_w'
,
'bypass'
])
...
...
tests/fuzz/tune/test.py
浏览文件 @
3713f94c
...
...
@@ -13,11 +13,16 @@
# limitations under the License.
"""test"""
import
time
from
autotuning.job
import
launch
from
test_run.sub_run
import
sub_execute
time_start
=
time
.
time
()
op_type_
=
'sub'
debug_mode_
=
True
save_res_
=
True
all_space_
=
False
desc_
=
(
'024_sub_64_16_128_128_64_16_128_128_fp16'
,
sub_execute
,
[(
64
,
16
,
128
,
128
),
(
64
,
16
,
128
,
1
),
'float16'
])
launch
(
op_type
=
op_type_
,
debug_mode
=
debug_mode_
,
save_res
=
save_res_
,
desc
=
desc_
)
launch
(
op_type
=
op_type_
,
debug_mode
=
debug_mode_
,
save_res
=
save_res_
,
desc
=
desc_
,
all_space
=
all_space_
)
time_end
=
time
.
time
()
print
(
"launch time: "
,
time_end
-
time_start
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录