Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
akg
提交
aaf77021
A
akg
项目概览
MindSpore
/
akg
通知
59
Star
7
Fork
7
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
A
akg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
aaf77021
编写于
7月 14, 2020
作者:
D
dabaiji
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support dynamical memory allocation ratio adjustment in micro-tuning for allocation exceed problem
上级
d9da1e31
变更
12
显示空白变更内容
内联
并排
Showing
12 changed file
with
383 addition
and
212 deletion
+383
-212
python/akg/ops/array/four2five.py
python/akg/ops/array/four2five.py
+5
-4
src/codegen/build_module.cc
src/codegen/build_module.cc
+230
-202
src/codegen/util.cc
src/codegen/util.cc
+7
-1
src/codegen/util.h
src/codegen/util.h
+6
-1
src/include/build_module.h
src/include/build_module.h
+22
-0
src/pass/storage_rewrite_cce.cc
src/pass/storage_rewrite_cce.cc
+2
-2
src/poly/tiling_solver.cc
src/poly/tiling_solver.cc
+71
-1
src/poly/tiling_solver.h
src/poly/tiling_solver.h
+2
-0
src/poly/tiling_utils.cc
src/poly/tiling_utils.cc
+7
-0
src/poly/tiling_utils.h
src/poly/tiling_utils.h
+2
-1
tests/unittest/pass/test_micro_tuning.py
tests/unittest/pass/test_micro_tuning.py
+28
-0
tests/unittest/unittest.sh
tests/unittest/unittest.sh
+1
-0
未找到文件。
python/akg/ops/array/four2five.py
浏览文件 @
aaf77021
...
@@ -116,8 +116,8 @@ def four2five_tiling_strategy_dynamic(tensor, input_format):
...
@@ -116,8 +116,8 @@ def four2five_tiling_strategy_dynamic(tensor, input_format):
strategy
.
append
(
ct_util
.
create_constraint_on_tensor
(
tensor
,
16
,
ct_util
.
TileConstraint
.
FACTOR
,
4
)[
0
])
strategy
.
append
(
ct_util
.
create_constraint_on_tensor
(
tensor
,
16
,
ct_util
.
TileConstraint
.
FACTOR
,
4
)[
0
])
return
strategy
return
strategy
@
vc_util
.
check_input_type
(
akg
.
tvm
.
tensor
.
Tensor
,
str
,
str
)
@
vc_util
.
check_input_type
(
akg
.
tvm
.
tensor
.
Tensor
,
str
,
str
,
bool
)
def
four2five
(
data
,
format_
,
dst_dtype
=
'float16'
):
def
four2five
(
data
,
format_
,
dst_dtype
=
'float16'
,
need_custom_tiling
=
True
):
"""
"""
Convert 4-dims "data" to 5-dims,the format of "data" is defined in "format_"
Convert 4-dims "data" to 5-dims,the format of "data" is defined in "format_"
...
@@ -294,8 +294,9 @@ def four2five(data, format_, dst_dtype='float16'):
...
@@ -294,8 +294,9 @@ def four2five(data, format_, dst_dtype='float16'):
dim_info
,
_
=
four2five_set_dim_func
(
data
,
format_
,
dst_dtype
)
dim_info
,
_
=
four2five_set_dim_func
(
data
,
format_
,
dst_dtype
)
if
dim_info
!=
""
:
if
dim_info
!=
""
:
attrs
[
"dim"
]
=
dim_info
attrs
[
"dim"
]
=
dim_info
if
need_custom_tiling
:
attrs
[
"custom_tiling"
]
=
four2five_tiling_strategy
(
output
,
format_
,
expansion
)
attrs
[
"custom_tiling"
]
=
four2five_tiling_strategy
(
output
,
format_
,
expansion
)
el
se
:
el
if
need_custom_tiling
:
attrs
[
"custom_tiling"
]
=
four2five_tiling_strategy_dynamic
(
output
,
format_
)
attrs
[
"custom_tiling"
]
=
four2five_tiling_strategy_dynamic
(
output
,
format_
)
if
is_dynamic
:
if
is_dynamic
:
...
...
src/codegen/build_module.cc
浏览文件 @
aaf77021
...
@@ -458,7 +458,7 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
...
@@ -458,7 +458,7 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
PassTimer
*
pass_timer
=
PassTimer
::
GetInstance
();
PassTimer
*
pass_timer
=
PassTimer
::
GetInstance
();
global_attrs
.
Set
(
kKernelName
,
StringImm
::
make
(
name
));
global_attrs
.
Set
(
kKernelName
,
StringImm
::
make
(
name
));
global_attrs
.
Set
(
kDumpPassIr
,
ktvm
::
make_const
(
Int
(
1
),
config
->
dump_pass_ir
));
global_attrs
.
Set
(
kDumpPassIr
,
ktvm
::
make_const
(
Int
(
32
),
config
->
dump_pass_ir
));
if
(
config
->
dump_pass_ir
)
{
if
(
config
->
dump_pass_ir
)
{
std
::
string
dump_ir_dir
;
std
::
string
dump_ir_dir
;
if
(
global_attrs
.
GetStringAttr
(
kDumpIrDir
,
&
dump_ir_dir
))
{
if
(
global_attrs
.
GetStringAttr
(
kDumpIrDir
,
&
dump_ir_dir
))
{
...
@@ -498,7 +498,7 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
...
@@ -498,7 +498,7 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
stmt
=
NEXT_PASS
(
RenameRealize
,
stmt
,
binds_0
,
replace
);
stmt
=
NEXT_PASS
(
RenameRealize
,
stmt
,
binds_0
,
replace
);
bool
is_dynamic
=
!
shape_vars
.
empty
();
bool
is_dynamic
=
!
shape_vars
.
empty
();
global_attrs
.
Set
(
kIsDynamic
,
ktvm
::
make_const
(
Int
(
1
),
is_dynamic
));
global_attrs
.
Set
(
kIsDynamic
,
ktvm
::
make_const
(
Int
(
32
),
is_dynamic
));
Array
<
NodeRef
>
arg_list_1
;
Array
<
NodeRef
>
arg_list_1
;
Map
<
Tensor
,
Buffer
>
binds_1
;
Map
<
Tensor
,
Buffer
>
binds_1
;
...
@@ -594,7 +594,17 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
...
@@ -594,7 +594,17 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
NodeRef
tuning_spaces
=
NEXT_PASS
(
GenTuningSpace
,
stmt
,
binds_0
,
attrs_1
,
false
);
NodeRef
tuning_spaces
=
NEXT_PASS
(
GenTuningSpace
,
stmt
,
binds_0
,
attrs_1
,
false
);
return
tuning_spaces
;
return
tuning_spaces
;
}
}
Array
<
NodeRef
>
poly_res
=
NEXT_PASS
(
AutoPoly
,
stmt
,
binds_0
,
global_attrs
,
false
,
is_dynamic
);
}
// micro-tuning configs: current strategy is to retry autopoly up to 3 times when storage flatten/rewrite fails
bool
need_micro_tuning
=
!
aicpu
&&
polyhedral
&&
!
is_dynamic
&&
global_attrs
.
GetStringAttr
(
"dim"
,
""
).
empty
();
const
int
max_enter_poly_times
=
global_attrs
.
GetIntAttr
(
kMaxNumRetryPoly
,
need_micro_tuning
?
4
:
1
);
int
enter_count
=
0
;
Stmt
stmt_before_poly
=
stmt
;
while
(
enter_count
<
max_enter_poly_times
)
{
if
(
!
aicpu
&&
polyhedral
)
{
Array
<
NodeRef
>
poly_res
=
NEXT_PASS
(
AutoPoly
,
stmt_before_poly
,
binds_0
,
global_attrs
,
false
,
is_dynamic
);
enter_count
++
;
CHECK_EQ
(
poly_res
.
size
(),
2
);
CHECK_EQ
(
poly_res
.
size
(),
2
);
stmt
=
ktvm
::
Downcast
<
Stmt
>
(
poly_res
[
0
]);
stmt
=
ktvm
::
Downcast
<
Stmt
>
(
poly_res
[
0
]);
Array
<
ktvm
::
Var
>
tiling_params
=
ktvm
::
Downcast
<
Array
<
ktvm
::
Var
>>
(
poly_res
[
1
]);
Array
<
ktvm
::
Var
>
tiling_params
=
ktvm
::
Downcast
<
Array
<
ktvm
::
Var
>>
(
poly_res
[
1
]);
...
@@ -665,8 +675,15 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
...
@@ -665,8 +675,15 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
stmt
=
NEXT_PASS
(
ConvertIfToSelect
,
stmt
);
stmt
=
NEXT_PASS
(
ConvertIfToSelect
,
stmt
);
}
}
}
}
try
{
stmt
=
NEXT_PASS
(
StorageFlatten
,
stmt
,
binds_0
,
64
);
stmt
=
NEXT_PASS
(
StorageFlatten
,
stmt
,
binds_0
,
64
);
}
catch
(
const
std
::
runtime_error
&
e
)
{
if
(
enter_count
>=
max_enter_poly_times
)
{
CHECK
(
false
)
<<
e
.
what
();
}
global_attrs
.
Set
(
kErrorInfo
,
StringImm
::
make
(
e
.
what
()));
continue
;
}
stmt
=
NEXT_PASS
(
DmaFlatten
,
stmt
,
global_attrs
.
GetBoolAttr
(
kTileSizeIsVar
,
false
));
stmt
=
NEXT_PASS
(
DmaFlatten
,
stmt
,
global_attrs
.
GetBoolAttr
(
kTileSizeIsVar
,
false
));
if
(
global_attrs
.
GetBoolAttr
(
kAlgebraSimplify
,
false
)
&&
is_dynamic
)
{
if
(
global_attrs
.
GetBoolAttr
(
kAlgebraSimplify
,
false
)
&&
is_dynamic
)
{
stmt
=
NEXT_PASS
(
AlgebraSimplify
,
stmt
);
stmt
=
NEXT_PASS
(
AlgebraSimplify
,
stmt
);
...
@@ -814,7 +831,18 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
...
@@ -814,7 +831,18 @@ NodeRef Lower(Schedule sch, const Array<NodeRef> &in_args, const Array<NodeRef>
bool
bc_no_limits
=
false
;
bool
bc_no_limits
=
false
;
// timeout for MaxSAT solver in seconds (int)
// timeout for MaxSAT solver in seconds (int)
int
maxsat_timeout
=
4
;
int
maxsat_timeout
=
4
;
try
{
stmt
=
NEXT_PASS
(
StorageRewriteCCE
,
stmt
,
maxsat_filename
,
use_bc_opt
,
bc_no_limits
,
maxsat_timeout
);
stmt
=
NEXT_PASS
(
StorageRewriteCCE
,
stmt
,
maxsat_filename
,
use_bc_opt
,
bc_no_limits
,
maxsat_timeout
);
}
catch
(
MemoryAllocationException
&
e
)
{
if
(
enter_count
>=
max_enter_poly_times
)
{
CHECK
(
false
)
<<
e
.
what
();
}
global_attrs
.
Set
(
kAllocBits
,
ktvm
::
make_const
(
Int
(
32
),
e
.
alloc_bits_
+
e
.
need_bits_
));
global_attrs
.
Set
(
kErrorScope
,
StringImm
::
make
(
e
.
scope_
));
continue
;
}
break
;
}
if
(
!
is_dynamic
)
if
(
!
is_dynamic
)
stmt
=
NEXT_PASS
(
UnrollLoop
,
stmt
,
config
->
auto_unroll_max_step
,
config
->
auto_unroll_max_depth
,
stmt
=
NEXT_PASS
(
UnrollLoop
,
stmt
,
config
->
auto_unroll_max_step
,
config
->
auto_unroll_max_depth
,
...
...
src/codegen/util.cc
浏览文件 @
aaf77021
...
@@ -98,7 +98,13 @@ int AttrMap::GetIntAttr(const std::string &attr_name, int dft_value) {
...
@@ -98,7 +98,13 @@ int AttrMap::GetIntAttr(const std::string &attr_name, int dft_value) {
const
NodeRef
&
e
=
this
->
at
(
attr_name
);
const
NodeRef
&
e
=
this
->
at
(
attr_name
);
return
ir
::
GetInt32Const
(
Downcast
<
Expr
>
(
e
));
return
ir
::
GetInt32Const
(
Downcast
<
Expr
>
(
e
));
}
}
double
AttrMap
::
GetFloatAttr
(
const
std
::
string
&
attr_name
,
double
dft_value
)
{
if
(
this
->
count
(
attr_name
)
==
0
)
{
return
dft_value
;
}
const
NodeRef
&
e
=
this
->
at
(
attr_name
);
return
ir
::
GetFloatConst
(
Downcast
<
Expr
>
(
e
));
}
bool
AttrMap
::
GetBoolAttr
(
const
std
::
string
&
attr_name
,
bool
dft_value
)
{
bool
AttrMap
::
GetBoolAttr
(
const
std
::
string
&
attr_name
,
bool
dft_value
)
{
int
result
=
GetIntAttr
(
attr_name
,
static_cast
<
int
>
(
dft_value
));
int
result
=
GetIntAttr
(
attr_name
,
static_cast
<
int
>
(
dft_value
));
CHECK
(
result
==
0
||
result
==
1
)
<<
"Bool attribute "
<<
attr_name
<<
" must be 0 or 1, but found "
CHECK
(
result
==
0
||
result
==
1
)
<<
"Bool attribute "
<<
attr_name
<<
" must be 0 or 1, but found "
...
...
src/codegen/util.h
浏览文件 @
aaf77021
...
@@ -91,6 +91,11 @@ constexpr auto kEnableRemoveBroadcastCopy = "enable_remove_broadcast_copy";
...
@@ -91,6 +91,11 @@ constexpr auto kEnableRemoveBroadcastCopy = "enable_remove_broadcast_copy";
constexpr
auto
kEnableSubstituteDivVar
=
"enable_divide_var"
;
constexpr
auto
kEnableSubstituteDivVar
=
"enable_divide_var"
;
constexpr
auto
kEnableComputeInPlace
=
"enable_compute_in_place"
;
constexpr
auto
kEnableComputeInPlace
=
"enable_compute_in_place"
;
constexpr
auto
kEnableRewriteScalarCompute
=
"enable_rewrite_scalar_compute"
;
constexpr
auto
kEnableRewriteScalarCompute
=
"enable_rewrite_scalar_compute"
;
constexpr
auto
kMaxNumRetryPoly
=
"max_num_retry_poly"
;
constexpr
auto
kUBRatio
=
"ub_ratio"
;
constexpr
auto
kErrorInfo
=
""
;
constexpr
auto
kErrorScope
=
""
;
constexpr
auto
kAllocBits
=
"alloc_bits"
;
static
std
::
unordered_map
<
std
::
string
,
int
>
help_tiling_level
=
{
static
std
::
unordered_map
<
std
::
string
,
int
>
help_tiling_level
=
{
{
"None"
,
0
},
{
"None"
,
0
},
...
@@ -109,7 +114,7 @@ class AttrMap : public Map<std::string, NodeRef> {
...
@@ -109,7 +114,7 @@ class AttrMap : public Map<std::string, NodeRef> {
bool
GetBoolAttr
(
const
std
::
string
&
attr_name
,
bool
dft_value
);
bool
GetBoolAttr
(
const
std
::
string
&
attr_name
,
bool
dft_value
);
int
GetIntAttr
(
const
std
::
string
&
attr_name
,
int
dft_value
);
int
GetIntAttr
(
const
std
::
string
&
attr_name
,
int
dft_value
);
double
GetFloatAttr
(
const
std
::
string
&
attr_name
,
double
dft_value
);
bool
GetStringAttr
(
const
std
::
string
&
attr_name
,
std
::
string
*
attr_to_set
);
bool
GetStringAttr
(
const
std
::
string
&
attr_name
,
std
::
string
*
attr_to_set
);
std
::
string
GetStringAttr
(
const
std
::
string
&
attr_name
,
const
std
::
string
&
dft_value
);
std
::
string
GetStringAttr
(
const
std
::
string
&
attr_name
,
const
std
::
string
&
dft_value
);
};
};
...
...
src/include/build_module.h
浏览文件 @
aaf77021
...
@@ -18,11 +18,33 @@
...
@@ -18,11 +18,33 @@
#define INCLUDE_AKG_BUILD_MODULE_H_
#define INCLUDE_AKG_BUILD_MODULE_H_
#include <string>
#include <string>
#include <exception>
#include "codegen/util.h"
#include "codegen/util.h"
namespace
akg
{
namespace
akg
{
extern
AttrMap
global_attrs
;
extern
AttrMap
global_attrs
;
/*
* Custom exception used when memory allocation fails and triggers micro-tuning to try to recover from failure.
*/
class
MemoryAllocationException
:
public
std
::
exception
{
public:
MemoryAllocationException
(
const
std
::
string
&
scope
,
uint64_t
need_bits
,
uint64_t
alloc_bits
)
:
scope_
(
scope
),
need_bits_
(
need_bits
),
alloc_bits_
(
alloc_bits
){};
const
char
*
what
()
const
throw
()
{
std
::
runtime_error
re
((
"Allocation exceed bound of memory tag "
+
scope_
+
": need "
+
std
::
to_string
(
need_bits_
)
+
" bits, total alloc "
+
std
::
to_string
(
alloc_bits_
)
+
" bits."
)
.
c_str
());
return
re
.
what
();
}
std
::
string
scope_
{
""
};
uint64_t
need_bits_
{
0
};
uint64_t
alloc_bits_
{
0
};
};
NodeRef
Lower
(
Schedule
sch
,
const
Array
<
NodeRef
>
&
in_args
,
const
Array
<
NodeRef
>
&
shape_vars
,
const
std
::
string
&
name
,
NodeRef
Lower
(
Schedule
sch
,
const
Array
<
NodeRef
>
&
in_args
,
const
Array
<
NodeRef
>
&
shape_vars
,
const
std
::
string
&
name
,
const
Map
<
Tensor
,
Buffer
>
&
in_binds
,
const
Map
<
std
::
string
,
NodeRef
>
&
in_attrs
,
bool
simple_mode
,
const
Map
<
Tensor
,
Buffer
>
&
in_binds
,
const
Map
<
std
::
string
,
NodeRef
>
&
in_attrs
,
bool
simple_mode
,
bool
polyhedral
,
bool
tuning
,
bool
aicpu
,
const
BuildConfig
&
config
);
bool
polyhedral
,
bool
tuning
,
bool
aicpu
,
const
BuildConfig
&
config
);
...
...
src/pass/storage_rewrite_cce.cc
浏览文件 @
aaf77021
...
@@ -26,6 +26,7 @@
...
@@ -26,6 +26,7 @@
#include <regex>
#include <regex>
#include "ir_pass.h"
#include "ir_pass.h"
#include "build_module.h"
#include "pass/ir_util.h"
#include "pass/ir_util.h"
#include "emit_insn/insn_info.h"
#include "emit_insn/insn_info.h"
#include "pass/storage_rewrite_cce.h"
#include "pass/storage_rewrite_cce.h"
...
@@ -1146,8 +1147,7 @@ bool StoragePlanRewriterCCE::DoRewrite(const std::string scope, std::vector<std:
...
@@ -1146,8 +1147,7 @@ bool StoragePlanRewriterCCE::DoRewrite(const std::string scope, std::vector<std:
}
}
if
(
spec_level
<=
0
||
child_idx
<
0
)
{
if
(
spec_level
<=
0
||
child_idx
<
0
)
{
if
(
!
is_dynamic_
)
{
if
(
!
is_dynamic_
)
{
LOG
(
FATAL
)
<<
"Allocation exceed bound of memory tag "
<<
scope
<<
": need "
<<
need_nbits
throw
MemoryAllocationException
(
scope
,
need_nbits
,
total_alloc_bits
);
<<
" bits, total alloc "
<<
total_alloc_bits
<<
" bits"
;
}
else
{
}
else
{
LOG
(
WARNING
)
<<
"Dynamic shape static allocation exceed bound of memory tag "
<<
scope
<<
": need "
LOG
(
WARNING
)
<<
"Dynamic shape static allocation exceed bound of memory tag "
<<
scope
<<
": need "
<<
need_nbits
<<
" bits, will use dynamic allocation instead"
;
<<
need_nbits
<<
" bits, will use dynamic allocation instead"
;
...
...
src/poly/tiling_solver.cc
浏览文件 @
aaf77021
...
@@ -16,11 +16,63 @@
...
@@ -16,11 +16,63 @@
*/
*/
#include "poly/tiling_solver.h"
#include "poly/tiling_solver.h"
#include "build_module.h"
namespace
akg
{
namespace
akg
{
namespace
ir
{
namespace
ir
{
namespace
poly
{
namespace
poly
{
/*
* This function parse StorageFlatten error info into a ratio that guides the auto tiling to reduce
* memory allocation.
* e.g.
* error info : Check failed: const_size * op->type.bits() <= info->max_num_bits (5242880 vs. 2097152) :
* Allocation exceed bound of memory tag local.UB.
* ratio : memory_size / alloc_size = (2097152 / 5242880) = 0.4, which means the total allocation
* size used in auto tiling shoulde reduce 0.4 times.
*/
double
TilingSolver
::
GetNewAllocRatioWhenFlattenFail
(
const
std
::
string
&
error_info
)
{
std
::
vector
<
std
::
string
>
sub_strs
;
sub_strs
=
akg
::
common
::
Split
(
error_info
,
"("
);
CHECK_GE
(
sub_strs
.
size
(),
2U
);
std
::
string
tmp_str
=
sub_strs
[
2
];
sub_strs
=
akg
::
common
::
Split
(
tmp_str
,
" "
);
CHECK
(
!
sub_strs
.
empty
());
auto
alloc_bits
=
static_cast
<
double
>
(
std
::
strtod
(
sub_strs
[
0
].
c_str
(),
nullptr
));
sub_strs
=
akg
::
common
::
Split
(
error_info
,
")"
);
CHECK_GE
(
sub_strs
.
size
(),
1U
);
tmp_str
=
sub_strs
[
1
];
sub_strs
=
akg
::
common
::
Split
(
tmp_str
,
" "
);
CHECK
(
!
sub_strs
.
empty
());
auto
memory_bits
=
static_cast
<
double
>
(
std
::
strtod
(
sub_strs
.
back
().
c_str
(),
nullptr
));
CHECK_NE
(
alloc_bits
,
0
);
return
memory_bits
/
alloc_bits
;
}
/*
* This function returns an adjust ratio that further reduces the memory allocation limit apart from
* the default percentage reserved for auto double buffer and try to generate smaller tile sizes that
* helps to recover from memory allocation failure such as the one in storage rewrite cce pass.
*/
double
TilingSolver
::
GetNewAllocRatioWhenRewriteFail
(
int64_t
memory_bits
)
{
auto
actual_allocs
=
global_attrs
.
GetFloatAttr
(
kAllocBits
,
0.0
);
auto
last_adjust_ratio
=
global_attrs
.
GetFloatAttr
(
kUBRatio
,
1.0
);
auto
adjust_ratio
=
1.0
;
if
(
actual_allocs
!=
0
)
{
std
::
stringstream
ss
;
auto
expect_allocs
=
memory_bits
*
last_adjust_ratio
;
adjust_ratio
=
(
expect_allocs
/
actual_allocs
);
ss
<<
"Adjust memory allocation ratio to "
<<
adjust_ratio
<<
" times and retry tiling."
;
global_attrs
.
Set
(
kUBRatio
,
ktvm
::
make_const
(
Float
(
32
),
adjust_ratio
));
analyzer_
.
logger_
.
AppendLog
(
MICRO_TUNING
,
ss
);
}
return
adjust_ratio
;
}
void
TilingSolver
::
CollectMemoryLimit
()
{
void
TilingSolver
::
CollectMemoryLimit
()
{
// Init memory allocation percentage.
percentage_
=
ALLOCATION_PERCENTAGE
;
percentage_
=
ALLOCATION_PERCENTAGE
;
for
(
auto
attr
:
analyzer_
.
RootAxis
()
->
attrs
)
{
for
(
auto
attr
:
analyzer_
.
RootAxis
()
->
attrs
)
{
if
(
attr
.
attr_key
!=
"MEM_RATIO"
)
continue
;
if
(
attr
.
attr_key
!=
"MEM_RATIO"
)
continue
;
...
@@ -29,9 +81,27 @@ void TilingSolver::CollectMemoryLimit() {
...
@@ -29,9 +81,27 @@ void TilingSolver::CollectMemoryLimit() {
break
;
break
;
}
}
// Handle previous error info if storage flatten fails and adjust allocation percentage.
auto
error_info
=
global_attrs
.
GetStringAttr
(
kErrorInfo
,
""
);
if
(
!
error_info
.
empty
()
&&
error_info
.
find
(
"storage_flatten"
)
!=
std
::
string
::
npos
)
{
std
::
stringstream
ss
;
ss
<<
"Get Error Info! -> "
<<
global_attrs
.
GetStringAttr
(
kErrorInfo
,
""
);
percentage_
=
percentage_
*
GetNewAllocRatioWhenFlattenFail
(
error_info
);
ss
<<
"Adjust memory allocation to "
<<
percentage_
<<
" of memory size and retry tiling."
;
global_attrs
.
Set
(
kErrorInfo
,
StringImm
::
make
(
""
));
analyzer_
.
logger_
.
AppendLog
(
MICRO_TUNING
,
ss
);
}
// Init memory limit for each scope and reduce ratio of local.UB if storage rewrite fails previously.
DavinciInfo
&
d_info
=
DavinciInfo
::
GetInstance
();
DavinciInfo
&
d_info
=
DavinciInfo
::
GetInstance
();
auto
error_scope
=
global_attrs
.
GetStringAttr
(
kErrorScope
,
""
);
for
(
auto
i
=
0
;
i
<
MEM_SCOPE_BULK
;
++
i
)
{
for
(
auto
i
=
0
;
i
<
MEM_SCOPE_BULK
;
++
i
)
{
this
->
mem_limit_
[
i
]
=
d_info
.
GetMemoryLimitInScope
(
i
)
*
percentage_
;
this
->
mem_limit_
[
i
]
=
d_info
.
GetMemoryLimitInScope
(
i
)
*
percentage_
;
if
(
i
==
DavinciMemScope
::
MEM_SCOPE_UB
&&
error_scope
==
"local.UB"
)
{
this
->
mem_limit_
[
i
]
=
std
::
max
(
static_cast
<
int
>
(
this
->
mem_limit_
[
i
]
*
GetNewAllocRatioWhenRewriteFail
(
this
->
mem_limit_
[
i
])),
1
);
global_attrs
.
Set
(
kErrorScope
,
StringImm
::
make
(
""
));
}
}
}
}
}
...
...
src/poly/tiling_solver.h
浏览文件 @
aaf77021
...
@@ -30,6 +30,8 @@ class TilingSolver {
...
@@ -30,6 +30,8 @@ class TilingSolver {
~
TilingSolver
()
{}
~
TilingSolver
()
{}
void
CollectMemoryLimit
();
void
CollectMemoryLimit
();
void
CollectTileAxisTopDown
();
void
CollectTileAxisTopDown
();
double
GetNewAllocRatioWhenFlattenFail
(
const
std
::
string
&
error_info
);
double
GetNewAllocRatioWhenRewriteFail
(
int64_t
memory_bits
);
TileCandidate
*
Solve
();
TileCandidate
*
Solve
();
TilingAnalyzer
&
analyzer_
;
TilingAnalyzer
&
analyzer_
;
...
...
src/poly/tiling_utils.cc
浏览文件 @
aaf77021
...
@@ -29,6 +29,8 @@ void TileLogger::AppendLine(LogStage stage, const std::string &line) {
...
@@ -29,6 +29,8 @@ void TileLogger::AppendLine(LogStage stage, const std::string &line) {
analyze_tiling_space_stage_
.
emplace_back
(
line
);
analyze_tiling_space_stage_
.
emplace_back
(
line
);
}
else
if
(
stage
==
DO_TILING
)
{
}
else
if
(
stage
==
DO_TILING
)
{
do_tiling_stage_
.
emplace_back
(
line
);
do_tiling_stage_
.
emplace_back
(
line
);
}
else
if
(
stage
==
MICRO_TUNING
)
{
micro_tuning_strage_
.
emplace_back
(
line
);
}
else
{
}
else
{
do_tuning_stage_
.
emplace_back
(
line
);
do_tuning_stage_
.
emplace_back
(
line
);
}
}
...
@@ -70,6 +72,11 @@ bool TileLogger::DumpLogFile() {
...
@@ -70,6 +72,11 @@ bool TileLogger::DumpLogFile() {
of
<<
line
<<
std
::
endl
;
of
<<
line
<<
std
::
endl
;
}
}
of
<<
"========================="
<<
std
::
endl
;
of
<<
"========================="
<<
std
::
endl
;
of
<<
">>>>>>>>>> Micro tuning stage <<<<<<<<<<<<"
<<
std
::
endl
;
for
(
const
auto
&
line
:
micro_tuning_strage_
)
{
of
<<
line
<<
std
::
endl
;
}
of
<<
"========================="
<<
std
::
endl
;
of
.
close
();
of
.
close
();
return
true
;
return
true
;
}
}
...
...
src/poly/tiling_utils.h
浏览文件 @
aaf77021
...
@@ -32,7 +32,7 @@ enum DavinciMemScope {
...
@@ -32,7 +32,7 @@ enum DavinciMemScope {
MEM_SCOPE_L0C
,
MEM_SCOPE_L0C
,
MEM_SCOPE_BULK
,
MEM_SCOPE_BULK
,
};
};
enum
LogStage
{
ANA_SCHETREE
,
ANA_BUF_LIVE_EXTENT
,
ANA_TILING_SPACE
,
DO_TILING
,
DO_TUNING
};
enum
LogStage
{
ANA_SCHETREE
,
ANA_BUF_LIVE_EXTENT
,
ANA_TILING_SPACE
,
DO_TILING
,
DO_TUNING
,
MICRO_TUNING
};
class
DavinciInfo
{
class
DavinciInfo
{
public:
public:
...
@@ -89,6 +89,7 @@ class TileLogger {
...
@@ -89,6 +89,7 @@ class TileLogger {
LogFile
analyze_tiling_space_stage_
;
LogFile
analyze_tiling_space_stage_
;
LogFile
do_tiling_stage_
;
LogFile
do_tiling_stage_
;
LogFile
do_tuning_stage_
;
LogFile
do_tuning_stage_
;
LogFile
micro_tuning_strage_
;
};
};
}
// namespace poly
}
// namespace poly
}
// namespace ir
}
// namespace ir
...
...
tests/unittest/pass/test_micro_tuning.py
0 → 100644
浏览文件 @
aaf77021
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""unittest for micro-tuning"""
from
akg.utils
import
kernel_exec
from
akg.ops.array
import
four2five
def
test_four2five_without_custom_tiling
(
build_shape
,
dtype
,
op_attrs
):
"""This test case will fail without cunstom tiling and micro-tuning will automatically adjust tile sizes."""
build_attr
=
op_attrs
+
[
False
]
return
kernel_exec
.
op_build_test
(
four2five
.
four2five
,
[
build_shape
],
[
dtype
],
build_attr
,
kernel_name
=
"four2five"
,
attrs
=
{},
tuning
=
False
)
if
__name__
==
"__main__"
:
test_four2five_without_custom_tiling
(
[
32
,
1001
,
1
,
1
],
"float16"
,
[
'NCHW'
,
'float16'
])
tests/unittest/unittest.sh
浏览文件 @
aaf77021
...
@@ -22,6 +22,7 @@ casefiles=(
...
@@ -22,6 +22,7 @@ casefiles=(
"pass/test_promote_if.py"
"pass/test_promote_if.py"
"pass/test_sink_if.py"
"pass/test_sink_if.py"
"pass/test_ir_parser.py"
"pass/test_ir_parser.py"
"pass/test_micro_tuning.py"
"pass/test_elim_vector_mask.py"
"pass/test_elim_vector_mask.py"
"pass/test_copy_propagation.py"
"pass/test_copy_propagation.py"
"pass/test_utils_detect_non_linear_index.py"
"pass/test_utils_detect_non_linear_index.py"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录