Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
akg
提交
aaf77021
A
akg
项目概览
MindSpore
/
akg
通知
58
Star
7
Fork
7
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
A
akg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
aaf77021
编写于
7月 14, 2020
作者:
D
dabaiji
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support dynamical memory allocation ratio adjustment in micro-tuning for allocation exceed problem
上级
d9da1e31
变更
12
展开全部
隐藏空白更改
内联
并排
Showing
12 changed file
with
383 addition
and
212 deletion
+383
-212
python/akg/ops/array/four2five.py
python/akg/ops/array/four2five.py
+5
-4
src/codegen/build_module.cc
src/codegen/build_module.cc
+230
-202
src/codegen/util.cc
src/codegen/util.cc
+7
-1
src/codegen/util.h
src/codegen/util.h
+6
-1
src/include/build_module.h
src/include/build_module.h
+22
-0
src/pass/storage_rewrite_cce.cc
src/pass/storage_rewrite_cce.cc
+2
-2
src/poly/tiling_solver.cc
src/poly/tiling_solver.cc
+71
-1
src/poly/tiling_solver.h
src/poly/tiling_solver.h
+2
-0
src/poly/tiling_utils.cc
src/poly/tiling_utils.cc
+7
-0
src/poly/tiling_utils.h
src/poly/tiling_utils.h
+2
-1
tests/unittest/pass/test_micro_tuning.py
tests/unittest/pass/test_micro_tuning.py
+28
-0
tests/unittest/unittest.sh
tests/unittest/unittest.sh
+1
-0
未找到文件。
python/akg/ops/array/four2five.py
浏览文件 @
aaf77021
...
...
@@ -116,8 +116,8 @@ def four2five_tiling_strategy_dynamic(tensor, input_format):
strategy
.
append
(
ct_util
.
create_constraint_on_tensor
(
tensor
,
16
,
ct_util
.
TileConstraint
.
FACTOR
,
4
)[
0
])
return
strategy
@
vc_util
.
check_input_type
(
akg
.
tvm
.
tensor
.
Tensor
,
str
,
str
)
def
four2five
(
data
,
format_
,
dst_dtype
=
'float16'
):
@
vc_util
.
check_input_type
(
akg
.
tvm
.
tensor
.
Tensor
,
str
,
str
,
bool
)
def
four2five
(
data
,
format_
,
dst_dtype
=
'float16'
,
need_custom_tiling
=
True
):
"""
Convert 4-dims "data" to 5-dims,the format of "data" is defined in "format_"
...
...
@@ -294,8 +294,9 @@ def four2five(data, format_, dst_dtype='float16'):
dim_info
,
_
=
four2five_set_dim_func
(
data
,
format_
,
dst_dtype
)
if
dim_info
!=
""
:
attrs
[
"dim"
]
=
dim_info
attrs
[
"custom_tiling"
]
=
four2five_tiling_strategy
(
output
,
format_
,
expansion
)
else
:
if
need_custom_tiling
:
attrs
[
"custom_tiling"
]
=
four2five_tiling_strategy
(
output
,
format_
,
expansion
)
elif
need_custom_tiling
:
attrs
[
"custom_tiling"
]
=
four2five_tiling_strategy_dynamic
(
output
,
format_
)
if
is_dynamic
:
...
...
src/codegen/build_module.cc
浏览文件 @
aaf77021
此差异已折叠。
点击以展开。
src/codegen/util.cc
浏览文件 @
aaf77021
...
...
@@ -98,7 +98,13 @@ int AttrMap::GetIntAttr(const std::string &attr_name, int dft_value) {
const
NodeRef
&
e
=
this
->
at
(
attr_name
);
return
ir
::
GetInt32Const
(
Downcast
<
Expr
>
(
e
));
}
double
AttrMap
::
GetFloatAttr
(
const
std
::
string
&
attr_name
,
double
dft_value
)
{
if
(
this
->
count
(
attr_name
)
==
0
)
{
return
dft_value
;
}
const
NodeRef
&
e
=
this
->
at
(
attr_name
);
return
ir
::
GetFloatConst
(
Downcast
<
Expr
>
(
e
));
}
bool
AttrMap
::
GetBoolAttr
(
const
std
::
string
&
attr_name
,
bool
dft_value
)
{
int
result
=
GetIntAttr
(
attr_name
,
static_cast
<
int
>
(
dft_value
));
CHECK
(
result
==
0
||
result
==
1
)
<<
"Bool attribute "
<<
attr_name
<<
" must be 0 or 1, but found "
...
...
src/codegen/util.h
浏览文件 @
aaf77021
...
...
@@ -91,6 +91,11 @@ constexpr auto kEnableRemoveBroadcastCopy = "enable_remove_broadcast_copy";
constexpr
auto
kEnableSubstituteDivVar
=
"enable_divide_var"
;
constexpr
auto
kEnableComputeInPlace
=
"enable_compute_in_place"
;
constexpr
auto
kEnableRewriteScalarCompute
=
"enable_rewrite_scalar_compute"
;
constexpr
auto
kMaxNumRetryPoly
=
"max_num_retry_poly"
;
constexpr
auto
kUBRatio
=
"ub_ratio"
;
// NOTE(review): kErrorInfo and kErrorScope are both defined as the empty string, so the two
// names denote the same attribute key. Confirm whether distinct keys were intended.
constexpr
auto
kErrorInfo
=
""
;
constexpr
auto
kErrorScope
=
""
;
constexpr
auto
kAllocBits
=
"alloc_bits"
;
static
std
::
unordered_map
<
std
::
string
,
int
>
help_tiling_level
=
{
{
"None"
,
0
},
...
...
@@ -109,7 +114,7 @@ class AttrMap : public Map<std::string, NodeRef> {
bool
GetBoolAttr
(
const
std
::
string
&
attr_name
,
bool
dft_value
);
int
GetIntAttr
(
const
std
::
string
&
attr_name
,
int
dft_value
);
double
GetFloatAttr
(
const
std
::
string
&
attr_name
,
double
dft_value
);
bool
GetStringAttr
(
const
std
::
string
&
attr_name
,
std
::
string
*
attr_to_set
);
std
::
string
GetStringAttr
(
const
std
::
string
&
attr_name
,
const
std
::
string
&
dft_value
);
};
...
...
src/include/build_module.h
浏览文件 @
aaf77021
...
...
@@ -18,11 +18,33 @@
#define INCLUDE_AKG_BUILD_MODULE_H_
#include <string>
#include <exception>
#include "codegen/util.h"
namespace
akg
{
extern
AttrMap
global_attrs
;
/*
 * Custom exception used when memory allocation fails and triggers micro-tuning to try to recover from failure.
 *
 * The text returned by what() is built once in the constructor and owned by the exception object,
 * so the returned pointer stays valid for as long as the exception itself is alive. Later changes
 * to the public fields are not reflected in the message.
 */
class MemoryAllocationException : public std::exception {
 public:
  /*
   * scope      : memory tag whose bound was exceeded.
   * need_bits  : number of bits the failed allocation needs.
   * alloc_bits : total number of allocated bits reported by the thrower.
   */
  MemoryAllocationException(const std::string &scope, uint64_t need_bits, uint64_t alloc_bits)
      : scope_(scope),
        need_bits_(need_bits),
        alloc_bits_(alloc_bits),
        message_("Allocation exceed bound of memory tag " + scope + ": need " + std::to_string(need_bits) +
                 " bits, total alloc " + std::to_string(alloc_bits) + " bits.") {}

  // Returns a pointer into message_; never a pointer into a temporary.
  const char *what() const noexcept override { return message_.c_str(); }

  std::string scope_{""};
  uint64_t need_bits_{0};
  uint64_t alloc_bits_{0};

 private:
  // Pre-formatted description returned by what().
  std::string message_;
};
NodeRef
Lower
(
Schedule
sch
,
const
Array
<
NodeRef
>
&
in_args
,
const
Array
<
NodeRef
>
&
shape_vars
,
const
std
::
string
&
name
,
const
Map
<
Tensor
,
Buffer
>
&
in_binds
,
const
Map
<
std
::
string
,
NodeRef
>
&
in_attrs
,
bool
simple_mode
,
bool
polyhedral
,
bool
tuning
,
bool
aicpu
,
const
BuildConfig
&
config
);
...
...
src/pass/storage_rewrite_cce.cc
浏览文件 @
aaf77021
...
...
@@ -26,6 +26,7 @@
#include <regex>
#include "ir_pass.h"
#include "build_module.h"
#include "pass/ir_util.h"
#include "emit_insn/insn_info.h"
#include "pass/storage_rewrite_cce.h"
...
...
@@ -1146,8 +1147,7 @@ bool StoragePlanRewriterCCE::DoRewrite(const std::string scope, std::vector<std:
}
if
(
spec_level
<=
0
||
child_idx
<
0
)
{
if
(
!
is_dynamic_
)
{
LOG
(
FATAL
)
<<
"Allocation exceed bound of memory tag "
<<
scope
<<
": need "
<<
need_nbits
<<
" bits, total alloc "
<<
total_alloc_bits
<<
" bits"
;
throw
MemoryAllocationException
(
scope
,
need_nbits
,
total_alloc_bits
);
}
else
{
LOG
(
WARNING
)
<<
"Dynamic shape static allocation exceed bound of memory tag "
<<
scope
<<
": need "
<<
need_nbits
<<
" bits, will use dynamic allocation instead"
;
...
...
src/poly/tiling_solver.cc
浏览文件 @
aaf77021
...
...
@@ -16,11 +16,63 @@
*/
#include "poly/tiling_solver.h"
#include "build_module.h"
namespace
akg
{
namespace
ir
{
namespace
poly
{
/*
* This function parse StorageFlatten error info into a ratio that guides the auto tiling to reduce
* memory allocation.
* e.g.
* error info : Check failed: const_size * op->type.bits() <= info->max_num_bits (5242880 vs. 2097152) :
* Allocation exceed bound of memory tag local.UB.
* ratio : memory_size / alloc_size = (2097152 / 5242880) = 0.4, which means the total allocation
* size used in auto tiling shoulde reduce 0.4 times.
*/
double
TilingSolver
::
GetNewAllocRatioWhenFlattenFail
(
const
std
::
string
&
error_info
)
{
std
::
vector
<
std
::
string
>
sub_strs
;
sub_strs
=
akg
::
common
::
Split
(
error_info
,
"("
);
CHECK_GE
(
sub_strs
.
size
(),
2U
);
std
::
string
tmp_str
=
sub_strs
[
2
];
sub_strs
=
akg
::
common
::
Split
(
tmp_str
,
" "
);
CHECK
(
!
sub_strs
.
empty
());
auto
alloc_bits
=
static_cast
<
double
>
(
std
::
strtod
(
sub_strs
[
0
].
c_str
(),
nullptr
));
sub_strs
=
akg
::
common
::
Split
(
error_info
,
")"
);
CHECK_GE
(
sub_strs
.
size
(),
1U
);
tmp_str
=
sub_strs
[
1
];
sub_strs
=
akg
::
common
::
Split
(
tmp_str
,
" "
);
CHECK
(
!
sub_strs
.
empty
());
auto
memory_bits
=
static_cast
<
double
>
(
std
::
strtod
(
sub_strs
.
back
().
c_str
(),
nullptr
));
CHECK_NE
(
alloc_bits
,
0
);
return
memory_bits
/
alloc_bits
;
}
/*
 * Compute an adjust ratio for the memory limit after a previous allocation failure.
 *
 * Reads the kAllocBits attribute (default 0.0) and the kUBRatio attribute (default 1.0) from
 * global_attrs. When no allocation size was recorded (kAllocBits is 0) the ratio is 1.0 and nothing
 * else happens. Otherwise the ratio is (memory_bits * previous ratio) / recorded allocation; it is
 * stored back into kUBRatio as a Float(32) constant and a message is appended to the MICRO_TUNING log.
 */
double TilingSolver::GetNewAllocRatioWhenRewriteFail(int64_t memory_bits) {
  const auto recorded_allocs = global_attrs.GetFloatAttr(kAllocBits, 0.0);
  const auto previous_ratio = global_attrs.GetFloatAttr(kUBRatio, 1.0);
  if (recorded_allocs == 0) {
    return 1.0;
  }

  const auto expected_allocs = memory_bits * previous_ratio;
  const auto new_ratio = expected_allocs / recorded_allocs;

  std::stringstream log_line;
  log_line << "Adjust memory allocation ratio to " << new_ratio << " times and retry tiling.";
  global_attrs.Set(kUBRatio, ktvm::make_const(Float(32), new_ratio));
  analyzer_.logger_.AppendLog(MICRO_TUNING, log_line);
  return new_ratio;
}
void
TilingSolver
::
CollectMemoryLimit
()
{
// Init memory allocation percentage.
percentage_
=
ALLOCATION_PERCENTAGE
;
for
(
auto
attr
:
analyzer_
.
RootAxis
()
->
attrs
)
{
if
(
attr
.
attr_key
!=
"MEM_RATIO"
)
continue
;
...
...
@@ -29,9 +81,27 @@ void TilingSolver::CollectMemoryLimit() {
break
;
}
// Handle previous error info if storage flatten fails and adjust allocation percentage.
auto
error_info
=
global_attrs
.
GetStringAttr
(
kErrorInfo
,
""
);
if
(
!
error_info
.
empty
()
&&
error_info
.
find
(
"storage_flatten"
)
!=
std
::
string
::
npos
)
{
std
::
stringstream
ss
;
ss
<<
"Get Error Info! -> "
<<
global_attrs
.
GetStringAttr
(
kErrorInfo
,
""
);
percentage_
=
percentage_
*
GetNewAllocRatioWhenFlattenFail
(
error_info
);
ss
<<
"Adjust memory allocation to "
<<
percentage_
<<
" of memory size and retry tiling."
;
global_attrs
.
Set
(
kErrorInfo
,
StringImm
::
make
(
""
));
analyzer_
.
logger_
.
AppendLog
(
MICRO_TUNING
,
ss
);
}
// Init memory limit for each scope and reduce ratio of local.UB if storage rewrite fails previously.
DavinciInfo
&
d_info
=
DavinciInfo
::
GetInstance
();
auto
error_scope
=
global_attrs
.
GetStringAttr
(
kErrorScope
,
""
);
for
(
auto
i
=
0
;
i
<
MEM_SCOPE_BULK
;
++
i
)
{
this
->
mem_limit_
[
i
]
=
d_info
.
GetMemoryLimitInScope
(
i
)
*
percentage_
;
if
(
i
==
DavinciMemScope
::
MEM_SCOPE_UB
&&
error_scope
==
"local.UB"
)
{
this
->
mem_limit_
[
i
]
=
std
::
max
(
static_cast
<
int
>
(
this
->
mem_limit_
[
i
]
*
GetNewAllocRatioWhenRewriteFail
(
this
->
mem_limit_
[
i
])),
1
);
global_attrs
.
Set
(
kErrorScope
,
StringImm
::
make
(
""
));
}
}
}
...
...
src/poly/tiling_solver.h
浏览文件 @
aaf77021
...
...
@@ -30,6 +30,8 @@ class TilingSolver {
~
TilingSolver
()
{}
void
CollectMemoryLimit
();
void
CollectTileAxisTopDown
();
double
GetNewAllocRatioWhenFlattenFail
(
const
std
::
string
&
error_info
);
double
GetNewAllocRatioWhenRewriteFail
(
int64_t
memory_bits
);
TileCandidate
*
Solve
();
TilingAnalyzer
&
analyzer_
;
...
...
src/poly/tiling_utils.cc
浏览文件 @
aaf77021
...
...
@@ -29,6 +29,8 @@ void TileLogger::AppendLine(LogStage stage, const std::string &line) {
analyze_tiling_space_stage_
.
emplace_back
(
line
);
}
else
if
(
stage
==
DO_TILING
)
{
do_tiling_stage_
.
emplace_back
(
line
);
}
else
if
(
stage
==
MICRO_TUNING
)
{
micro_tuning_strage_
.
emplace_back
(
line
);
}
else
{
do_tuning_stage_
.
emplace_back
(
line
);
}
...
...
@@ -70,6 +72,11 @@ bool TileLogger::DumpLogFile() {
of
<<
line
<<
std
::
endl
;
}
of
<<
"========================="
<<
std
::
endl
;
of
<<
">>>>>>>>>> Micro tuning stage <<<<<<<<<<<<"
<<
std
::
endl
;
for
(
const
auto
&
line
:
micro_tuning_strage_
)
{
of
<<
line
<<
std
::
endl
;
}
of
<<
"========================="
<<
std
::
endl
;
of
.
close
();
return
true
;
}
...
...
src/poly/tiling_utils.h
浏览文件 @
aaf77021
...
...
@@ -32,7 +32,7 @@ enum DavinciMemScope {
MEM_SCOPE_L0C
,
MEM_SCOPE_BULK
,
};
enum
LogStage
{
ANA_SCHETREE
,
ANA_BUF_LIVE_EXTENT
,
ANA_TILING_SPACE
,
DO_TILING
,
DO_TUNING
};
enum
LogStage
{
ANA_SCHETREE
,
ANA_BUF_LIVE_EXTENT
,
ANA_TILING_SPACE
,
DO_TILING
,
DO_TUNING
,
MICRO_TUNING
};
class
DavinciInfo
{
public:
...
...
@@ -89,6 +89,7 @@ class TileLogger {
LogFile
analyze_tiling_space_stage_
;
LogFile
do_tiling_stage_
;
LogFile
do_tuning_stage_
;
LogFile
micro_tuning_strage_
;
};
}
// namespace poly
}
// namespace ir
...
...
tests/unittest/pass/test_micro_tuning.py
0 → 100644
浏览文件 @
aaf77021
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""unittest for micro-tuning"""
from
akg.utils
import
kernel_exec
from
akg.ops.array
import
four2five
def test_four2five_without_custom_tiling(build_shape, dtype, op_attrs):
    """
    Build four2five with custom tiling switched off.

    This case fails without custom tiling, so it relies on micro-tuning to automatically adjust tile sizes.
    A trailing False is appended to op_attrs before the op is built.
    """
    attrs_without_custom_tiling = op_attrs + [False]
    return kernel_exec.op_build_test(
        four2five.four2five,
        [build_shape],
        [dtype],
        attrs_without_custom_tiling,
        kernel_name="four2five",
        attrs={},
        tuning=False,
    )
if __name__ == "__main__":
    # Run the micro-tuning case directly with an NCHW float16 input.
    test_four2five_without_custom_tiling(
        build_shape=[32, 1001, 1, 1],
        dtype="float16",
        op_attrs=['NCHW', 'float16'],
    )
tests/unittest/unittest.sh
浏览文件 @
aaf77021
...
...
@@ -22,6 +22,7 @@ casefiles=(
"pass/test_promote_if.py"
"pass/test_sink_if.py"
"pass/test_ir_parser.py"
"pass/test_micro_tuning.py"
"pass/test_elim_vector_mask.py"
"pass/test_copy_propagation.py"
"pass/test_utils_detect_non_linear_index.py"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录