Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
akg
提交
d920ab32
A
akg
项目概览
MindSpore
/
akg
通知
58
Star
7
Fork
7
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
A
akg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d920ab32
编写于
7月 08, 2020
作者:
D
dabaiji
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add core number and core granularity balance model
上级
225139a0
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
37 additions
and
6 deletions
+37
-6
src/poly/tiling_analyzer.cc
src/poly/tiling_analyzer.cc
+1
-1
src/poly/tiling_analyzer.h
src/poly/tiling_analyzer.h
+3
-1
src/poly/tiling_strategy_manager.cc
src/poly/tiling_strategy_manager.cc
+32
-4
src/poly/tiling_strategy_manager.h
src/poly/tiling_strategy_manager.h
+1
-0
未找到文件。
src/poly/tiling_analyzer.cc
浏览文件 @
d920ab32
...
...
@@ -717,7 +717,7 @@ int TileCandidate::GetMinFactorForMinDataGranularity(TileAxis *axis) {
}
granularity
*=
l1_val
;
}
return
std
::
max
(
static_cast
<
int
>
(
MIN_
MULTICORE_BYTES
/
granularity
),
1
);
return
std
::
max
(
static_cast
<
int
>
(
MIN_
CORE_GRANULARITY
/
granularity
),
1
);
}
/*
...
...
src/poly/tiling_analyzer.h
浏览文件 @
d920ab32
...
...
@@ -52,7 +52,9 @@ constexpr auto DUMP_LEVEL_TUNING = 3;
constexpr
auto
DUMP_LINE_BREAK_NUM
=
100
;
constexpr
auto
GEN_PRIME_NUM
=
32
;
constexpr
auto
VECTORIZE_BYTE
=
256
;
constexpr
auto
MIN_MULTICORE_BYTES
=
256
;
constexpr
auto
MAX_REPEAT
=
255
;
constexpr
auto
MIN_CORE_GRANULARITY
=
256
;
constexpr
auto
DESIRE_CORE_GRANULARITY
=
8192
;
// 256 Bytes * 32 repeat
// Controlled by custom tiling.
constexpr
auto
ALLOCATION_PERCENTAGE
=
0.5
;
// reserved for double buffer in default
...
...
src/poly/tiling_strategy_manager.cc
浏览文件 @
d920ab32
...
...
@@ -426,19 +426,47 @@ void GemmStrategy::AddConstraint() {
}
}
std
::
pair
<
int
,
int
>
MulticoreStrategy
::
GetProposalRangeForFullMulticore
(
TileAxis
*
multicore_axis
)
{
// Adjust max core for element-wise and inner-most reduction operations to balance core number and granularity.
//
// Returns:
//   - 0 as soon as a tile axis whose range_extent is not a constant IntImm is reached;
//   - the configured core number (cand_.GetCoreNumConf()) as soon as an axis is reached that carries the
//     "TRANSFORM" or "TRANSPOSE" attribute, or is a "REDUCE_AXIS" without "REDUCE_SRC_LAST";
//   - the configured core number when the product of all axis extents is at least
//     core_number * MIN_CORE_GRANULARITY * MAX_REPEAT;
//   - otherwise product / DESIRE_CORE_GRANULARITY, rounded down to an even number when it is larger than 2
//     and never larger than the configured core number. This value may be 0 for small problems.
int MulticoreStrategy::GetProposalCoreNum() {
  const int core_conf = cand_.GetCoreNumConf();

  // IntImm::value is 64-bit, so accumulate the product in 64 bits; an `int` accumulator would silently
  // narrow/overflow for large shapes and corrupt the comparison below.
  int64_t problem_size = 1;
  for (auto axis : this->cand_.GetTileAxis()) {
    const auto *extent = axis->range_extent.as<IntImm>();
    if (extent == nullptr) {
      return 0;
    }

    const bool keep_configured_cores = axis->HasAttr("TRANSFORM") || axis->HasAttr("TRANSPOSE") ||
                                       (axis->HasAttr("REDUCE_AXIS") && !axis->HasAttr("REDUCE_SRC_LAST"));
    if (keep_configured_cores) {
      return core_conf;
    }

    problem_size *= extent->value;
  }

  // Widen before multiplying so the threshold itself cannot overflow `int`.
  const int64_t full_load_size = static_cast<int64_t>(core_conf) * MIN_CORE_GRANULARITY * MAX_REPEAT;
  if (problem_size >= full_load_size) {
    return core_conf;
  }

  // problem_size < full_load_size here, so the quotient fits in `int`.
  int proposal = static_cast<int>(problem_size / DESIRE_CORE_GRANULARITY);
  if (proposal > 2 && proposal % 2 != 0) {
    proposal--;
  }

  // The threshold above is larger than core_conf * DESIRE_CORE_GRANULARITY, so the quotient can exceed the
  // configured core number; never propose more cores than are configured.
  return std::min(proposal, core_conf);
}
std
::
pair
<
int
,
int
>
MulticoreStrategy
::
GetProposalRangeForFullMulticore
(
TileAxis
*
multicore_axis
)
{
int
max_core
=
GetProposalCoreNum
();
int
used_core
=
1
;
std
::
pair
<
int
,
int
>
proposal_range
=
std
::
make_pair
(
cand_
.
GetMinFactorForMinDataGranularity
(
multicore_axis
),
-
1
);
auto
this_level_core
=
std
::
max
(
static_cast
<
int
>
(
max_core
/
used_core
),
1
);
std
::
stringstream
ss
;
if
(
multicore_axis
->
range_extent
.
as
<
IntImm
>
()
==
nullptr
)
return
proposal_range
;
if
(
multicore_axis
->
range_extent
.
as
<
IntImm
>
()
==
nullptr
||
this_level_core
<=
1
)
{
return
proposal_range
;
}
auto
shape
=
multicore_axis
->
range_extent
.
as
<
IntImm
>
()
->
value
;
bool
is_last_level
=
false
;
for
(
auto
other_axis
:
this
->
cand_
.
GetTileAxis
())
{
if
(
other_axis
==
multicore_axis
)
break
;
if
(
other_axis
->
index
!=
multicore_axis
->
index
||
other_axis
->
HasAttr
(
"REDUCE_AXIS"
))
continue
;
if
(
other_axis
->
range_extent
.
as
<
IntImm
>
()
==
nullptr
)
return
proposal_range
;
int64_t
l1_val
=
TileVarId
::
UNDEFINE
;
std
::
tie
(
l1_val
,
std
::
ignore
)
=
cand_
.
GetConstTileVal
(
other_axis
);
if
(
l1_val
==
TileVarId
::
VAR
)
return
proposal_range
;
...
...
@@ -529,7 +557,7 @@ int64_t MulticoreStrategy::AdjustTilingAccordingToMulticoreConstraint(TileAxis *
bool
efficient
=
(
shape
%
tiling_factor
==
0
)
>=
(
shape
%
origin_factor
==
0
);
auto
multicore_shrink_limit
=
2
;
auto
reduced_mem
=
std
::
max
(
origin_factor
-
tiling_factor
,
min_factor_for_enough_data
-
tiling_factor
);
if
((
static_cast
<
int
>
(
origin_factor
/
tiling_factor
)
>
=
multicore_shrink_limit
)
&&
reduced_mem
>
pending_blocks
)
{
if
((
static_cast
<
int
>
(
origin_factor
/
tiling_factor
)
>
multicore_shrink_limit
)
&&
reduced_mem
>
pending_blocks
)
{
ss
<<
"If axis adjust to "
<<
tiling_factor
<<
", "
<<
reduced_mem
<<
" memory is reduced;"
<<
" while maximal pending blocks is only "
<<
pending_blocks
<<
", adjust may not be efficient."
;
logger_
.
AppendLog
(
DO_TILING
,
ss
);
...
...
src/poly/tiling_strategy_manager.h
浏览文件 @
d920ab32
...
...
@@ -199,6 +199,7 @@ class MulticoreStrategy {
TileCandidate
&
cand_
;
TileLogger
&
logger_
;
std
::
pair
<
int
,
int
>
GetProposalRangeForFullMulticore
(
TileAxis
*
axis
);
int
GetProposalCoreNum
();
};
class
TilingPriorityScorer
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录