Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
akg
提交
44ef36bd
A
akg
项目概览
MindSpore
/
akg
通知
58
Star
7
Fork
7
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
A
akg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
44ef36bd
编写于
6月 29, 2020
作者:
D
dabaiji
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add tiling priority scoring model
上级
5a35fac5
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
216 addition
and
22 deletion
+216
-22
src/poly/tiling_analyzer.cc
src/poly/tiling_analyzer.cc
+22
-15
src/poly/tiling_analyzer.h
src/poly/tiling_analyzer.h
+1
-1
src/poly/tiling_solver.cc
src/poly/tiling_solver.cc
+4
-1
src/poly/tiling_strategy_manager.cc
src/poly/tiling_strategy_manager.cc
+99
-5
src/poly/tiling_strategy_manager.h
src/poly/tiling_strategy_manager.h
+90
-0
未找到文件。
src/poly/tiling_analyzer.cc
浏览文件 @
44ef36bd
...
...
@@ -1303,12 +1303,7 @@ int TilingAnalyzer::GetNumOfAxisInBand(int band_idx) const {
return
max
+
1
;
}
void
TilingAnalyzer
::
TileSpaceAnalyze
()
{
CHECK
(
scop_
);
SpaceAnalyzer
space_analyzer
(
this
);
space_analyzer
.
AnalyzeSpecialAxes
();
void
TilingAnalyzer
::
AddTilingConstraints
()
{
std
::
vector
<
TilingStrategy
*>
actived_strategies
;
PassDownAttrStrategy
pd_attr_strategy
(
this
);
...
...
@@ -1351,15 +1346,10 @@ void TilingAnalyzer::TileSpaceAnalyze() {
TilingStrategyManager
&
strategy_manager
=
TilingStrategyManager
::
GetInstance
();
strategy_manager
.
SetStrategies
(
actived_strategies
);
strategy_manager
.
Execute
();
logger_
.
AppendLine
(
ANA_TILING_SPACE
,
"After adding constraints =======>"
);
auto
PrintAttr
=
[
&
](
TileAxis
*
a
)
->
void
{
if
(
a
!=
nullptr
)
a
->
DumpAxis
();
};
ForEachAxisTopDown
(
PrintAttr
);
logger_
.
AppendLine
(
ANA_TILING_SPACE
,
"<============="
);
}
bool
TilingAnalyzer
::
Prepare
()
{
// Stage 1: Analyze schedule tree.
ScheduleTreeAnalyzer
sch_ana
(
this
,
this
->
sch_
);
root_axis_
=
sch_ana
.
Build
(
this
->
Halide
());
if
(
root_axis_
==
nullptr
)
{
...
...
@@ -1368,25 +1358,42 @@ bool TilingAnalyzer::Prepare() {
if
(
root_axis_
->
children
.
empty
())
{
return
false
;
}
auto
build_axis_m
ap
=
[
this
](
const
TileAxis
*
a
)
{
auto
BuildAxisM
ap
=
[
this
](
const
TileAxis
*
a
)
{
for
(
auto
loop
:
a
->
loops
)
{
CHECK
(
loop
)
<<
"Tile axis has null ptr loop, check"
;
this
->
tile_axis_
[
loop
]
=
const_cast
<
TileAxis
*>
(
a
);
}
};
this
->
ForEachAxisTopDown
(
build_axis_m
ap
);
this
->
ForEachAxisTopDown
(
BuildAxisM
ap
);
if
(
op_type_
!=
VECTOR_OP
)
{
sch_ana
.
AnalyzeCubeInfo
();
}
TileSpaceAnalyze
();
// Stage 2: Analyze Halide IR and add tiling constraints.
SpaceAnalyzer
space_analyzer
(
this
);
space_analyzer
.
AnalyzeSpecialAxes
();
AddTilingConstraints
();
// Stage 3: Analyze buffer footprint.
LinearAccessPatternBuilder
lap_bdr
(
this
);
lap_bdr
.
Build
(
body_
);
linear_seq_
=
std
::
move
(
lap_bdr
.
seq_
);
buf_info_
=
std
::
move
(
lap_bdr
.
buf_
);
buffer_usage_timetable_
=
std
::
move
(
lap_bdr
.
buffer_usage_timetable_
);
// Stage 4: Set tiling priority based on previous analysis.
TilingPriorityScorer
scroer
(
*
this
);
scroer
.
SetPriorityByScoring
();
// Logging
logger_
.
AppendLine
(
ANA_TILING_SPACE
,
"After adding constraints =======>"
);
auto
PrintAttr
=
[
&
](
TileAxis
*
a
)
->
void
{
if
(
a
!=
nullptr
)
a
->
DumpAxis
();
};
ForEachAxisTopDown
(
PrintAttr
);
logger_
.
AppendLine
(
ANA_TILING_SPACE
,
"<============="
);
DumpLinearSeq
();
return
true
;
}
...
...
src/poly/tiling_analyzer.h
浏览文件 @
44ef36bd
...
...
@@ -288,7 +288,7 @@ class TilingAnalyzer {
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
BufferEntry
>>
buf_info_
;
private:
void
TileSpaceAnalyze
();
void
AddTilingConstraints
();
std
::
unique_ptr
<
TileAxis
>
root_axis_
;
};
...
...
src/poly/tiling_solver.cc
浏览文件 @
44ef36bd
...
...
@@ -42,7 +42,6 @@ void TilingSolver::CollectTileAxisTopDown() {
}
this
->
cand_
.
InsertAxisBack
(
a
);
};
this
->
cand_
.
ResetTileAxis
();
this
->
analyzer_
.
ForEachAxisTopDown
(
CollectTileAxis
);
this
->
cand_
.
SortByPriority
();
...
...
@@ -97,9 +96,11 @@ TileCandidate *InequalitySolver::Solve() {
auto
tile_band_size
=
static_cast
<
int
>
(
analyzer_
.
RootAxis
()
->
children
.
size
());
for
(
auto
band
=
0
;
band
<
tile_band_size
;
++
band
)
{
tiling_band_
=
band
;
CollectTileAxisTopDown
();
InitTileAxis
(
LEVEL1
);
if
(
analyzer_
.
op_type_
!=
VECTOR_OP
)
{
InitTileAxis
(
LEVEL0
);
}
...
...
@@ -738,9 +739,11 @@ void DynamicShapeSolver::AppendTileConstraintInIR(TileCandidate *cand, TileLevel
TileCandidate
*
TraverseSolver
::
Solve
()
{
CollectMemoryLimit
();
auto
tile_band_size
=
static_cast
<
int
>
(
analyzer_
.
RootAxis
()
->
children
.
size
());
for
(
auto
band
=
0
;
band
<
tile_band_size
;
++
band
)
{
tiling_band_
=
band
;
CollectTileAxisTopDown
();
// tile all axis top down
...
...
src/poly/tiling_strategy_manager.cc
浏览文件 @
44ef36bd
...
...
@@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "poly/tiling_strategy_manager.h"
#include <numeric>
#include <iostream>
namespace
akg
{
...
...
@@ -222,13 +222,9 @@ void ReduceStrategy::AddConstraint() {
if
(
align_elem
==
block_size
)
{
axis
->
l1_constraints
.
tile_min_
=
align_elem
;
}
else
{
axis
->
priority
+=
1
;
axis
->
forbid_iso
=
true
;
}
}
for
(
auto
axis
:
analyzer_
->
GetAxesOfAttr
(
"REDUCE_SRC_LAST"
))
{
axis
->
priority
+=
1
;
}
}
void
VectorizedStrategy
::
AddConstraint
()
{
...
...
@@ -553,6 +549,104 @@ int64_t MulticoreStrategy::AdjustTilingAccordingToMulticoreConstraint(TileAxis *
return
(
valid
&&
efficient
)
?
tiling_factor
:
origin_factor
;
}
void
TilingPriorityScorer
::
SetPriorityByScoring
()
{
std
::
stringstream
ss
;
for
(
int
band_idx
=
0
;
band_idx
<
static_cast
<
int
>
(
analyzer_
.
RootAxis
()
->
children
.
size
());
++
band_idx
)
{
std
::
map
<
double
,
std
::
vector
<
TileAxis
*>>
priority_map
;
std
::
vector
<
TileAxis
*>
tile_axes
=
GetBandTileAxes
(
band_idx
);
auto
norm_range
=
static_cast
<
int
>
(
tile_axes
.
size
());
auto
dd_scores
=
MinMaxScaler
(
ComputeTileDependency
(
tile_axes
),
norm_range
);
auto
pl_scores
=
MinMaxScaler
(
ComputeParallelism
(
tile_axes
),
norm_range
);
auto
vec_scores
=
MinMaxScaler
(
ComputeVectorization
(
tile_axes
),
norm_range
);
bool
has_custom_priority
=
false
;
int
default_priority
=
-
1
;
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
tile_axes
.
size
());
++
i
)
{
auto
axis
=
tile_axes
[
i
];
if
(
axis
->
priority
!=
default_priority
)
{
has_custom_priority
=
true
;
break
;
}
ss
<<
"Axis "
<<
axis
->
index
<<
" , "
<<
axis
->
dim_axis
<<
": "
;
auto
total_score
=
(
weight_
.
tile_dependency
*
dd_scores
[
i
]
+
weight_
.
parallelism
*
pl_scores
[
i
]
+
weight_
.
vectorization
*
vec_scores
[
i
])
/
weight_
.
Sum
();
ss
<<
"score = (tile dependency) "
<<
weight_
.
tile_dependency
<<
"*"
<<
dd_scores
[
i
]
<<
" + (parallelism) "
<<
weight_
.
parallelism
<<
" * "
<<
pl_scores
[
i
]
<<
" + (vectorization) "
<<
weight_
.
vectorization
<<
" * "
<<
vec_scores
[
i
]
<<
" / "
<<
weight_
.
Sum
()
<<
" = "
<<
total_score
;
logger_
.
AppendLog
(
DO_TILING
,
ss
);
if
(
priority_map
.
find
(
total_score
)
==
priority_map
.
end
())
{
priority_map
[
total_score
]
=
{
axis
};
}
else
{
priority_map
[
total_score
].
emplace_back
(
axis
);
}
}
if
(
has_custom_priority
)
{
continue
;
}
int
priority
=
static_cast
<
int
>
(
tile_axes
.
size
())
-
1
;
for
(
auto
it
:
priority_map
)
{
for
(
auto
a
:
it
.
second
)
{
a
->
priority
=
priority
;
priority
-=
1
;
}
}
}
}
/*
 * Compute the raw tile-dependency score of every axis in `tile_axes`.
 * The score of an axis is (dim_axis + 1) multiplied by the result of HasAttr("REDUCE_AXIS"),
 * so an axis for which HasAttr yields a false/zero value scores 0.
 * The returned vector has one entry per input axis, in the same order.
 */
std::vector<double> TilingPriorityScorer::ComputeTileDependency(std::vector<TileAxis *> tile_axes) {
  std::vector<double> dependency_scores;
  dependency_scores.reserve(tile_axes.size());
  for (auto iter = tile_axes.begin(); iter != tile_axes.end(); ++iter) {
    TileAxis *current = *iter;
    const auto depth = current->dim_axis + 1;
    const auto reduce_flag = current->HasAttr("REDUCE_AXIS");
    dependency_scores.emplace_back(depth * reduce_flag);
  }
  return dependency_scores;
}
/*
 * Compute the raw parallelism score of every axis in `tile_axes`.
 * An axis whose `mc_sup` member evaluates to true scores 0, any other axis scores 1.
 * The returned vector has one entry per input axis, in the same order.
 */
std::vector<double> TilingPriorityScorer::ComputeParallelism(std::vector<TileAxis *> tile_axes) {
  std::vector<double> parallel_scores(tile_axes.size());
  std::transform(tile_axes.begin(), tile_axes.end(), parallel_scores.begin(),
                 [](TileAxis *axis) -> double { return axis->mc_sup ? 0.0 : 1.0; });
  return parallel_scores;
}
/*
 * Compute the raw vectorization score of every axis in `tile_axes`.
 *
 * For each axis, every buffer in `analyzer_.buf_info_` is inspected: if the axis appears in the
 * buffer's `tile_axis` list, the score grows by `coef * position`, where `position` is the 1-based
 * index of the first occurrence of the axis in that list and `coef` depends on the buffer's memory
 * scope (see the table below; scopes missing from the table count as 0).
 * The returned vector has one entry per input axis, in the same order.
 */
std::vector<double> TilingPriorityScorer::ComputeVectorization(std::vector<TileAxis *> tile_axes) {
  std::vector<double> scores;
  scores.reserve(tile_axes.size());
  const std::unordered_map<DavinciMemScope, int> coef_map = {
    {DavinciMemScope::MEM_SCOPE_GM, 2},   // continuous dma copy is considered as the most important factor
    {DavinciMemScope::MEM_SCOPE_UB, 1},   // vectorization instruction is also important
    {DavinciMemScope::MEM_SCOPE_L1, 0},   // does not consider impact of L1 dma copy
    {DavinciMemScope::MEM_SCOPE_L0A, 0},  // does not consider impact of L0 dma copy
    {DavinciMemScope::MEM_SCOPE_L0B, 0},
    {DavinciMemScope::MEM_SCOPE_L0C, 0},
  };

  for (auto axis : tile_axes) {
    int vec_level = 0;
    // Iterate by const reference: a by-value loop copies the key string and the shared_ptr each time.
    for (const auto &it : analyzer_.buf_info_) {
      const auto buf = it.second.get();
      const auto coef_it = coef_map.find(buf->scope);
      const int coef = (coef_it == coef_map.end()) ? 0 : coef_it->second;
      if (coef == 0) {
        // A zero coefficient cannot change vec_level, so scanning this buffer's axes is pointless.
        continue;
      }
      int dim_depth = 1;
      for (const auto &a : *(buf->tile_axis)) {
        if (a == axis) {
          vec_level += coef * dim_depth;
          break;
        }
        dim_depth += 1;
      }
    }
    scores.emplace_back(vec_level);
  }
  return scores;
}
}
// namespace poly
}
// namespace ir
}
// namespace akg
src/poly/tiling_strategy_manager.h
浏览文件 @
44ef36bd
...
...
@@ -17,6 +17,7 @@
#define POLY_TILING_STRATEGY_MANAGER_H_
#include <iostream>
#include <algorithm>
#include "poly/tiling_analyzer.h"
...
...
@@ -199,6 +200,95 @@ class MulticoreStrategy {
TileLogger
&
logger_
;
std
::
pair
<
int
,
int
>
GetProposalRangeForFullMulticore
(
TileAxis
*
axis
);
};
/*
 * Scoring model that decides the tiling priority of tile axes.
 * It holds a reference to the analyzer whose axes are scored and a reference to the tile logger
 * obtained from the analyzer's dump directory; both must outlive the scorer.
 */
class TilingPriorityScorer {
 public:
  TilingPriorityScorer(TilingAnalyzer &analyzer)
      : analyzer_(analyzer), logger_(TileLogger::GetInstance(analyzer.logger_.GetDumpDir())) {}
  ~TilingPriorityScorer() {}

  /*
   * Compute a total score of priority for each tile axis considering all related features and corresponding weights.
   * Tile axis with higher score will have higher tiling priority (i.e. have more memory space).
   * Note that the score of each feature is normalized into range [1, tile_axis_size].
   */
  void SetPriorityByScoring();

  /* Override the weight of the parallelism feature. */
  void SetParallelismWeight(const int parallelism) { weight_.parallelism = parallelism; }
  /* Override the weight of the vectorization feature. */
  void SetVectorizationWeight(const int vectorization) { weight_.vectorization = vectorization; }
  /* Override the weight of the tile dependency feature. */
  void SetDataReuseWeight(const int tile_dependency) { weight_.tile_dependency = tile_dependency; }

 private:
  TilingAnalyzer &analyzer_;
  TileLogger &logger_;

  /*
   * Weight parameters for each feature in priority score model.
   * Initial weights are set empirically and changing them can support micro-tuning.
   */
  struct Weight {
    int parallelism{1};  // lowest weight because the coincident value may not always be trustworthy
    int tile_dependency{2};
    int vectorization{3};
    /* Sum of all weights, used as the divisor of the weighted average. */
    int Sum() const { return parallelism + vectorization + tile_dependency; }
  } weight_;

  /*
   * Parallelism is computed by checking coincident value in schedule tree for corresponding axis.
   * If an axis can be parallelised, the parallelism score is 0; otherwise it is 1.
   */
  std::vector<double> ComputeParallelism(std::vector<TileAxis *> tile_axes);

  /*
   * Tile dependency describes the relationship between tile axes: if more tile axes depend on one tile axis,
   * this tile axis will have higher tile dependency score and gets higher priority during tiling.
   * For example, reduce axis is usually depended on by other axes and thus it should be put into local buffer first.
   */
  std::vector<double> ComputeTileDependency(std::vector<TileAxis *> tile_axes);

  /*
   * Vectorization is computed by accumulating the dimension index of corresponding axis on each buffer.
   * If an axis is related with more innermost dimensions of different buffers, the vectorization score is higher.
   */
  std::vector<double> ComputeVectorization(std::vector<TileAxis *> tile_axes);

  /*
   * Min-max normalize `data`.
   * - When `range_max` > 1 the values are mapped linearly onto [1, range_max];
   *   otherwise they are mapped onto [0, 1].
   * - When all values are equal, every result is 1.
   * - When `data` is empty, an empty vector is returned.
   * `range_max` is usually set to the size of tile axes that need to determine priority.
   */
  std::vector<double> MinMaxScaler(std::vector<double> data, int range_max = 1) {
    if (data.empty()) {
      // min/max of an empty range would dereference the end iterator.
      return {};
    }
    const auto bounds = std::minmax_element(data.begin(), data.end());
    const double min_value = *bounds.first;
    const double max_value = *bounds.second;
    std::stringstream ss;
    ss << "Min: " << min_value << ", Max: " << max_value;
    logger_.AppendLog(DO_TILING, ss);

    std::vector<double> scaled_data(data.size(), 1);
    if (max_value - min_value == 0) {
      // Degenerate range: nothing to scale, every entry keeps the value 1.
      return scaled_data;
    }
    for (int i = 0; i < static_cast<int>(data.size()); ++i) {
      const auto old_d = data[i];
      ss << "Orginal data: " << old_d;
      auto new_d = (old_d - min_value) / (max_value - min_value);
      new_d = range_max > 1 ? (new_d * (range_max - 1) + 1) : new_d;
      ss << " -> Scaled data: " << new_d;
      scaled_data[i] = new_d;
      logger_.AppendLog(DO_TILING, ss);
    }
    return scaled_data;
  }

  /*
   * Collect, in top-down traversal order, every tile axis whose `index` equals `band_idx`.
   */
  std::vector<TileAxis *> GetBandTileAxes(int band_idx) {
    std::vector<TileAxis *> tile_axes;
    auto Collect = [&tile_axes, band_idx](TileAxis *axis) {
      if (axis->index == band_idx) {
        tile_axes.emplace_back(axis);
      }
    };
    analyzer_.ForEachAxisTopDown(Collect);
    return tile_axes;
  }
};
}
// namespace poly
}
// namespace ir
}
// namespace akg
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录