MegEngine 天元 / MegEngine
Commit 4e9be159, authored on Nov 24, 2020 by Megvii Engine Team
feat(mgb/gopt): add opt pass for fusing convolution and reformat
GitOrigin-RevId: d0c5deace2e860cb62002a6cfedd4b32a8ca24df
Parent: c3a4b222
Showing 6 changed files with 624 additions and 24 deletions (+624 -24)
src/gopt/impl/framework.cpp                    +1   -0
src/gopt/impl/tensor_reformat.cpp              +368 -20
src/gopt/include/megbrain/gopt/inference.h     +6   -0
src/gopt/test/inference.cpp                    +242 -1
src/plugin/impl/opr_footprint.cpp              +6   -2
third_party/cutlass                            +1   -1
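The new FoldingConvBiasDimshufflePass looks for a quantized NCHW4 ConvBias whose result is only consumed by a reformat chain (TypeCvt / Dimshuffle / Reshape) and folds that chain into the ConvBias itself by switching its format to NCHW4_NCHW, NCHW4_NCHW32 or NCHW32_NCHW4. A minimal sketch of applying the pass explicitly, mirroring the new test cases in src/gopt/test/inference.cpp (`y` stands for an endpoint SymbolVar of the graph being optimized):

    SymbolVar y_fuse;
    unpack_vector(gopt::GraphOptimizer{}
                          .add_pass<gopt::ShuffleShuffleRemovePass>()
                          .add_pass<gopt::FoldingConvBiasDimshufflePass>()
                          .add_pass<gopt::ParamFusePass>()
                          .apply({{y}})
                          .endpoint_vars(),
                  y_fuse);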
src/gopt/impl/framework.cpp
...
@@ -759,6 +759,7 @@ const GraphOptimizer& GraphOptimizer::add_passes_for_optimize_options(
        add_pass<RemoveRedundantTypeCvtPass>();
        add_pass(FuseNCHW4Int8Preprocess::make());
        add_pass<FuseWarpPerspectiveDimshufflePass>();
        add_pass<FoldingConvBiasDimshufflePass>();
    });
    cb(chwn4, {
        add_pass<FuseConvBiasNonlinPass>();
...
src/gopt/impl/tensor_reformat.cpp
...
@@ -2825,27 +2825,26 @@ public:
MGB_DEFINE_OPR_CLASS(ShuffleShuffleRemovePass::Impl::AbstractShuffleOpr,
                     cg::SingleCNOperatorNodeBase) // {
public:
    AbstractShuffleOpr(VarNode* inpvar, TensorFormat inp_format,
                       TensorFormat out_format);

    static SymbolVar make(VarNode* inpvar, TensorFormat inp_format,
                          TensorFormat out_format);

    TensorFormat inp_format() const { return m_inp_format; }

    TensorFormat out_format() const { return m_out_format; }

private:
    void init_output_static_infer_desc() override;
    void scn_do_execute() override;
    const TensorFormat m_inp_format;
    const TensorFormat m_out_format;
};

MGB_DYN_TYPE_OBJ_FINAL_IMPL(ShuffleShuffleRemovePass::Impl::AbstractShuffleOpr);
...
@@ -3228,4 +3227,353 @@ void ShuffleShuffleRemovePass::apply(OptState& opt) const {
    MIDOUT_E
}

/* ==================== FoldingConvBiasDimshufflePass ================= */
const char* FoldingConvBiasDimshufflePass::name() const {
    return mgb_cstr_log("folding conv bias dimshuffle pass");
}

void FoldingConvBiasDimshufflePass::apply(OptState& opt) const {
    MIDOUT_B("FoldingConvBiasDimshufflePass::apply");
    using DepType = cg::OperatorNodeProp::DepType;
    ThinHashMap<OperatorNodeBase*,
                SmallVector<std::pair<OperatorNodeBase*, DepType>>>
            readers;
    static const ThinHashSet<Typeinfo*> opr_type_list = {
            opr::TypeCvt::typeinfo(), opr::Dimshuffle::typeinfo(),
            opr::Reshape::typeinfo(), opr::ConvBias::typeinfo()};
    opt.graph().iter([&readers](OperatorNodeBase* opr) {
        for (auto&& i : opr->node_prop().dep_map()) {
            if (opr_type_list.count(i.first->owner_opr()->dyn_typeinfo())) {
                readers[i.first->owner_opr()].emplace_back(opr, i.second);
            }
        }
    });

    auto rewriter = opt.graph().make_rewriter();
    auto nchw42nchw = [](VarNode* inp) -> VarNode* {
        mgb_assert(inp->shape().ndim == 5 && inp->shape()[4] == 4);
        auto x = SymbolVar(inp);
        auto xshp = opr::GetVarShape::make(x);
        auto cv = [&x](int v) { return x.make_scalar(v); };
        auto sub = [&xshp, &cv](int idx) {
            return opr::IndexAt::make(xshp, {{0, cv(idx)}});
        };
        auto tshp = opr::Concat::make({sub(0), sub(1) * 4, sub(2), sub(3)}, 0);
        auto y0 = opr::Dimshuffle::make(x, {0, 1, 4, 2, 3});
        auto y1 = opr::Reshape::make(y0, tshp);
        auto y2 = opr::TypeCvt::make(y1, dtype::Float32());
        return y2.node();
    };
    auto nchw42nchw32 = [](VarNode* inp) -> VarNode* {
        mgb_assert(inp->shape().ndim == 5 && inp->shape()[4] == 4);
        auto x = SymbolVar(inp);
        auto xshp = opr::GetVarShape::make(x);
        auto cv = [&x](int v) { return x.make_scalar(v); };
        auto sub = [&xshp, &cv](int idx) {
            return opr::IndexAt::make(xshp, {{0, cv(idx)}});
        };
        auto tshp0 = opr::Concat::make(
                     {sub(0), sub(1) / 8, cv(8), sub(2), sub(3), sub(4)}, 0),
             tshp1 = opr::Concat::make(
                     {sub(0), sub(1) / 8, sub(2), sub(3), sub(4) * 8}, 0);
        auto y0 = opr::Reshape::make(x, tshp0);
        auto y1 = opr::Dimshuffle::make(y0, {0, 1, 3, 4, 2, 5});
        auto y2 = opr::Reshape::make(y1, tshp1);
        return y2.node();
    };
    auto nchw322nchw4 = [](VarNode* inp) -> VarNode* {
        mgb_assert(inp->shape().ndim == 5 && inp->shape()[4] == 32);
        auto x = SymbolVar(inp);
        auto xshp = opr::GetVarShape::make(x);
        auto cv = [&x](int v) { return x.make_scalar(v); };
        auto sub = [&xshp, &cv](int idx) {
            return opr::IndexAt::make(xshp, {{0, cv(idx)}});
        };
        auto tshp0 = opr::Concat::make(
                     {sub(0), sub(1), sub(2), sub(3), cv(8), sub(4) / 8}, 0),
             tshp1 = opr::Concat::make(
                     {sub(0), sub(1) * 8, sub(2), sub(3), sub(4) / 8}, 0);
        auto y0 = opr::Reshape::make(x, tshp0);
        auto y1 = opr::Dimshuffle::make(y0, {0, 1, 4, 2, 3, 5});
        auto y2 = opr::Reshape::make(y1, tshp1);
        return y2.node();
    };
    auto try_conv_dimshuffle_reshape_typecvt = [&rewriter, &readers,
                                                &nchw42nchw](
                                                       OperatorNodeBase* opr) {
        ThinHashSet<OperatorNodeBase*> opr_set;
        ThinHashSet<OperatorNodeBase*> reader_set;
        // check typecvt
        auto typecvt = try_cast_as_op<opr::TypeCvt>(opr);
        if (typecvt == nullptr)
            return false;
        auto inp_dtype = typecvt->input(0)->dtype(),
             out_dtype = typecvt->output(0)->dtype();
        bool is_s82f32 = inp_dtype.enumv() == DTypeEnum::QuantizedS8 &&
                         out_dtype.enumv() == DTypeEnum::Float32;
        if (!is_s82f32)
            return false;
        opr_set.insert(opr);

        // check reshape
        auto reshape =
                try_cast_as_op<opr::Reshape>(typecvt->input(0)->owner_opr());
        if (reshape == nullptr)
            return false;
        opr_set.insert(reshape);
        for (auto&& i : readers[reshape]) {
            if (i.second & DepType::DEV_VALUE) {
                reader_set.insert(i.first);
            }
        }

        // check shuffle
        auto shuffle =
                try_cast_as_op<opr::Dimshuffle>(reshape->input(0)->owner_opr());
        if (shuffle == nullptr)
            return false;
        auto&& param = shuffle->param();
        if (param.pattern_len != 5)
            return false;
        bool is_nchw42nchw = param.pattern[0] == 0 && param.pattern[1] == 1 &&
                             param.pattern[2] == 4 && param.pattern[3] == 2 &&
                             param.pattern[4] == 3 &&
                             shuffle->input(0)->shape()[4] == 4;
        if (!is_nchw42nchw)
            return false;
        opr_set.insert(shuffle);
        for (auto&& i : readers[shuffle]) {
            if (i.second & DepType::DEV_VALUE) {
                reader_set.insert(i.first);
            }
        }

        // check conv bias
        auto conv_bias =
                try_cast_as_op<opr::ConvBias>(shuffle->input(0)->owner_opr());
        if (conv_bias == nullptr)
            return false;
        inp_dtype = conv_bias->input(0)->dtype();
        bool is_s8nchw4 = inp_dtype.enumv() == DTypeEnum::QuantizedS8 &&
                          conv_bias->param().format ==
                                  megdnn::param::ConvBias::Format::NCHW4;
        if (!is_s8nchw4)
            return false;
        if (conv_bias->input().size() != 3)
            return false;
        opr_set.insert(conv_bias);
        for (auto&& i : readers[conv_bias]) {
            if (i.second & DepType::DEV_VALUE) {
                reader_set.insert(i.first);
            }
        }
        for (auto reader : reader_set) {
            if (opr_set.count(reader) <= 0) {
                return false;
            }
        }
        auto src = rewriter.get_var(conv_bias->input(0)),
             filter = rewriter.get_var(conv_bias->input(1)),
             bias = rewriter.get_var(conv_bias->input(2));
        auto new_bias = nchw42nchw(bias);
        auto new_param = conv_bias->param();
        new_param.format = megdnn::param::ConvBias::Format::NCHW4_NCHW;
        auto conv_bias_shuffle = opr::ConvBias::make(
                src, filter, new_bias, new_param,
                conv_bias->execution_policy(),
                OperatorNodeConfig{dtype::Float32()});
        rewriter.replace_var(
                opr->output(0), conv_bias_shuffle.node(),
                mgb_cstr_log("replace conv_bias + typecvt + dimshuffle + "
                             "reshape to conv_bias(NCHW4_NCHW)"));
        return true;
    };
    auto try_conv_reformat_nchw42nchw32 = [&rewriter, &nchw42nchw32,
                                           &readers](OperatorNodeBase* opr) {
        ThinHashSet<OperatorNodeBase*> opr_set;
        ThinHashSet<OperatorNodeBase*> reader_set;
        // check reshape
        auto reshape1 = try_cast_as_op<opr::Reshape>(opr);
        if (reshape1 == nullptr)
            return false;
        opr_set.insert(opr);

        // check dimshuffle
        auto shuffle = try_cast_as_op<opr::Dimshuffle>(
                reshape1->input(0)->owner_opr());
        if (shuffle == nullptr)
            return false;
        auto&& param = shuffle->param();
        if (param.pattern_len != 6)
            return false;
        bool is_nchw42nchw32 = param.pattern[0] == 0 && param.pattern[1] == 1 &&
                               param.pattern[2] == 3 && param.pattern[3] == 4 &&
                               param.pattern[4] == 2 && param.pattern[5] == 5 &&
                               shuffle->output(0)->shape()[5] == 4 &&
                               shuffle->output(0)->shape()[4] == 8;
        if (!is_nchw42nchw32)
            return false;
        opr_set.insert(shuffle);
        for (auto&& i : readers[shuffle]) {
            if (i.second & DepType::DEV_VALUE) {
                reader_set.insert(i.first);
            }
        }

        // check reshape
        auto reshape2 = try_cast_as_op<opr::Reshape>(
                shuffle->input(0)->owner_opr());
        if (reshape2 == nullptr)
            return false;
        opr_set.insert(reshape2);
        for (auto&& i : readers[reshape2]) {
            if (i.second & DepType::DEV_VALUE) {
                reader_set.insert(i.first);
            }
        }

        // check conv bias
        auto conv_bias = try_cast_as_op<opr::ConvBias>(
                reshape2->input(0)->owner_opr());
        if (conv_bias == nullptr)
            return false;
        auto inp_dtype = conv_bias->input(0)->dtype();
        bool is_s8nchw4 = inp_dtype.enumv() == DTypeEnum::QuantizedS8 &&
                          conv_bias->param().format ==
                                  megdnn::param::ConvBias::Format::NCHW4;
        if (!is_s8nchw4)
            return false;
        if (conv_bias->input().size() != 3)
            return false;
        opr_set.insert(conv_bias);
        for (auto&& i : readers[conv_bias]) {
            if (i.second & DepType::DEV_VALUE) {
                reader_set.insert(i.first);
            }
        }
        for (auto reader : reader_set) {
            if (opr_set.count(reader) <= 0) {
                return false;
            }
        }
        auto src = rewriter.get_var(conv_bias->input(0)),
             filter = rewriter.get_var(conv_bias->input(1)),
             bias = rewriter.get_var(conv_bias->input(2));
        auto new_bias = nchw42nchw32(bias);
        auto new_param = conv_bias->param();
        new_param.format = megdnn::param::ConvBias::Format::NCHW4_NCHW32;
        auto conv_bias_shuffle = opr::ConvBias::make(
                src, filter, new_bias, new_param,
                conv_bias->execution_policy(), conv_bias->config());
        rewriter.replace_var(
                opr->output(0), conv_bias_shuffle.node(),
                mgb_cstr_log("replace conv_bias + "
                             "reformat to conv_bias(NCHW4_NCHW32)"));
        return true;
    };
    auto try_conv_reformat_nchw322nchw4 = [&rewriter, &readers,
                                           &nchw322nchw4](OperatorNodeBase* opr) {
        ThinHashSet<OperatorNodeBase*> opr_set;
        ThinHashSet<OperatorNodeBase*> reader_set;
        // check reshape
        auto reshape1 = try_cast_as_op<opr::Reshape>(opr);
        if (reshape1 == nullptr)
            return false;
        opr_set.insert(opr);

        // check dimshuffle
        auto shuffle = try_cast_as_op<opr::Dimshuffle>(
                reshape1->input(0)->owner_opr());
        if (shuffle == nullptr)
            return false;
        auto&& param = shuffle->param();
        if (param.pattern_len != 6)
            return false;
        bool is_nchw322nchw4 = param.pattern[0] == 0 && param.pattern[1] == 1 &&
                               param.pattern[2] == 4 && param.pattern[3] == 2 &&
                               param.pattern[4] == 3 && param.pattern[5] == 5 &&
                               shuffle->input(0)->shape()[5] == 4 &&
                               shuffle->input(0)->shape()[4] == 8;
        if (!is_nchw322nchw4)
            return false;
        opr_set.insert(shuffle);
        for (auto&& i : readers[shuffle]) {
            if (i.second & DepType::DEV_VALUE) {
                reader_set.insert(i.first);
            }
        }

        // check reshape
        auto reshape2 = try_cast_as_op<opr::Reshape>(
                shuffle->input(0)->owner_opr());
        if (reshape2 == nullptr)
            return false;
        opr_set.insert(reshape2);
        for (auto&& i : readers[reshape2]) {
            if (i.second & DepType::DEV_VALUE) {
                reader_set.insert(i.first);
            }
        }

        // check conv bias
        auto conv_bias = try_cast_as_op<opr::ConvBias>(
                reshape2->input(0)->owner_opr());
        if (conv_bias == nullptr)
            return false;
        auto inp_dtype = conv_bias->input(0)->dtype();
        bool is_s8nchw32 = inp_dtype.enumv() == DTypeEnum::QuantizedS8 &&
                           conv_bias->param().format ==
                                   megdnn::param::ConvBias::Format::NCHW32;
        if (!is_s8nchw32)
            return false;
        if (conv_bias->input().size() != 3)
            return false;
        opr_set.insert(conv_bias);
        for (auto&& i : readers[conv_bias]) {
            if (i.second & DepType::DEV_VALUE) {
                reader_set.insert(i.first);
            }
        }
        for (auto reader : reader_set) {
            if (opr_set.count(reader) <= 0) {
                return false;
            }
        }
        auto src = rewriter.get_var(conv_bias->input(0)),
             filter = rewriter.get_var(conv_bias->input(1)),
             bias = rewriter.get_var(conv_bias->input(2));
        auto new_bias = nchw322nchw4(bias);
        auto new_param = conv_bias->param();
        new_param.format = megdnn::param::ConvBias::Format::NCHW32_NCHW4;
        auto conv_bias_shuffle = opr::ConvBias::make(
                src, filter, new_bias, new_param,
                conv_bias->execution_policy(), conv_bias->config());
        rewriter.replace_var(
                opr->output(0), conv_bias_shuffle.node(),
                mgb_cstr_log("replace conv_bias + "
                             "reformat to conv_bias(NCHW32_NCHW4)"));
        return true;
    };
    MGB_MARK_USED_VAR(try_conv_reformat_nchw322nchw4);

    auto on_opr = [&try_conv_dimshuffle_reshape_typecvt,
                   &try_conv_reformat_nchw42nchw32,
#if CUDA_VERSION >= 10020
                   &try_conv_reformat_nchw322nchw4,
#endif
                   &rewriter](OperatorNodeBase* opr) {
        if (!try_conv_dimshuffle_reshape_typecvt(opr) &&
            !try_conv_reformat_nchw42nchw32(opr)
#if CUDA_VERSION >= 10020
            && !try_conv_reformat_nchw322nchw4(opr)
#endif
        ) {
            rewriter.auto_replace_outputs(opr);
        }
    };
    opt.graph().iter(on_opr);
    rewriter.apply_inplace();
    MIDOUT_E
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
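The three helper lambdas above (nchw42nchw, nchw42nchw32, nchw322nchw4) build the bias relayout out of GetVarShape / Reshape / Dimshuffle operators, so the conversion stays symbolic until the graph runs. As a standalone illustration of the index math behind the NCHW4-to-NCHW32 case (plain C++, not part of the patch; the helper name and the int8 element type are only for the example):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // An NCHW4 buffer of logical shape (N, C/4, H, W, 4) is repacked into an
    // NCHW32 buffer of shape (N, C/32, H, W, 32); C must be divisible by 32.
    // This is the same permutation the reshape -> dimshuffle{0,1,3,4,2,5} ->
    // reshape chain expresses on symbolic shapes.
    std::vector<int8_t> nchw4_to_nchw32(const std::vector<int8_t>& src, size_t N,
                                        size_t C, size_t H, size_t W) {
        assert(C % 32 == 0 && src.size() == N * C * H * W);
        const size_t C4 = C / 4, C32 = C / 32;
        std::vector<int8_t> dst(src.size());
        for (size_t n = 0; n < N; ++n)
            for (size_t a = 0; a < C32; ++a)            // 32-channel block
                for (size_t h = 0; h < H; ++h)
                    for (size_t w = 0; w < W; ++w)
                        for (size_t k = 0; k < 32; ++k) {   // channel inside the block
                            size_t c4 = a * 8 + k / 4;      // source 4-channel block
                            size_t e = k % 4;               // channel inside that block
                            size_t si = (((n * C4 + c4) * H + h) * W + w) * 4 + e;
                            size_t di = (((n * C32 + a) * H + h) * W + w) * 32 + k;
                            dst[di] = src[si];
                        }
        return dst;
    }

    int main() {
        // 1x32x2x2 tensor whose elements store their own channel index.
        const size_t N = 1, C = 32, H = 2, W = 2;
        std::vector<int8_t> src(N * C * H * W);
        for (size_t c4 = 0; c4 < C / 4; ++c4)
            for (size_t hw = 0; hw < H * W; ++hw)
                for (size_t e = 0; e < 4; ++e)
                    src[(c4 * H * W + hw) * 4 + e] = int8_t(c4 * 4 + e);
        auto dst = nchw4_to_nchw32(src, N, C, H, W);
        // In NCHW32 all 32 channels of the first spatial position are contiguous.
        for (size_t k = 0; k < 32; ++k)
            assert(dst[k] == int8_t(k));
        return 0;
    }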
src/gopt/include/megbrain/gopt/inference.h
...
@@ -402,6 +402,12 @@ namespace gopt {
    void apply(OptState& opt) const override;
};

class FoldingConvBiasDimshufflePass final : public Pass {
public:
    const char* name() const override;
    void apply(OptState& opt) const override;
};

}  // namespace gopt
}  // namespace mgb
...
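The header only declares the pass. Besides chaining it explicitly through GraphOptimizer::add_pass, the fold is also reachable through the standard inference-options path, as the new FoldingConvDimshuffleNCHW32NCHW4 test below does; a sketch, with `y` again standing for the graph endpoint:

    auto options = gopt::OptimizeForInferenceOptions{};
    options.enable_nchw32().enable_fuse_conv_bias_nonlinearity();
    SymbolVar y_fuse;
    unpack_vector(gopt::optimize_for_inference({y}, options), y_fuse);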
src/gopt/test/inference.cpp
...
@@ -3632,7 +3632,6 @@ TEST(TestGoptInference, ConvertFormatCD4GroupOneConv) {
}

#if MGB_CUDA
TEST(TestGoptInference, PreProcessCase0) {
    REQUIRE_GPU(1);
    HostTensorGenerator<dtype::Quantized8Asymm, RandomDistribution::UNIFORM>
...
@@ -3783,5 +3782,247 @@ TEST(TestGoptInference, WarpAndPreProcessCase) {
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-5);
}
TEST(TestGoptInference, FoldingConvDimshuffle) {
    REQUIRE_GPU(1);
    auto cn = CompNode::load("gpu0");
    cn.activate();
    auto&& prop = CompNodeEnv::from_comp_node(cn).cuda_env().device_prop;
    auto sm_ver = prop.major * 10 + prop.minor;
    if (sm_ver < 61) {
        printf("This testcast ignored due to insufficient cuda cap(got: %d, "
               "expected: %d)\n",
               sm_ver, 61);
        return;
    }

    HostTensorGenerator<dtype::Int8> gen;
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkvar = [&](const char* name, const TensorShape& shp,
                     const DType& dtype) {
        return opr::TypeCvt::make(
                opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name),
                dtype);
    };
    auto mkcvar = [&](const char* name, const TensorShape& shp,
                      const DType& dtype) {
        return opr::TypeCvt::make(
                opr::SharedDeviceTensor::make(*graph, *gen(shp, cn))
                        .rename(name),
                dtype);
    };

    auto nchw42nchw = [](SymbolVar x) {
        auto xshp = opr::GetVarShape::make(x);
        auto cv = [&x](int v) { return x.make_scalar(v); };
        auto sub = [&xshp, &cv](int idx) {
            return opr::IndexAt::make(xshp, {{0, cv(idx)}});
        };
        auto tshp0 = opr::Concat::make({sub(0), sub(1) * 4, sub(2), sub(3)}, 0);
        auto y0 = opr::Dimshuffle::make(x, {0, 1, 4, 2, 3});
        auto y1 = opr::Reshape::make(y0, tshp0);
        return y1;
    };

    auto x = mkvar("x", {32, 16, 4, 8, 4}, dtype::QuantizedS8(2.5f)),
         w = mkcvar("w", {64, 16, 3, 3, 4}, dtype::QuantizedS8(2.5f)),
         b = mkcvar("b", {1, 16, 1, 1, 4}, dtype::QuantizedS32(6.25f));
    opr::ConvBias::Param param;
    param.format = opr::ConvBias::Param::Format::NCHW4;
    param.nonlineMode = opr::ConvBias::Param::NonlineMode::RELU;
    param.stride_h = param.stride_w = 2;
    param.pad_h = param.pad_w = 1;

    auto y = opr::ConvBias::make(x, w, b, param, {},
                                 OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
    y = opr::TypeCvt::make(y, dtype::Float32());
    y = nchw42nchw(y);
    SymbolVar y_fuse, y_non_fuse;
    unpack_vector(gopt::GraphOptimizer{}
                          .add_pass<gopt::ShuffleShuffleRemovePass>()
                          .add_pass<gopt::FoldingConvBiasDimshufflePass>()
                          .add_pass<gopt::ParamFusePass>()
                          .apply({{y}})
                          .endpoint_vars(),
                  y_fuse);
    graph->compile({{y_fuse, {}}})
            ->to_json()
            ->writeto_fpath(output_file(
                    "TestGoptInference.FoldingConvDimshuffle.json"));
    ASSERT_EQ(opr::ConvBias::Param::Format::NCHW4_NCHW,
              find_opr<opr::ConvBias>(y_fuse).param().format);
    ASSERT_EQ(0u, find_opr_num<opr::Dimshuffle>(y_fuse));
    unpack_vector(gopt::GraphOptimizer{}.apply({{y}}).endpoint_vars(),
                  y_non_fuse);
    HostTensorND host_y_fuse, host_y_non_fuse;
    auto func = graph->compile(
            {make_callback_copy(y_fuse, host_y_fuse),
             make_callback_copy(y_non_fuse, host_y_non_fuse)});
    func->execute();
}
TEST(TestGoptInference, FoldingConvDimshuffleNCHW4NCHW32) {
    REQUIRE_GPU(1);
    auto cn = CompNode::load("gpu0");
    cn.activate();
    auto&& prop = CompNodeEnv::from_comp_node(cn).cuda_env().device_prop;
    auto sm_ver = prop.major * 10 + prop.minor;
    if (sm_ver < 61) {
        printf("This testcast ignored due to insufficient cuda cap(got: %d, "
               "expected: %d)\n",
               sm_ver, 61);
        return;
    }

    HostTensorGenerator<dtype::Int8> gen;
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkvar = [&](const char* name, const TensorShape& shp,
                     const DType& dtype) {
        return opr::TypeCvt::make(
                opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name),
                dtype);
    };
    auto mkcvar = [&](const char* name, const TensorShape& shp,
                      const DType& dtype) {
        return opr::TypeCvt::make(
                opr::SharedDeviceTensor::make(*graph, *gen(shp, cn))
                        .rename(name),
                dtype);
    };

    auto nchw42nchw32 = [](SymbolVar x) {
        auto xshp = opr::GetVarShape::make(x);
        auto cv = [&x](int v) { return x.make_scalar(v); };
        auto sub = [&xshp, &cv](int idx) {
            return opr::IndexAt::make(xshp, {{0, cv(idx)}});
        };
        auto tshp0 = opr::Concat::make(
                     {sub(0), sub(1) / 8, cv(8), sub(2), sub(3), sub(4)}, 0),
             tshp1 = opr::Concat::make(
                     {sub(0), sub(1) / 8, sub(2), sub(3), sub(4) * 8}, 0);
        auto y0 = opr::Reshape::make(x, tshp0);
        auto y1 = opr::Dimshuffle::make(y0, {0, 1, 3, 4, 2, 5});
        auto y2 = opr::Reshape::make(y1, tshp1);
        return y2;
    };

    auto x = mkvar("x", {32, 16, 4, 8, 4}, dtype::QuantizedS8(2.5f)),
         w = mkcvar("w", {64, 16, 3, 3, 4}, dtype::QuantizedS8(2.5f)),
         b = mkcvar("b", {1, 16, 1, 1, 4}, dtype::QuantizedS32(6.25f));
    opr::ConvBias::Param param;
    param.format = opr::ConvBias::Param::Format::NCHW4;
    param.nonlineMode = opr::ConvBias::Param::NonlineMode::RELU;
    param.stride_h = param.stride_w = 2;
    param.pad_h = param.pad_w = 1;

    auto y = opr::ConvBias::make(x, w, b, param, {},
                                 OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
    y = nchw42nchw32(y);
    y = opr::TypeCvt::make(y, dtype::Float32());
    SymbolVar y_fuse, y_non_fuse;
    unpack_vector(gopt::GraphOptimizer{}
                          .add_pass<gopt::FoldingConvBiasDimshufflePass>()
                          .add_pass<gopt::ParamFusePass>()
                          .apply({{y}})
                          .endpoint_vars(),
                  y_fuse);
    graph->compile({{y_fuse, {}}})
            ->to_json()
            ->writeto_fpath(output_file(
                    "TestGoptInference.FoldingConvDimshuffleNCHW4NCHW32.json"));
    ASSERT_EQ(opr::ConvBias::Param::Format::NCHW4_NCHW32,
              find_opr<opr::ConvBias>(y_fuse).param().format);
    ASSERT_EQ(0u, find_opr_num<opr::Dimshuffle>(y_fuse));
    unpack_vector(gopt::GraphOptimizer{}.apply({{y}}).endpoint_vars(),
                  y_non_fuse);
    HostTensorND host_y_fuse, host_y_non_fuse;
    auto func = graph->compile(
            {make_callback_copy(y_fuse, host_y_fuse),
             make_callback_copy(y_non_fuse, host_y_non_fuse)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(host_y_fuse, host_y_non_fuse);
}
#if CUDA_VERSION >= 10020
TEST(TestGoptInference, FoldingConvDimshuffleNCHW32NCHW4) {
    REQUIRE_GPU(1);
    auto cn = CompNode::load("gpu0");
    cn.activate();
    auto&& prop = CompNodeEnv::from_comp_node(cn).cuda_env().device_prop;
    auto sm_ver = prop.major * 10 + prop.minor;
    if (sm_ver < 75) {
        printf("This testcast ignored due to insufficient cuda cap(got: %d, "
               "expected: %d)\n",
               sm_ver, 75);
        return;
    }

    HostTensorGenerator<dtype::Int8> gen;
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkvar = [&](const char* name, const TensorShape& shp,
                     const DType& dtype) {
        return opr::TypeCvt::make(
                opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name),
                dtype);
    };
    auto mkcvar = [&](const char* name, const TensorShape& shp,
                      const DType& dtype) {
        return opr::TypeCvt::make(
                opr::SharedDeviceTensor::make(*graph, *gen(shp, cn))
                        .rename(name),
                dtype);
    };

    auto x = mkvar("x", {32, 16, 4, 8, 4}, dtype::QuantizedS8(2.5f)),
         w = mkcvar("w", {64, 16, 3, 3, 4}, dtype::QuantizedS8(2.5f)),
         b = mkcvar("b", {1, 16, 1, 1, 4}, dtype::QuantizedS32(6.25f)),
         w1 = mkcvar("w1", {16, 16, 3, 3, 4}, dtype::QuantizedS8(2.5f)),
         b1 = mkcvar("b1", {1, 4, 1, 1, 4}, dtype::QuantizedS32(6.25f));
    opr::ConvBias::Param param;
    param.format = opr::ConvBias::Param::Format::NCHW4;
    param.nonlineMode = opr::ConvBias::Param::NonlineMode::RELU;
    param.stride_h = param.stride_w = 2;
    param.pad_h = param.pad_w = 1;

    auto y = opr::ConvBias::make(x, w, b, param, {},
                                 OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
    param.stride_h = param.stride_w = 1;
    y = opr::ConvBias::make(y, w1, b1, param, {},
                            OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
    y = opr::TypeCvt::make(y, dtype::Float32());
    SymbolVar y_fuse, y_non_fuse;
    {
        auto options = gopt::OptimizeForInferenceOptions{};
        options.enable_nchw32().enable_fuse_conv_bias_nonlinearity();
        unpack_vector(gopt::optimize_for_inference({y}, options), y_fuse);
    }
    graph->compile({{y_fuse, {}}})
            ->to_json()
            ->writeto_fpath(output_file(
                    "TestGoptInference.FoldingConvDimshuffleNCHW32NCHW4.json"));
    ASSERT_EQ(1u, find_opr_num<opr::Dimshuffle>(y_fuse));
    bool found = false;
    cg::DepOprIter{[&found](cg::OperatorNodeBase* opr) {
        if (!found && opr->same_type<opr::ConvBias>()) {
            opr::ConvBias* cb = &opr->cast_final_safe<opr::ConvBias>();
            if (cb->param().format ==
                opr::ConvBias::Param::Format::NCHW32_NCHW4)
                found = true;
        }
    }}.add(y_fuse.node()->owner_opr());
    EXPECT_TRUE(found);
    unpack_vector(gopt::GraphOptimizer{}.apply({{y}}).endpoint_vars(),
                  y_non_fuse);
    HostTensorND host_y_fuse, host_y_non_fuse;
    auto func = graph->compile(
            {make_callback_copy(y_fuse, host_y_fuse),
             make_callback_copy(y_non_fuse, host_y_non_fuse)});
    func->execute();
    MGB_ASSERT_TENSOR_EQ(host_y_fuse, host_y_non_fuse);
}
#endif
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/plugin/impl/opr_footprint.cpp
...
@@ -131,8 +131,10 @@ uint64_t eval_conv_computation(const TensorShape& src_shape,
             return dst_shape.total_nr_elems() * fh * fw * src_shape[1] * 32 /
                    group * 2;
         }
-        mgb_assert(param.format == Param::Format::NCHW4,
-                   "format should be NCHW4/NCHW32");
+        mgb_assert(param.format == Param::Format::NCHW4 ||
+                           param.format == Param::Format::NCHW4_NCHW ||
+                           param.format == Param::Format::NCHW4_NCHW32,
+                   "format should be NCHW4/NCHW4_NCHW/NCHW4_NCHW32");
         return dst_shape.total_nr_elems() * fh * fw * src_shape[1] * 4 /
                group * 2;
     };
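For the NCHW4-family formats this is the usual dense-convolution estimate, with the true input channel count recovered from the packed layout (src_shape[1] * 4 = IC): computation = N * OC * OH * OW * fh * fw * IC / group * 2, where the final factor 2 counts one multiply and one add per filter tap. With illustrative numbers (not taken from the patch), a 3x3 convolution producing a 1x64x56x56 output from a 64-channel input with group = 1 is counted as 64 * 56 * 56 * 9 * 64 * 2 = 231,211,008 operations.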
...
@@ -154,6 +156,8 @@ uint64_t eval_conv_computation(const TensorShape& src_shape,
                2;
     };
     if (param.format == Param::Format::NCHW4 ||
         param.format == Param::Format::NCHW4_NCHW ||
         param.format == Param::Format::NCHW4_NCHW32 ||
         param.format == Param::Format::NCHW88 ||
         param.format == Param::Format::NCHW44 ||
         param.format == Param::Format::NCHW44_DOT ||
...
third_party/cutlass @ 9f743167 (compare 41426ea4 ... 9f743167)
-Subproject commit 41426ea4074dcfc448b1c9979ea7617407590c04
+Subproject commit 9f7431672c17d4a731f84ca9d8f3f4e741e267b1