Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
239916a9
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
239916a9
编写于
6月 16, 2021
作者:
M
Megvii Engine Team
提交者:
huangxinda
7月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(mgb/gopt): fix testcase for enable nchw64 pass
GitOrigin-RevId: 2ae8d1608d7e9623f4c2abe59f9c80d54559aaf4
上级
2ab5c53f
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
38 addition
and
157 deletion
+38
-157
dnn/src/common/conv_bias.cpp
dnn/src/common/conv_bias.cpp
+2
-2
src/gopt/impl/tensor_reformat.cpp
src/gopt/impl/tensor_reformat.cpp
+31
-55
src/gopt/test/inference.cpp
src/gopt/test/inference.cpp
+5
-100
未找到文件。
dnn/src/common/conv_bias.cpp
浏览文件 @
239916a9
...
...
@@ -65,8 +65,8 @@ void do_check_exec_common(
bias
.
to_string
().
c_str
(),
dst
.
to_string
().
c_str
());
megdnn_assert
(
bias
.
shape
[
2
]
==
1
);
megdnn_assert
(
bias
.
shape
[
3
]
==
1
);
}
else
if
(
param
().
format
==
param
::
ConvBias
::
Format
::
NHWC
||
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW4_NHWC
)
{
}
else
if
(
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NHWC
||
opr
->
param
().
format
==
param
::
ConvBias
::
Format
::
NCHW4_NHWC
)
{
megdnn_assert
(
bias
.
shape
[
0
]
==
1
);
megdnn_assert
(
bias
.
shape
[
1
]
==
1
);
megdnn_assert
(
bias
.
shape
[
2
]
==
1
);
...
...
src/gopt/impl/tensor_reformat.cpp
浏览文件 @
239916a9
...
...
@@ -420,7 +420,8 @@ void TensorReformatPass::RelayoutPlaceholder::init_output_static_infer_desc() {
dst
[
4
]
=
32
;
}
else
if
(
layout_type
()
==
RelayoutPlaceholder
::
LayoutType
::
NCHW_TO_NCHW64
)
{
mgb_assert
(
inp_shape
.
ndim
==
4
&&
inp_shape
[
1
]
%
64
==
0
,
"%s"
,
inp_shape
.
to_string
().
c_str
());
mgb_assert
(
inp_shape
.
ndim
==
4
&&
inp_shape
[
1
]
%
64
==
0
,
"%s"
,
inp_shape
.
to_string
().
c_str
());
dst
.
ndim
=
5
;
dst
[
0
]
=
inp_shape
[
0
];
dst
[
1
]
=
inp_shape
[
1
]
/
64
;
...
...
@@ -438,8 +439,6 @@ void TensorReformatPass::RelayoutPlaceholder::init_output_static_infer_desc() {
dst
[
4
]
=
32
;
}
else
if
(
layout_type
()
==
RelayoutPlaceholder
::
LayoutType
::
NCHW4_TO_NCHW64
)
{
mgb_assert
(
layout_type
()
==
RelayoutPlaceholder
::
LayoutType
::
NCHW4_TO_NCHW64
);
mgb_assert
(
inp_shape
.
ndim
==
5
&&
inp_shape
[
1
]
%
16
==
0
);
dst
.
ndim
=
5
;
dst
[
0
]
=
inp_shape
[
0
];
...
...
@@ -499,18 +498,17 @@ void TensorReformatPass::RelayoutPlaceholder::init_output_static_infer_desc() {
}
else
if
(
layout_type
()
==
RelayoutPlaceholder
::
LayoutType
::
NHWC_TO_NCHW32
)
{
mgb_assert
(
inp_shape
.
ndim
==
4
&&
inp_shape
[
3
]
%
32
==
0
);
dst
.
ndim
=
4
;
dst
.
ndim
=
5
;
dst
[
0
]
=
inp_shape
[
0
];
dst
[
1
]
=
inp_shape
[
3
]
/
32
;
dst
[
2
]
=
inp_shape
[
1
];
dst
[
3
]
=
inp_shape
[
2
];
dst
[
4
]
=
32
;
}
else
if
(
layout_type
()
==
RelayoutPlaceholder
::
LayoutType
::
NHWC_TO_NCHW64
)
{
}
else
{
mgb_assert
(
layout_type
()
==
RelayoutPlaceholder
::
LayoutType
::
NHWC_TO_NCHW64
);
mgb_assert
(
inp_shape
.
ndim
==
4
&&
inp_shape
[
3
]
%
64
==
0
);
dst
.
ndim
=
4
;
dst
.
ndim
=
5
;
dst
[
0
]
=
inp_shape
[
0
];
dst
[
1
]
=
inp_shape
[
3
]
/
64
;
dst
[
2
]
=
inp_shape
[
1
];
...
...
@@ -3729,21 +3727,6 @@ void FoldingConvBiasDimshufflePass::apply(OptState& opt) const {
return
y1
.
node
();
};
auto
nhwc2nchw64
=
[](
VarNode
*
inp
)
->
VarNode
*
{
mgb_assert
(
inp
->
shape
().
ndim
==
4
);
auto
x
=
SymbolVar
(
inp
);
auto
xshp
=
opr
::
GetVarShape
::
make
(
x
);
auto
cv
=
[
&
x
](
int
v
)
{
return
x
.
make_scalar
(
v
);
};
auto
sub
=
[
&
xshp
,
&
cv
](
int
idx
)
{
return
opr
::
IndexAt
::
make
(
xshp
,
{{
0
,
cv
(
idx
)}});
};
auto
tshp
=
opr
::
Concat
::
make
(
{
sub
(
0
),
sub
(
1
),
sub
(
2
),
sub
(
3
)
/
64
,
cv
(
64
)},
0
);
auto
y0
=
opr
::
Reshape
::
make
(
x
,
tshp
);
auto
y1
=
opr
::
Dimshuffle
::
make
(
y0
,
{
0
,
3
,
1
,
2
,
4
});
return
y1
.
node
();
};
auto
try_conv_dimshuffle_reshape_typecvt
=
[
&
rewriter
,
&
readers
,
&
nchw42nchw
](
OperatorNodeBase
*
opr
)
{
...
...
@@ -3915,31 +3898,29 @@ void FoldingConvBiasDimshufflePass::apply(OptState& opt) const {
return
true
;
};
auto
try_conv_reformat_nchw42n
chw64
=
[
&
rewriter
,
&
nchw42nhwc
,
&
nhwc2nchw64
,
auto
try_conv_reformat_nchw42n
hwc
=
[
&
rewriter
,
&
nchw42nhwc
,
&
readers
](
OperatorNodeBase
*
opr
)
{
ThinHashSet
<
OperatorNodeBase
*>
opr_set
;
ThinHashSet
<
OperatorNodeBase
*>
reader_set
;
// check reshape
auto
reshape1
=
try_cast_as_op
<
opr
::
Reshape
>
(
opr
);
if
(
reshape1
==
nullptr
)
auto
reshape
=
try_cast_as_op
<
opr
::
Reshape
>
(
opr
);
if
(
reshape
==
nullptr
)
return
false
;
opr_set
.
insert
(
opr
);
// check dimshuffle
auto
shuffle
=
try_cast_as_op
<
opr
::
Dimshuffle
>
(
reshape
1
->
input
(
0
)
->
owner_opr
());
reshape
->
input
(
0
)
->
owner_opr
());
if
(
shuffle
==
nullptr
)
return
false
;
auto
&&
param
=
shuffle
->
param
();
if
(
param
.
pattern_len
!=
6
)
if
(
param
.
pattern_len
!=
5
)
return
false
;
bool
is_nchw42nchw64
=
param
.
pattern
[
0
]
==
0
&&
param
.
pattern
[
1
]
==
1
&&
param
.
pattern
[
2
]
==
3
&&
param
.
pattern
[
3
]
==
4
&&
param
.
pattern
[
4
]
==
2
&&
param
.
pattern
[
5
]
==
5
&&
shuffle
->
output
(
0
)
->
shape
()[
5
]
==
4
&&
shuffle
->
output
(
0
)
->
shape
()[
4
]
==
16
;
if
(
!
is_nchw42nchw64
)
bool
is_nchw42nhwc
=
param
.
pattern
[
0
]
==
0
&&
param
.
pattern
[
1
]
==
2
&&
param
.
pattern
[
2
]
==
3
&&
param
.
pattern
[
3
]
==
1
&&
param
.
pattern
[
4
]
==
4
&&
shuffle
->
output
(
0
)
->
shape
()[
4
]
==
4
;
if
(
!
is_nchw42nhwc
)
return
false
;
opr_set
.
insert
(
shuffle
);
for
(
auto
&&
i
:
readers
[
shuffle
])
{
...
...
@@ -3948,20 +3929,8 @@ void FoldingConvBiasDimshufflePass::apply(OptState& opt) const {
}
}
// check reshape
auto
reshape2
=
try_cast_as_op
<
opr
::
Reshape
>
(
shuffle
->
input
(
0
)
->
owner_opr
());
if
(
reshape2
==
nullptr
)
return
false
;
opr_set
.
insert
(
reshape2
);
for
(
auto
&&
i
:
readers
[
reshape2
])
{
if
(
i
.
second
&
DepType
::
DEV_VALUE
)
{
reader_set
.
insert
(
i
.
first
);
}
}
auto
typecvt
=
try_cast_as_op
<
opr
::
TypeCvt
>
(
reshape2
->
input
(
0
)
->
owner_opr
());
try_cast_as_op
<
opr
::
TypeCvt
>
(
shuffle
->
input
(
0
)
->
owner_opr
());
if
(
typecvt
==
nullptr
)
return
false
;
auto
in_dtype
=
typecvt
->
input
(
0
)
->
dtype
(),
...
...
@@ -3972,6 +3941,11 @@ void FoldingConvBiasDimshufflePass::apply(OptState& opt) const {
if
(
!
is_s82s4
)
return
false
;
opr_set
.
insert
(
typecvt
);
for
(
auto
&&
i
:
readers
[
typecvt
])
{
if
(
i
.
second
&
DepType
::
DEV_VALUE
)
{
reader_set
.
insert
(
i
.
first
);
}
}
// check conv bias
auto
conv_bias
=
...
...
@@ -4006,11 +3980,10 @@ void FoldingConvBiasDimshufflePass::apply(OptState& opt) const {
auto
conv_bias_shuffle
=
opr
::
ConvBias
::
make
(
src
,
filter
,
new_bias
,
new_param
,
conv_bias
->
execution_policy
(),
OperatorNodeConfig
{
out_dtype
});
auto
new_var
=
nhwc2nchw64
(
conv_bias_shuffle
.
node
());
rewriter
.
replace_var
(
opr
->
output
(
0
),
new_var
,
opr
->
output
(
0
),
conv_bias_shuffle
.
node
()
,
mgb_cstr_log
(
"replace conv_bias + "
"reformat to conv_bias(NCHW4_N
CHW64
)"
));
"reformat to conv_bias(NCHW4_N
HWC
)"
));
return
true
;
};
...
...
@@ -4098,14 +4071,14 @@ void FoldingConvBiasDimshufflePass::apply(OptState& opt) const {
auto
on_opr
=
[
&
try_conv_dimshuffle_reshape_typecvt
,
&
try_conv_reformat_nchw42nchw32
,
&
try_conv_reformat_nchw42n
chw64
,
&
try_conv_reformat_nchw42n
hwc
,
#if CUDA_VERSION >= 10020
&
try_conv_reformat_nchw322nchw4
,
#endif
&
rewriter
](
OperatorNodeBase
*
opr
)
{
if
(
!
try_conv_dimshuffle_reshape_typecvt
(
opr
)
&&
!
try_conv_reformat_nchw42nchw32
(
opr
)
&&
!
try_conv_reformat_nchw42n
chw64
(
opr
)
!
try_conv_reformat_nchw42n
hwc
(
opr
)
#if CUDA_VERSION >= 10020
&&
!
try_conv_reformat_nchw322nchw4
(
opr
)
#endif
...
...
@@ -4546,6 +4519,9 @@ VarNode* EnableNCHW64Pass::on_graph_endpoint_var(VarNode* new_var,
case
Format
::
NCHW64
:
type
=
LayoutType
::
NCHW64_TO_NCHW
;
break
;
case
Format
::
NHWC
:
type
=
LayoutType
::
NHWC_TO_NCHW
;
break
;
default:
mgb_throw
(
AssertionError
,
"format(%d) is not supported, related var "
...
...
@@ -4980,7 +4956,7 @@ EnableNCHW64Pass::make_nchw64_converter() {
case
Format
::
NHWC
:
inps
[
1
]
=
RelayoutPlaceholder
::
make
(
inps
[
1
],
RelayoutPlaceholder
::
LayoutType
::
N
CHW_TO_NHWC
)
N
HWC_TO_NCHW4
)
.
node
();
break
;
case
Format
::
NCHW32
:
...
...
src/gopt/test/inference.cpp
浏览文件 @
239916a9
...
...
@@ -4404,10 +4404,10 @@ TEST(TestGoptInference, FoldingConvDimshuffleNCHW4NHWC) {
};
auto
x
=
mkvar
(
"x"
,
{
32
,
4
,
23
,
40
},
dtype
::
QuantizedS8
(
2.5
f
)),
w
=
mkcvar
(
"w"
,
{
64
,
4
,
3
,
3
},
dtype
::
QuantizedS8
(
2.5
f
)),
b
=
mkcvar
(
"b"
,
{
1
,
64
,
1
,
1
},
dtype
::
QuantizedS32
(
6.25
f
)),
w1
=
mkcvar
(
"w1"
,
{
64
,
64
,
3
,
3
},
dtype
::
QuantizedS4
(
1.234
f
)),
b1
=
mkcvar
(
"b1"
,
{
1
,
64
,
1
,
1
},
dtype
::
QuantizedS32
(
12.34567
f
*
1.234
f
));
w
=
mkcvar
(
"w"
,
{
32
,
4
,
3
,
3
},
dtype
::
QuantizedS8
(
2.5
f
)),
b
=
mkcvar
(
"b"
,
{
1
,
32
,
1
,
1
},
dtype
::
QuantizedS32
(
6.25
f
)),
w1
=
mkcvar
(
"w1"
,
{
32
,
32
,
3
,
3
},
dtype
::
QuantizedS4
(
1.234
f
)),
b1
=
mkcvar
(
"b1"
,
{
1
,
32
,
1
,
1
},
dtype
::
QuantizedS32
(
12.34567
f
*
1.234
f
));
opr
::
ConvBias
::
Param
param
;
param
.
format
=
opr
::
ConvBias
::
Param
::
Format
::
NCHW
;
param
.
nonlineMode
=
opr
::
ConvBias
::
Param
::
NonlineMode
::
RELU
;
...
...
@@ -4438,7 +4438,7 @@ TEST(TestGoptInference, FoldingConvDimshuffleNCHW4NHWC) {
->
writeto_fpath
(
output_file
(
"TestGoptInference.FoldingConvDimshuffleNCHW4NHWC.json"
));
size_t
nr_dimshuffle
=
find_opr_num
<
opr
::
TypeCvt
>
(
y_fuse
);
ASSERT_EQ
(
3u
,
find_opr_num
<
opr
::
Dimshuffle
>
(
y_fuse
)
);
ASSERT_EQ
(
2u
,
nr_dimshuffle
);
bool
found
=
false
;
cg
::
DepOprIter
{[
&
found
](
cg
::
OperatorNodeBase
*
opr
)
{
if
(
!
found
&&
opr
->
same_type
<
opr
::
ConvBias
>
())
{
...
...
@@ -4735,101 +4735,6 @@ TEST(TestGoptInference, PaddingChannelsWithWarpPerspective) {
MGB_ASSERT_TENSOR_EQ
(
t1
,
t2
);
}
TEST
(
TestGoptInference
,
PaddingChannelsB4
)
{
REQUIRE_GPU
(
1
);
auto
cn
=
CompNode
::
load
(
"gpu0"
);
cn
.
activate
();
REQUIRE_CUDA_COMPUTE_CAPABILITY
(
7
,
5
);
HostTensorGenerator
<
dtype
::
Int8
>
gen
;
auto
graph
=
ComputingGraph
::
make
();
graph
->
options
().
graph_opt_level
=
0
;
auto
mkvar
=
[
&
](
const
char
*
name
,
const
TensorShape
&
shp
,
const
DType
&
dtype
)
{
return
opr
::
TypeCvt
::
make
(
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
gen
(
shp
,
cn
)).
rename
(
name
),
dtype
);
};
auto
mkcvar
=
[
&
](
const
char
*
name
,
const
TensorShape
&
shp
,
const
DType
&
dtype
)
{
return
opr
::
TypeCvt
::
make
(
opr
::
SharedDeviceTensor
::
make
(
*
graph
,
*
gen
(
shp
,
cn
))
.
rename
(
name
),
dtype
);
};
auto
x
=
mkvar
(
"x"
,
{
16
,
3
,
14
,
14
},
dtype
::
QuantizedS8
(
2.5
f
)),
w
=
mkcvar
(
"w"
,
{
16
,
3
,
3
,
3
},
dtype
::
QuantizedS8
(
2.5
f
)),
b
=
mkcvar
(
"b"
,
{
1
,
16
,
1
,
1
},
dtype
::
QuantizedS32
(
6.25
f
));
opr
::
ConvBias
::
Param
param
;
param
.
format
=
opr
::
ConvBias
::
Param
::
Format
::
NCHW
;
param
.
nonlineMode
=
opr
::
ConvBias
::
Param
::
NonlineMode
::
RELU
;
param
.
stride_h
=
param
.
stride_w
=
1
;
param
.
pad_h
=
param
.
pad_w
=
1
;
auto
y
=
opr
::
ConvBias
::
make
(
x
,
w
,
b
,
param
,
{},
OperatorNodeConfig
{
dtype
::
QuantizedS8
(
2.5
f
)});
y
=
opr
::
TypeCvt
::
make
(
y
,
dtype
::
Quantized4Asymm
{
20.
f
,
8
});
opr
::
Pooling
::
Param
pool
;
pool
.
format
=
opr
::
Pooling
::
Param
::
Format
::
NCHW
;
y
=
opr
::
Pooling
::
make
(
y
,
pool
);
auto
w1
=
mkcvar
(
"w1"
,
{
48
,
16
,
3
,
3
},
dtype
::
QuantizedS4
(
1.234
f
)),
b1
=
mkcvar
(
"b1"
,
{
1
,
48
,
1
,
1
},
dtype
::
QuantizedS32
(
20.
f
*
1.234
f
));
auto
y1
=
opr
::
ConvBias
::
make
(
y
,
w1
,
b1
,
param
,
{},
OperatorNodeConfig
{
dtype
::
Quantized4Asymm
(
20.
f
,
8
)});
auto
w2
=
mkcvar
(
"w2"
,
{
48
,
48
,
3
,
3
},
dtype
::
QuantizedS4
(
1.234
f
)),
b2
=
mkcvar
(
"b2"
,
{
1
,
48
,
1
,
1
},
dtype
::
QuantizedS32
(
20.
f
*
1.234
f
));
auto
y2
=
opr
::
ConvBias
::
make
(
y1
,
w2
,
b2
,
param
,
{},
OperatorNodeConfig
{
dtype
::
Quantized4Asymm
(
20.
f
,
8
)});
auto
w3
=
mkcvar
(
"w2"
,
{
16
,
48
,
3
,
3
},
dtype
::
QuantizedS4
(
1.234
f
)),
b3
=
mkcvar
(
"b2"
,
{
1
,
16
,
1
,
1
},
dtype
::
QuantizedS32
(
20.
f
*
1.234
f
));
auto
y3
=
opr
::
ConvBias
::
make
(
y2
,
w3
,
b3
,
param
,
{},
OperatorNodeConfig
{
dtype
::
Quantized4Asymm
(
20.
f
,
8
)});
using
ElemMultiMode
=
opr
::
ElemwiseMultiType
::
Param
::
Mode
;
auto
y4
=
opr
::
ElemwiseMultiType
::
make
(
{
y
,
y3
},
{
ElemMultiMode
::
QFUSE_ADD_RELU
},
OperatorNodeConfig
{
dtype
::
Quantized4Asymm
{
20.
f
,
7
}});
y4
=
opr
::
TypeCvt
::
make
(
y4
,
dtype
::
Float32
());
SymbolVar
y4_pad
;
unpack_vector
(
gopt
::
GraphOptimizer
{}
.
add_pass
<
gopt
::
PaddingChannelPass
>
()
.
add_pass
<
gopt
::
ParamFusePass
>
()
.
apply
({{
y4
}})
.
endpoint_vars
(),
y4_pad
);
ASSERT_EQ
(
y4_pad
.
node
()
->
shape
()[
1
],
y4
.
node
()
->
shape
()[
1
]);
SmallVector
<
cg
::
OperatorNodeBase
*>
oprs
;
auto
cb1
=
[
&
oprs
](
cg
::
OperatorNodeBase
*
opr
)
{
if
(
opr
->
same_type
<
opr
::
ConvBias
>
())
{
oprs
.
push_back
(
opr
);
}
};
cg
::
DepOprIter
{
cb1
}.
add
(
y4_pad
.
node
()
->
owner_opr
());
ASSERT_EQ
(
oprs
.
size
(),
4
);
ASSERT_EQ
(
oprs
[
0
]
->
output
(
0
)
->
shape
()[
1
],
16
);
ASSERT_EQ
(
oprs
[
1
]
->
output
(
0
)
->
shape
()[
1
],
64
);
ASSERT_EQ
(
oprs
[
2
]
->
output
(
0
)
->
shape
()[
1
],
64
);
ASSERT_EQ
(
oprs
[
3
]
->
output
(
0
)
->
shape
()[
1
],
16
);
size_t
nr_concat
=
find_opr_num
<
opr
::
Concat
>
(
y4_pad
);
ASSERT_EQ
(
nr_concat
,
1
);
cg
::
OperatorNodeBase
*
concat
=
nullptr
;
auto
cb2
=
[
&
concat
](
cg
::
OperatorNodeBase
*
opr
)
{
if
(
opr
->
same_type
<
opr
::
Concat
>
())
{
concat
=
opr
;
}
};
cg
::
DepOprIter
{
cb2
}.
add
(
y4_pad
.
node
()
->
owner_opr
());
ASSERT_EQ
(
oprs
[
0
]
->
input
(
0
)
->
owner_opr
(),
concat
);
HostTensorND
t1
,
t2
;
auto
func1
=
graph
->
compile
({
make_callback_copy
(
y4
,
t1
)});
func1
->
execute
();
auto
func2
=
graph
->
compile
({
make_callback_copy
(
y4_pad
,
t2
)});
func2
->
execute
();
MGB_ASSERT_TENSOR_EQ
(
t1
,
t2
);
}
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录