Commit f2db7b0d
Authored Sep 24, 2021 by Megvii Engine Team

feat(mgb/gopt): global layout transform support cuda fp16

GitOrigin-RevId: 1449c54215d053d2bd22c6f6fc5235c1a9fb560f
Parent: ca7cec7a
Showing 4 changed files with 127 additions and 14 deletions:

  src/gopt/impl/global_layout_transform/layout_transform_context.cpp    +5   -5
  src/gopt/impl/global_layout_transform/opr_tensor_formats_config.cpp   +23  -7
  src/gopt/test/layout_transform_pass.cpp                               +92  -0
  src/gopt/test/network.cpp                                             +7   -2
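For orientation, the sketch below condenses how the fp16 path added here is driven. It follows the Resnet18_F16 test introduced in src/gopt/test/layout_transform_pass.cpp further down; opr_list, available_tensor_formats, attribute, profiler and output stand for the values that the test builds, so treat this as an outline of the pass chain rather than a drop-in snippet.

    // Condensed from the new test below (not a verbatim excerpt): build a context that
    // only allows NCHW/NHWC, convert the graph to fp16 first, then let the global
    // layout transform pick per-operator formats.
    auto ctx = std::make_unique<LayoutTransformContext>(
            std::move(opr_list), std::move(available_tensor_formats), attribute);
    ctx->add_opr_config(
               opr::ConvBiasForward::typeinfo(),
               {OprFormatConfigID::NCHW, OprFormatConfigID::NHWC})
            .add_opr_config(
                    opr::PoolingForward::typeinfo(),
                    {OprFormatConfigID::NCHW, OprFormatConfigID::NHWC});
    std::unique_ptr<SolverBase> solver{new DynamicProgrammingSolver(std::move(profiler))};
    auto new_output = gopt::GraphOptimizer{}
                              .add_pass(ConvertF32ToF16Pass::make(false))  // f32 -> f16
                              .add_pass<FuseConvBiasNonlinPass>()
                              .add_pass<LayoutTransformPass>(std::move(ctx), std::move(solver))
                              .add_pass<ParamFusePass>()
                              .apply({{output}})
                              .endpoint_vars();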
src/gopt/impl/global_layout_transform/layout_transform_context.cpp

@@ -78,13 +78,13 @@ std::unique_ptr<LayoutTransformContext> make_cuda_ctx(
                         OprFormatConfigID::NHWC})
                 .add_opr_config(
                         opr::PoolingForward::typeinfo(),
-                        {OprFormatConfigID::NCHW4, OprFormatConfigID::NCHW32,
-                         OprFormatConfigID::NHWC, OprFormatConfigID::NCHW64,
-                         OprFormatConfigID::CHWN4})
+                        {OprFormatConfigID::NCHW, OprFormatConfigID::NCHW4,
+                         OprFormatConfigID::NCHW32, OprFormatConfigID::NHWC,
+                         OprFormatConfigID::NCHW64, OprFormatConfigID::CHWN4})
                 .add_opr_config(
                         opr::WarpPerspectiveForward::typeinfo(),
-                        {OprFormatConfigID::NHWC, OprFormatConfigID::NCHW4,
-                         OprFormatConfigID::NCHW64});
+                        {OprFormatConfigID::NCHW, OprFormatConfigID::NHWC,
+                         OprFormatConfigID::NCHW4, OprFormatConfigID::NCHW64});
     return ctx;
 }
src/gopt/impl/global_layout_transform/opr_tensor_formats_config.cpp

@@ -191,8 +191,11 @@ struct OprSingleInOutTensorFormatsDispatcherImpl<OprFormatConfigID::NHWC> {
         config.typeinfo = opr->dyn_typeinfo();
         config.opr_format = OprFormat::NHWC;
         config.config_id = OprFormatConfigID::NHWC;
-        bool available = opr->input(0)->dtype().enumv() == DTypeEnum::Quantized4Asymm ||
+        bool f16_config = DNN_FLOAT16_SELECT(
+                (opr->input(0)->dtype().enumv() == DTypeEnum::Float16), true);
+        bool i4_config = opr->input(0)->dtype().enumv() == DTypeEnum::Quantized4Asymm ||
                 opr->input(0)->dtype().enumv() == DTypeEnum::QuantizedS4;
+        bool available = f16_config || i4_config;
         config.input_dtypes = {opr->input(0)->dtype().enumv()};
         config.input_tensor_types = {TensorType::FEATURE};
         available &= opr->output(0)->dtype().enumv() == opr->input(0)->dtype().enumv();

@@ -275,16 +278,22 @@ struct ConvTensorFormatsDispatcherImpl<Opr, OprFormatConfigID::NHWC> {
         config.opr_format = OprFormat::NHWC;
         config.config_id = OprFormatConfigID::NHWC;
         auto check_dtype = [](const DType& dt) {
+            bool f16_config =
+                    DNN_FLOAT16_SELECT((dt.enumv() == DTypeEnum::Float16), true);
             bool i4_config = dt.enumv() == DTypeEnum::Quantized4Asymm ||
                              dt.enumv() == DTypeEnum::QuantizedS4;
             bool i8_config = dt.enumv() == DTypeEnum::QuantizedS8;
-            return i4_config || i8_config;
+            return f16_config || i4_config || i8_config;
         };
         bool available = true;
         for (size_t i = 0; i < opr->input().size(); ++i) {
-            if (i == 2)
-                available &= opr->input(i)->dtype().enumv() == DTypeEnum::QuantizedS32;
-            else
+            if (i == 2) {
+                available &= opr->input(i)->dtype().enumv() == DTypeEnum::QuantizedS32 ||
+                             DNN_FLOAT16_SELECT(
+                                     opr->input(i)->dtype().enumv() == DTypeEnum::Float16,
+                                     true);
+            } else {
                 available &= check_dtype(opr->input(i)->dtype());
+            }
             config.input_dtypes.emplace_back(opr->input(i)->dtype().enumv());

@@ -866,12 +875,18 @@ struct ConvTensorFormatsDispatcherImpl<
         config.config_id = OprFormatConfigID::NHWC;
         bool available = true;
         for (size_t i = 0; i < opr->input().size(); ++i) {
-            available &= opr->input(i)->dtype().enumv() == DTypeEnum::QuantizedS8;
+            available &= opr->input(i)->dtype().enumv() == DTypeEnum::QuantizedS8 ||
+                         DNN_FLOAT16_SELECT(
+                                 opr->input(i)->dtype().enumv() == DTypeEnum::Float16,
+                                 true);
             config.input_dtypes.emplace_back(opr->input(i)->dtype().enumv());
             TensorType tensor_type = i == 0 ? TensorType::WEIGHT : TensorType::FEATURE;
             config.input_tensor_types.emplace_back(tensor_type);
         }
-        available &= opr->output(0)->dtype().enumv() == DTypeEnum::QuantizedS8;
+        available &= opr->output(0)->dtype().enumv() == DTypeEnum::QuantizedS8 ||
+                     DNN_FLOAT16_SELECT(
+                             opr->output(0)->dtype().enumv() == DTypeEnum::Float16,
+                             true);
         config.output_dtypes.emplace_back(opr->output(0)->dtype().enumv());
         available &= conv.param().sparse == opr::ConvBias::Param::Sparse::DENSE;
         config.input_tensor_formats = {

@@ -934,6 +949,7 @@ StaticData::StaticData() {
     OPR_TENSOR_FORMATS_CONFIG_REG(ConvBias, NCHW44_DOT_HYBRID);
     OPR_TENSOR_FORMATS_CONFIG_REG(ConvolutionForward, NCHW);
+    OPR_TENSOR_FORMATS_CONFIG_REG(ConvolutionForward, NHWC);
     OPR_TENSOR_FORMATS_CONFIG_REG(ConvolutionForward, NCHW4);
     OPR_TENSOR_FORMATS_CONFIG_REG(ConvolutionForward, NCHW44);
     OPR_TENSOR_FORMATS_CONFIG_REG(ConvolutionForward, NCHW88);
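The dtype checks above gate fp16 through DNN_FLOAT16_SELECT. The following sketch shows its presumed expansion (an assumption about the existing MegDNN macro, not something introduced by this commit): with fp16 support compiled in, the macro yields its first argument, i.e. the real Float16 check; with fp16 compiled out, it yields its second argument, so these predicates fall back to the supplied constant.

    // Presumed expansion of the MegDNN helper used in the hunks above (illustrative
    // only; the guard name MEGDNN_DISABLE_FLOAT16 is an assumption).
    #if !MEGDNN_DISABLE_FLOAT16
    #define DNN_FLOAT16_SELECT(_x, _y) (_x)  // fp16 builds: evaluate the real dtype check
    #else
    #define DNN_FLOAT16_SELECT(_x, _y) (_y)  // fp16 disabled: fall back to the constant
    #endif

    // So the new check in the NHWC dispatcher,
    //     bool f16_config = DNN_FLOAT16_SELECT(
    //             (opr->input(0)->dtype().enumv() == DTypeEnum::Float16), true);
    // tests for a Float16 input when fp16 is available and degenerates to true otherwise.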
src/gopt/test/layout_transform_pass.cpp

@@ -29,6 +29,8 @@
 #include "./cache_data.h"
 #endif
+#include "megbrain/plugin/opr_io_dump.h"
+
 using namespace mgb;
 using namespace gopt;
 using namespace serialization;

@@ -748,6 +750,95 @@ TEST(TestLayoutTransform, CanonicalizeLayoutTransform) {
     MGB_ASSERT_TENSOR_EQ(t1, t2);
 }

+#if MGB_CUDA
+TEST(TestLayoutTransform, Resnet18_F16) {
+    REQUIRE_GPU(1);
+    auto cn = CompNode::load("gpu0");
+    auto&& prop = CompNodeEnv::from_comp_node(cn).cuda_env().device_prop;
+    auto sm_ver = prop.major * 10 + prop.minor;
+    if (sm_ver < 70) {
+        printf("This testcast ignored due to insufficient cuda cap(got: %d, "
+               "expected: %d)\n",
+               sm_ver, 70);
+        return;
+    }
+    Network network(cn);
+    auto output = make_resnet18(network, 16);
+    using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
+    S strategy = S::PROFILE;
+    gopt::modify_opr_algo_strategy_inplace({{output}}, strategy);
+
+    HostTensorND t1;
+    auto func1 = network.graph->compile({make_callback_copy(output, t1)});
+    func1->execute();
+
+    using OprFormatConfigID = LayoutTransformContext::OprFormatConfigID;
+    using OprList = LayoutTransformContext::OprList;
+    using Attribute = LayoutTransformContext::Attribute;
+    using Target = LayoutTransformContext::Target;
+    using ReformatAttribute = LayoutTransformContext::ReformatAttribute;
+    OprList opr_list = {
+            opr::ConvBiasForward::typeinfo(),
+            opr::ElemwiseMultiType::typeinfo(),
+            opr::Elemwise::typeinfo(),
+            opr::TypeCvt::typeinfo(),
+            opr::PoolingForward::typeinfo(),
+            opr::WarpPerspectiveForward::typeinfo(),
+    };
+    SmallVector<TensorFormats> available_tensor_formats = {
+            TensorFormats::NCHW, TensorFormats::NHWC};
+    Attribute attribute = {
+            OprFormatConfigID::NCHW, TensorFormats::NCHW, Target::UNSPEC,
+            ReformatAttribute::AUTO_PADDING_NHWC};
+    auto ctx = std::make_unique<LayoutTransformContext>(
+            std::move(opr_list), std::move(available_tensor_formats), attribute);
+    ctx->add_opr_config(
+               opr::ConvBiasForward::typeinfo(),
+               {OprFormatConfigID::NCHW, OprFormatConfigID::NHWC})
+            .add_opr_config(
+                    opr::PoolingForward::typeinfo(),
+                    {OprFormatConfigID::NCHW, OprFormatConfigID::NHWC});
+#if MGB_WITH_CACHED_TEST
+    auto profiler = std::make_unique<ProfilerMock>(
+            static_cast<const uint8_t*>(TestLayoutTransform_Resnet18_F16.data()),
+            TestLayoutTransform_Resnet18_F16.size());
+#else
+    auto profiler = ProfilerBase::make_cached_profiler(
+            "TestLayoutTransform.Resnet18_F16.cache");
+#endif
+    std::unique_ptr<SolverBase> solver{
+            new DynamicProgrammingSolver(std::move(profiler))};
+    auto new_output =
+            gopt::GraphOptimizer{}
+                    .add_pass(ConvertF32ToF16Pass::make(false))
+                    .add_pass<FuseConvBiasNonlinPass>()
+                    .add_pass<FuseConvBiasZPass>()
+                    .add_pass<LayoutTransformPass>(std::move(ctx), std::move(solver))
+                    .add_pass<ShuffleShuffleRemovePass>()
+                    .add_pass(FuseNCHW4Int8Preprocess::make())
+                    .add_pass<FoldingConvBiasDimshufflePass>()
+                    .add_pass<ParamFusePass>()
+                    .add_pass<ParamMergePass>()
+                    .apply({{output}})
+                    .endpoint_vars();
+    auto new_out_var = new_output[0];
+    /// check global layout transform pass
+    auto nr_dimshuffle = find_opr_num<opr::Dimshuffle>(new_out_var);
+    ASSERT_EQ(nr_dimshuffle, 4u);
+    /// check pass fuse conv bias with z
+    auto nr_elemwise = find_opr_num<opr::Elemwise>(new_out_var);
+    ASSERT_EQ(nr_elemwise, 4u);
+    /// 21 convolutions, 21 weights and 21 bias, total 42 parameters
+    const auto& param_merge = find_opr<opr::MultipleDeviceTensorHolder>(new_out_var);
+    ASSERT_EQ(param_merge.output().size(), 42u);
+
+    GraphProfiler gprof{network.graph.get()};
+    HostTensorND t2;
+    auto func2 = network.graph->compile({make_callback_copy(new_out_var, t2)});
+    func2->execute();
+    gprof.to_json_full(func2.get())->writeto_fpath(output_file("resnet18_f16.json"));
+    MGB_ASSERT_TENSOR_NEAR(t1, t2, 1e-3);
+}
+#endif
+
 TEST(TestLayoutTransform, Resnet18_F32) {
     auto cn = CompNode::load("cpu0");

@@ -1115,4 +1206,5 @@ TEST(TestLayoutTransform, MobileNetV2_NCHW44_DOT) {
     /// check correct
     MGB_ASSERT_TENSOR_EQ(t1, t2);
 }
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/gopt/test/network.cpp

@@ -38,8 +38,13 @@ SymbolVar Network::add_conv(
         param.nonlineMode = opr::ConvBias::Param::NonlineMode::IDENTITY;
     }
-    auto conv = opr::ConvBias::make(
-            f, weight, bias, param, {}, OperatorNodeConfig{out_dtype});
+    SymbolVar conv;
+    if (out_dtype.category() == DTypeCategory::QUANTIZED) {
+        conv = opr::ConvBias::make(
+                f, weight, bias, param, {}, OperatorNodeConfig{out_dtype});
+    } else {
+        conv = opr::ConvBias::make(f, weight, bias, param, {});
+    }
     weight_idx++;
     bias_idx++;
     return conv;