MegEngine 天元 / MegEngine

Commit 2a2a7f45
Authored on Mar 30, 2021 by Megvii Engine Team
test(mgb/opr): add testcase for conv bias int4
GitOrigin-RevId: e3fff5e30b8be8398bfc2a96ea3753624a8e7161
Parent: 858261af
Showing 4 changed files with 72 additions and 25 deletions (+72 −25)

dnn/src/fallback/conv_bias/algos.cpp      +3 −1
dnn/src/fallback/conv_bias/opr_impl.cpp   +2 −1
dnn/src/naive/conv_bias/opr_impl.cpp      +2 −0
dnn/src/naive/lowbit_utils.cpp            +65 −23
dnn/src/fallback/conv_bias/algos.cpp

@@ -187,7 +187,9 @@ bool ConvBiasImpl::AlgoNaive::usable(
         const NCBKernSizeParam& param,
         AlgoSelectionStrategy /*algo_selection_strategy*/) const {
     MIDOUT_BEGIN(megdnn_fallback_naive, 0) {
-        return param.filter_meta.format == param::ConvBias::Format::NCHW;
+        auto algo_data_type = param.deduce_algo_data_type();
+        return param.filter_meta.format == param::ConvBias::Format::NCHW &&
+               contain_data_type(get_algo_type().data_type, algo_data_type);
     }
     MIDOUT_END();
     return false;
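With this change, AlgoNaive::usable() requires not only the NCHW filter format but also that the data type deduced from the parameters is contained in the set the algorithm declares it supports. A minimal sketch of how such a containment test can be expressed, assuming a bit-flag encoding of data-type sets (the enum values and names below are illustrative assumptions, not MegEngine's actual definitions):

    // Hypothetical bit-flag encoding of algorithm data-type sets; the real
    // MegEngine AlgoDataType values may differ.
    #include <cstdint>

    enum class AlgoDataType : uint32_t {
        FLOAT32 = 1u << 0,
        QINT8X8X32 = 1u << 1,
        QINT4X4X32 = 1u << 2,
    };

    // True when every type family in `requested` is present in `supported`.
    bool contain_data_type(AlgoDataType supported, AlgoDataType requested) {
        auto s = static_cast<uint32_t>(supported);
        auto r = static_cast<uint32_t>(requested);
        return (s & r) == r;
    }

    int main() {
        auto supported = static_cast<AlgoDataType>(
                static_cast<uint32_t>(AlgoDataType::FLOAT32) |
                static_cast<uint32_t>(AlgoDataType::QINT4X4X32));
        return contain_data_type(supported, AlgoDataType::QINT4X4X32) ? 0 : 1;
    }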
dnn/src/fallback/conv_bias/opr_impl.cpp

@@ -342,7 +342,8 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
         param().format == Param::Format::NCHW4 ||
         param().format == Param::Format::NCHW44 ||
         param().format == Param::Format::NCHW44_DOT ||
-        param().format == Param::Format::NCHW) {
+        param().format == Param::Format::NCHW ||
+        param().format == Param::Format::NCHW64) {
         spatial_pos = 2;
     } else if (param().format == Param::Format::NHWC) {
         spatial_pos = 1;
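NCHW64 joins the NCHW-family formats whose spatial dimensions start at axis 2; it is the channel-blocked layout MegEngine uses for 4-bit data, packing 64 channels per block so each block stays byte-aligned. A quick sanity check of that arithmetic (standalone C++, not MegEngine code):

    // 64 int4 channels per NCHW64 block: 64 * 4 bits = 32 bytes, so a
    // channel block of 4-bit data always ends on a byte boundary.
    #include <cstddef>

    constexpr std::size_t kBitsPerInt4 = 4;
    constexpr std::size_t kChannelsPerBlock = 64;
    static_assert(kChannelsPerBlock * kBitsPerInt4 % 8 == 0,
                  "int4 NCHW64 channel block is byte-aligned");
    static_assert(kChannelsPerBlock * kBitsPerInt4 / 8 == 32,
                  "one NCHW64 int4 channel block occupies 32 bytes");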
dnn/src/naive/conv_bias/opr_impl.cpp

@@ -123,6 +123,7 @@ void forward_bias<dt_quint4, dt_quint4, dt_qint32, dt_qint32>(
         auto param = layout.dtype.param<dtype::Quantized4Asymm>();
         ret.dtype = dtype::Quantized8Asymm(param.scale, param.zero_point);
+        ret.format = TensorFormat(ret.dtype);
         ret.init_contiguous_stride();
         return ret;
     };
     TensorND new_src = {workspace_ptr, convert_layout(src.layout)};

@@ -147,6 +148,7 @@ void forward_bias<dt_qint4, dt_qint4, dt_qint32, dt_qint32>(
         auto param = layout.dtype.param<dtype::QuantizedS4>();
         ret.dtype = dtype::QuantizedS8(param.scale);
+        ret.format = TensorFormat(ret.dtype);
         ret.init_contiguous_stride();
         return ret;
     };
     TensorND new_src = {workspace_ptr, convert_layout(src.layout)};
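The added ret.format = TensorFormat(ret.dtype) line matters because a 4-bit dtype carries a packed, byte-aligned tensor format; once the dtype is widened to its 8-bit counterpart, the format must be rebuilt so init_contiguous_stride() computes plain one-byte-per-element strides. A standalone illustration of the difference in row size (plain C++, not MegEngine code; the padding rule assumed here is the div_ceil-to-bytes one used in lowbit_utils.cpp below):

    // Row sizes for W elements: 4-bit rows are padded to whole bytes,
    // 8-bit rows are one byte per element.
    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t W = 5;
        const std::size_t row_bytes_int4 = (W + 1) / 2;  // div_ceil(5, 2) == 3
        const std::size_t row_bytes_int8 = W;            // == 5
        std::printf("int4 row: %zu bytes, int8 row: %zu bytes\n",
                    row_bytes_int4, row_bytes_int8);
        return 0;
    }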
dnn/src/naive/lowbit_utils.cpp

@@ -16,10 +16,20 @@
 void megdnn::naive::uint4_to_uint8(const TensorND& in, const TensorND& out) {
     auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte;
     auto out_ptr = out.compatible_ptr<uint8_t>() + out.layout.span().low_byte;
-    for (size_t i = 0; i < in.layout.span().dist_elem(); i += 2) {
-        uint8_t val = in_ptr[i / 2];
-        out_ptr[i] = val & 0xF;
-        out_ptr[i + 1] = (val >> 4) & 0xF;
+    const auto& ly = in.layout;
+    auto dim_in = ly.shape[ly.ndim - 1];
+    auto elems = ly.total_nr_elems();
+    auto dim_out = elems / dim_in;
+    auto stride_out = div_ceil(dim_in, 2_z);
+    for (size_t i = 0; i < dim_out; ++i) {
+        for (size_t j = 0; j < dim_in; j += 2) {
+            uint8_t val = in_ptr[j / 2];
+            out_ptr[j] = val & 0xF;
+            if (j + 1 < dim_in)
+                out_ptr[j + 1] = (val >> 4) & 0xF;
+        }
+        in_ptr += stride_out;
+        out_ptr += dim_in;
     }
 }
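All four converters now walk the tensor row by row instead of treating it as one flat nibble stream: the innermost dimension dim_in of the 4-bit side is padded to a whole byte per row (stride_out = div_ceil(dim_in, 2)), which is what lets odd innermost dimensions round-trip correctly. A standalone check of the unpacking arithmetic for a 2×3 tensor, mirroring the loop above (plain C++, independent of MegEngine):

    // Each row of dim_in 4-bit values occupies div_ceil(dim_in, 2) bytes;
    // the high nibble of a row's last byte is padding when dim_in is odd.
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    int main() {
        const std::size_t dim_out = 2, dim_in = 3;   // 2 rows of 3 nibbles
        const std::size_t row_bytes = (dim_in + 1) / 2;  // div_ceil(3, 2) == 2
        // Packed input: row 0 = {1, 2, 3}, row 1 = {4, 5, 6}.
        std::vector<uint8_t> packed = {0x21, 0x03, 0x54, 0x06};
        std::vector<uint8_t> unpacked(dim_out * dim_in);
        const uint8_t* in_ptr = packed.data();
        uint8_t* out_ptr = unpacked.data();
        for (std::size_t i = 0; i < dim_out; ++i) {
            for (std::size_t j = 0; j < dim_in; j += 2) {
                uint8_t val = in_ptr[j / 2];
                out_ptr[j] = val & 0xF;
                if (j + 1 < dim_in)
                    out_ptr[j + 1] = (val >> 4) & 0xF;
            }
            in_ptr += row_bytes;  // packed rows are byte-aligned
            out_ptr += dim_in;
        }
        assert(unpacked == (std::vector<uint8_t>{1, 2, 3, 4, 5, 6}));
        return 0;
    }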
@@ -27,11 +37,23 @@ void megdnn::naive::uint8_to_uint4(const TensorND& in, const TensorND& out) {
     auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte;
     auto out_ptr = static_cast<uint8_t*>(out.raw_ptr) + out.layout.span().low_byte;
-    for (size_t i = 0; i < out.layout.span().dist_elem(); i += 2) {
-        uint8_t a = in_ptr[i], b = in_ptr[i + 1];
-        a = std::min(a, DTypeTrait<dtype::Quantized4Asymm>::max());
-        b = std::min(b, DTypeTrait<dtype::Quantized4Asymm>::max());
-        out_ptr[i / 2] = a + (b << 4);
+    const auto& ly = in.layout;
+    auto dim_in = ly.shape[ly.ndim - 1];
+    auto elems = ly.total_nr_elems();
+    auto dim_out = elems / dim_in;
+    auto stride_out = div_ceil(dim_in, 2_z);
+    for (size_t i = 0; i < dim_out; ++i) {
+        for (size_t j = 0; j < dim_in; j += 2) {
+            uint8_t a = in_ptr[j];
+            uint8_t b = 0;
+            if (j + 1 < dim_in)
+                b = in_ptr[j + 1];
+            a = std::min(a, DTypeTrait<dtype::Quantized4Asymm>::max());
+            b = std::min(b, DTypeTrait<dtype::Quantized4Asymm>::max());
+            out_ptr[j / 2] = a + (b << 4);
+        }
+        in_ptr += dim_in;
+        out_ptr += stride_out;
     }
 }
@@ -40,13 +62,21 @@ void megdnn::naive::int4_to_int8(const TensorND& in, const TensorND& out) {
     auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte;
     auto out_ptr = static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte;
-    megdnn_assert(in.layout.span().dist_elem() % 2 == 0);
-    for (size_t i = 0; i < in.layout.span().dist_elem(); i += 2) {
-        int8_t cur = in_ptr[i / 2];
-        out_ptr[i] = cur << 4;
-        out_ptr[i] = out_ptr[i] >> 4;
-        out_ptr[i + 1] = cur >> 4;
+    const auto& ly = in.layout;
+    auto dim_in = ly.shape[ly.ndim - 1];
+    auto elems = ly.total_nr_elems();
+    auto dim_out = elems / dim_in;
+    auto stride_out = div_ceil(dim_in, 2_z);
+    for (size_t i = 0; i < dim_out; ++i) {
+        for (size_t j = 0; j < dim_in; j += 2) {
+            int8_t cur = in_ptr[j / 2];
+            out_ptr[j] = cur << 4;
+            out_ptr[j] = out_ptr[j] >> 4;
+            if (j + 1 < dim_in)
+                out_ptr[j + 1] = cur >> 4;
+        }
+        in_ptr += stride_out;
+        out_ptr += dim_in;
     }
 }
@@ -54,12 +84,24 @@ void megdnn::naive::int8_to_int4(const TensorND& in, const TensorND& out) {
     auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte;
     auto out_ptr = static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte;
-    for (size_t i = 0; i < out.layout.span().dist_elem(); i += 2) {
-        int8_t a = in_ptr[i], b = in_ptr[i + 1];
-        a = std::min(a, DTypeTrait<dtype::QuantizedS4>::max());
-        a = std::max(a, DTypeTrait<dtype::QuantizedS4>::min());
-        b = std::min(b, DTypeTrait<dtype::QuantizedS4>::max());
-        b = std::max(b, DTypeTrait<dtype::QuantizedS4>::min());
-        out_ptr[i / 2] = (a & 0xF) | (b << 4);
+    const auto& ly = in.layout;
+    auto dim_in = ly.shape[ly.ndim - 1];
+    auto elems = ly.total_nr_elems();
+    auto dim_out = elems / dim_in;
+    auto stride_out = div_ceil(dim_in, 2_z);
+    for (size_t i = 0; i < dim_out; ++i) {
+        for (size_t j = 0; j < dim_in; j += 2) {
+            int8_t a = in_ptr[j];
+            int8_t b = 0;
+            if (j + 1 < dim_in)
+                b = in_ptr[j + 1];
+            a = std::min(a, DTypeTrait<dtype::QuantizedS4>::max());
+            a = std::max(a, DTypeTrait<dtype::QuantizedS4>::min());
+            b = std::min(b, DTypeTrait<dtype::QuantizedS4>::max());
+            b = std::max(b, DTypeTrait<dtype::QuantizedS4>::min());
+            out_ptr[j / 2] = (a & 0xF) | (b << 4);
+        }
+        in_ptr += dim_in;
+        out_ptr += stride_out;
     }
 }
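For the signed path, note the shift trick kept from the old code: writing cur << 4 and then an arithmetic right shift by 4 sign-extends the low nibble into a proper int8 value in [-8, 7]. A standalone round-trip check of that trick (plain C++, not MegEngine code):

    // Round-trip every representable int4 value through the pack/unpack
    // nibble trick used by int8_to_int4 and int4_to_int8 above.
    #include <cassert>
    #include <cstdint>

    int main() {
        for (int v = -8; v <= 7; ++v) {
            int8_t a = static_cast<int8_t>(v);
            // pack: keep only the low nibble
            int8_t packed = static_cast<int8_t>(a & 0xF);
            // unpack: move the nibble to the top, then arithmetic-shift it
            // back down so its sign bit fills the high bits
            int8_t unpacked = static_cast<int8_t>(packed << 4);
            unpacked = static_cast<int8_t>(unpacked >> 4);
            assert(unpacked == v);
        }
        return 0;
    }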