提交 2a2a7f45 作者: Megvii Engine Team

test(mgb/opr): add testcase for conv bias int4

GitOrigin-RevId: e3fff5e30b8be8398bfc2a96ea3753624a8e7161
上级 858261af
...@@ -187,7 +187,9 @@ bool ConvBiasImpl::AlgoNaive::usable( ...@@ -187,7 +187,9 @@ bool ConvBiasImpl::AlgoNaive::usable(
const NCBKernSizeParam& param, const NCBKernSizeParam& param,
AlgoSelectionStrategy /*algo_selection_strategy*/) const { AlgoSelectionStrategy /*algo_selection_strategy*/) const {
MIDOUT_BEGIN(megdnn_fallback_naive, 0) { MIDOUT_BEGIN(megdnn_fallback_naive, 0) {
return param.filter_meta.format == param::ConvBias::Format::NCHW; auto algo_data_type = param.deduce_algo_data_type();
return param.filter_meta.format == param::ConvBias::Format::NCHW &&
contain_data_type(get_algo_type().data_type, algo_data_type);
} }
MIDOUT_END(); MIDOUT_END();
return false; return false;
......
...@@ -342,7 +342,8 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param( ...@@ -342,7 +342,8 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
param().format == Param::Format::NCHW4 || param().format == Param::Format::NCHW4 ||
param().format == Param::Format::NCHW44 || param().format == Param::Format::NCHW44 ||
param().format == Param::Format::NCHW44_DOT || param().format == Param::Format::NCHW44_DOT ||
param().format == Param::Format::NCHW) { param().format == Param::Format::NCHW ||
param().format == Param::Format::NCHW64) {
spatial_pos = 2; spatial_pos = 2;
} else if (param().format == Param::Format::NHWC) { } else if (param().format == Param::Format::NHWC) {
spatial_pos = 1; spatial_pos = 1;
......
...@@ -123,6 +123,7 @@ void forward_bias<dt_quint4, dt_quint4, dt_qint32, dt_qint32>( ...@@ -123,6 +123,7 @@ void forward_bias<dt_quint4, dt_quint4, dt_qint32, dt_qint32>(
auto param = layout.dtype.param<dtype::Quantized4Asymm>(); auto param = layout.dtype.param<dtype::Quantized4Asymm>();
ret.dtype = dtype::Quantized8Asymm(param.scale, param.zero_point); ret.dtype = dtype::Quantized8Asymm(param.scale, param.zero_point);
ret.format = TensorFormat(ret.dtype); ret.format = TensorFormat(ret.dtype);
ret.init_contiguous_stride();
return ret; return ret;
}; };
TensorND new_src = {workspace_ptr, convert_layout(src.layout)}; TensorND new_src = {workspace_ptr, convert_layout(src.layout)};
...@@ -147,6 +148,7 @@ void forward_bias<dt_qint4, dt_qint4, dt_qint32, dt_qint32>( ...@@ -147,6 +148,7 @@ void forward_bias<dt_qint4, dt_qint4, dt_qint32, dt_qint32>(
auto param = layout.dtype.param<dtype::QuantizedS4>(); auto param = layout.dtype.param<dtype::QuantizedS4>();
ret.dtype = dtype::QuantizedS8(param.scale); ret.dtype = dtype::QuantizedS8(param.scale);
ret.format = TensorFormat(ret.dtype); ret.format = TensorFormat(ret.dtype);
ret.init_contiguous_stride();
return ret; return ret;
}; };
TensorND new_src = {workspace_ptr, convert_layout(src.layout)}; TensorND new_src = {workspace_ptr, convert_layout(src.layout)};
......
...@@ -16,10 +16,20 @@ ...@@ -16,10 +16,20 @@
void megdnn::naive::uint4_to_uint8(const TensorND& in, const TensorND& out) { void megdnn::naive::uint4_to_uint8(const TensorND& in, const TensorND& out) {
auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte; auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte;
auto out_ptr = out.compatible_ptr<uint8_t>() + out.layout.span().low_byte; auto out_ptr = out.compatible_ptr<uint8_t>() + out.layout.span().low_byte;
for (size_t i = 0; i < in.layout.span().dist_elem(); i += 2) { const auto& ly = in.layout;
uint8_t val = in_ptr[i / 2]; auto dim_in = ly.shape[ly.ndim - 1];
out_ptr[i] = val & 0xF; auto elems = ly.total_nr_elems();
out_ptr[i + 1] = (val >> 4) & 0xF; auto dim_out = elems / dim_in;
auto stride_out = div_ceil(dim_in, 2_z);
for (size_t i = 0; i < dim_out; ++i) {
for (size_t j = 0; j < dim_in; j += 2) {
uint8_t val = in_ptr[j / 2];
out_ptr[j] = val & 0xF;
if (j + 1 < dim_in)
out_ptr[j + 1] = (val >> 4) & 0xF;
}
in_ptr += stride_out;
out_ptr += dim_in;
} }
} }
...@@ -27,11 +37,23 @@ void megdnn::naive::uint8_to_uint4(const TensorND& in, const TensorND& out) { ...@@ -27,11 +37,23 @@ void megdnn::naive::uint8_to_uint4(const TensorND& in, const TensorND& out) {
auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte; auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte;
auto out_ptr = auto out_ptr =
static_cast<uint8_t*>(out.raw_ptr) + out.layout.span().low_byte; static_cast<uint8_t*>(out.raw_ptr) + out.layout.span().low_byte;
for (size_t i = 0; i < out.layout.span().dist_elem(); i += 2) { const auto& ly = in.layout;
uint8_t a = in_ptr[i], b = in_ptr[i + 1]; auto dim_in = ly.shape[ly.ndim - 1];
a = std::min(a, DTypeTrait<dtype::Quantized4Asymm>::max()); auto elems = ly.total_nr_elems();
b = std::min(b, DTypeTrait<dtype::Quantized4Asymm>::max()); auto dim_out = elems / dim_in;
out_ptr[i / 2] = a + (b << 4); auto stride_out = div_ceil(dim_in, 2_z);
for (size_t i = 0; i < dim_out; ++i) {
for (size_t j = 0; j < dim_in; j += 2) {
uint8_t a = in_ptr[j];
uint8_t b = 0;
if (j + 1 < dim_in)
b = in_ptr[j + 1];
a = std::min(a, DTypeTrait<dtype::Quantized4Asymm>::max());
b = std::min(b, DTypeTrait<dtype::Quantized4Asymm>::max());
out_ptr[j / 2] = a + (b << 4);
}
in_ptr += dim_in;
out_ptr += stride_out;
} }
} }
...@@ -40,13 +62,21 @@ void megdnn::naive::int4_to_int8(const TensorND& in, const TensorND& out) { ...@@ -40,13 +62,21 @@ void megdnn::naive::int4_to_int8(const TensorND& in, const TensorND& out) {
auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte; auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte;
auto out_ptr = auto out_ptr =
static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte; static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte;
const auto& ly = in.layout;
megdnn_assert(in.layout.span().dist_elem() % 2 == 0); auto dim_in = ly.shape[ly.ndim - 1];
for (size_t i = 0; i < in.layout.span().dist_elem(); i += 2) { auto elems = ly.total_nr_elems();
int8_t cur = in_ptr[i / 2]; auto dim_out = elems / dim_in;
out_ptr[i] = cur << 4; auto stride_out = div_ceil(dim_in, 2_z);
out_ptr[i] = out_ptr[i] >> 4; for (size_t i = 0; i < dim_out; ++i) {
out_ptr[i + 1] = cur >> 4; for (size_t j = 0; j < dim_in; j += 2) {
int8_t cur = in_ptr[j / 2];
out_ptr[j] = cur << 4;
out_ptr[j] = out_ptr[j] >> 4;
if (j + 1 < dim_in)
out_ptr[j + 1] = cur >> 4;
}
in_ptr += stride_out;
out_ptr += dim_in;
} }
} }
...@@ -54,12 +84,24 @@ void megdnn::naive::int8_to_int4(const TensorND& in, const TensorND& out) { ...@@ -54,12 +84,24 @@ void megdnn::naive::int8_to_int4(const TensorND& in, const TensorND& out) {
auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte; auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte;
auto out_ptr = auto out_ptr =
static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte; static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte;
for (size_t i = 0; i < out.layout.span().dist_elem(); i += 2) { const auto& ly = in.layout;
int8_t a = in_ptr[i], b = in_ptr[i + 1]; auto dim_in = ly.shape[ly.ndim - 1];
a = std::min(a, DTypeTrait<dtype::QuantizedS4>::max()); auto elems = ly.total_nr_elems();
a = std::max(a, DTypeTrait<dtype::QuantizedS4>::min()); auto dim_out = elems / dim_in;
b = std::min(b, DTypeTrait<dtype::QuantizedS4>::max()); auto stride_out = div_ceil(dim_in, 2_z);
b = std::max(b, DTypeTrait<dtype::QuantizedS4>::min()); for (size_t i = 0; i < dim_out; ++i) {
out_ptr[i / 2] = (a & 0xF) | (b << 4); for (size_t j = 0; j < dim_in; j += 2) {
int8_t a = in_ptr[j];
int8_t b = 0;
if (j + 1 < dim_in)
b = in_ptr[j + 1];
a = std::min(a, DTypeTrait<dtype::QuantizedS4>::max());
a = std::max(a, DTypeTrait<dtype::QuantizedS4>::min());
b = std::min(b, DTypeTrait<dtype::QuantizedS4>::max());
b = std::max(b, DTypeTrait<dtype::QuantizedS4>::min());
out_ptr[j / 2] = (a & 0xF) | (b << 4);
}
in_ptr += dim_in;
out_ptr += stride_out;
} }
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册