From 2a2a7f45522bb631c2cad88f08689433bac62934 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Tue, 30 Mar 2021 10:27:47 +0800
Subject: [PATCH] test(mgb/opr): add testcase for conv bias int4

GitOrigin-RevId: e3fff5e30b8be8398bfc2a96ea3753624a8e7161
---
 dnn/src/fallback/conv_bias/algos.cpp    |  4 +-
 dnn/src/fallback/conv_bias/opr_impl.cpp |  3 +-
 dnn/src/naive/conv_bias/opr_impl.cpp    |  2 +
 dnn/src/naive/lowbit_utils.cpp          | 88 ++++++++++++++++++-------
 4 files changed, 72 insertions(+), 25 deletions(-)

diff --git a/dnn/src/fallback/conv_bias/algos.cpp b/dnn/src/fallback/conv_bias/algos.cpp
index 7ee22937e..31c8254ba 100644
--- a/dnn/src/fallback/conv_bias/algos.cpp
+++ b/dnn/src/fallback/conv_bias/algos.cpp
@@ -187,7 +187,9 @@ bool ConvBiasImpl::AlgoNaive::usable(
         const NCBKernSizeParam& param,
         AlgoSelectionStrategy /*algo_selection_strategy*/) const {
     MIDOUT_BEGIN(megdnn_fallback_naive, 0) {
-        return param.filter_meta.format == param::ConvBias::Format::NCHW;
+        auto algo_data_type = param.deduce_algo_data_type();
+        return param.filter_meta.format == param::ConvBias::Format::NCHW &&
+               contain_data_type(get_algo_type().data_type, algo_data_type);
     }
     MIDOUT_END();
     return false;
diff --git a/dnn/src/fallback/conv_bias/opr_impl.cpp b/dnn/src/fallback/conv_bias/opr_impl.cpp
index 290b23b1b..b60a2ac0f 100644
--- a/dnn/src/fallback/conv_bias/opr_impl.cpp
+++ b/dnn/src/fallback/conv_bias/opr_impl.cpp
@@ -342,7 +342,8 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
         param().format == Param::Format::NCHW4 ||
         param().format == Param::Format::NCHW44 ||
         param().format == Param::Format::NCHW44_DOT ||
-        param().format == Param::Format::NCHW) {
+        param().format == Param::Format::NCHW ||
+        param().format == Param::Format::NCHW64) {
         spatial_pos = 2;
     } else if (param().format == Param::Format::NHWC) {
         spatial_pos = 1;
diff --git a/dnn/src/naive/conv_bias/opr_impl.cpp b/dnn/src/naive/conv_bias/opr_impl.cpp
index d25e7e889..227353dec 100644
--- a/dnn/src/naive/conv_bias/opr_impl.cpp
+++ b/dnn/src/naive/conv_bias/opr_impl.cpp
@@ -123,6 +123,7 @@ void forward_bias(
             auto param = layout.dtype.param<dtype::Quantized4Asymm>();
             ret.dtype = dtype::Quantized8Asymm(param.scale, param.zero_point);
             ret.format = TensorFormat(ret.dtype);
+            ret.init_contiguous_stride();
             return ret;
         };
         TensorND new_src = {workspace_ptr, convert_layout(src.layout)};
@@ -147,6 +148,7 @@
             auto param = layout.dtype.param<dtype::QuantizedS4>();
             ret.dtype = dtype::QuantizedS8(param.scale);
             ret.format = TensorFormat(ret.dtype);
+            ret.init_contiguous_stride();
             return ret;
         };
         TensorND new_src = {workspace_ptr, convert_layout(src.layout)};
diff --git a/dnn/src/naive/lowbit_utils.cpp b/dnn/src/naive/lowbit_utils.cpp
index 552bd7086..aab9dbbaa 100644
--- a/dnn/src/naive/lowbit_utils.cpp
+++ b/dnn/src/naive/lowbit_utils.cpp
@@ -16,10 +16,20 @@
 void megdnn::naive::uint4_to_uint8(const TensorND& in, const TensorND& out) {
     auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte;
     auto out_ptr = out.compatible_ptr<uint8_t>() + out.layout.span().low_byte;
-    for (size_t i = 0; i < in.layout.span().dist_elem(); i += 2) {
-        uint8_t val = in_ptr[i / 2];
-        out_ptr[i] = val & 0xF;
-        out_ptr[i + 1] = (val >> 4) & 0xF;
+    const auto& ly = in.layout;
+    auto dim_in = ly.shape[ly.ndim - 1];
+    auto elems = ly.total_nr_elems();
+    auto dim_out = elems / dim_in;
+    auto stride_out = div_ceil(dim_in, 2_z);
+    for (size_t i = 0; i < dim_out; ++i) {
+        for (size_t j = 0; j < dim_in; j += 2) {
+            uint8_t val = in_ptr[j / 2];
+            out_ptr[j] = val & 0xF;
+            if (j + 1 < dim_in)
+                out_ptr[j + 1] = (val >> 4) & 0xF;
+        }
+        in_ptr += stride_out;
+        out_ptr += dim_in;
     }
 }
 
@@ -27,11 +37,23 @@
 void megdnn::naive::uint8_to_uint4(const TensorND& in, const TensorND& out) {
     auto in_ptr = static_cast<uint8_t*>(in.raw_ptr) + in.layout.span().low_byte;
     auto out_ptr = static_cast<uint8_t*>(out.raw_ptr) + out.layout.span().low_byte;
-    for (size_t i = 0; i < out.layout.span().dist_elem(); i += 2) {
-        uint8_t a = in_ptr[i], b = in_ptr[i + 1];
-        a = std::min(a, DTypeTrait<dtype::Quantized4Asymm>::max());
-        b = std::min(b, DTypeTrait<dtype::Quantized4Asymm>::max());
-        out_ptr[i / 2] = a + (b << 4);
+    const auto& ly = in.layout;
+    auto dim_in = ly.shape[ly.ndim - 1];
+    auto elems = ly.total_nr_elems();
+    auto dim_out = elems / dim_in;
+    auto stride_out = div_ceil(dim_in, 2_z);
+    for (size_t i = 0; i < dim_out; ++i) {
+        for (size_t j = 0; j < dim_in; j += 2) {
+            uint8_t a = in_ptr[j];
+            uint8_t b = 0;
+            if (j + 1 < dim_in)
+                b = in_ptr[j + 1];
+            a = std::min(a, DTypeTrait<dtype::Quantized4Asymm>::max());
+            b = std::min(b, DTypeTrait<dtype::Quantized4Asymm>::max());
+            out_ptr[j / 2] = a + (b << 4);
+        }
+        in_ptr += dim_in;
+        out_ptr += stride_out;
     }
 }
 
@@ -40,13 +62,21 @@
 void megdnn::naive::int4_to_int8(const TensorND& in, const TensorND& out) {
     auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte;
     auto out_ptr = static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte;
-
-    megdnn_assert(in.layout.span().dist_elem() % 2 == 0);
-    for (size_t i = 0; i < in.layout.span().dist_elem(); i += 2) {
-        int8_t cur = in_ptr[i / 2];
-        out_ptr[i] = cur << 4;
-        out_ptr[i] = out_ptr[i] >> 4;
-        out_ptr[i + 1] = cur >> 4;
+    const auto& ly = in.layout;
+    auto dim_in = ly.shape[ly.ndim - 1];
+    auto elems = ly.total_nr_elems();
+    auto dim_out = elems / dim_in;
+    auto stride_out = div_ceil(dim_in, 2_z);
+    for (size_t i = 0; i < dim_out; ++i) {
+        for (size_t j = 0; j < dim_in; j += 2) {
+            int8_t cur = in_ptr[j / 2];
+            out_ptr[j] = cur << 4;
+            out_ptr[j] = out_ptr[j] >> 4;
+            if (j + 1 < dim_in)
+                out_ptr[j + 1] = cur >> 4;
+        }
+        in_ptr += stride_out;
+        out_ptr += dim_in;
     }
 }
 
@@ -54,12 +84,24 @@
 void megdnn::naive::int8_to_int4(const TensorND& in, const TensorND& out) {
     auto in_ptr = static_cast<int8_t*>(in.raw_ptr) + in.layout.span().low_byte;
     auto out_ptr = static_cast<int8_t*>(out.raw_ptr) + out.layout.span().low_byte;
-    for (size_t i = 0; i < out.layout.span().dist_elem(); i += 2) {
-        int8_t a = in_ptr[i], b = in_ptr[i + 1];
-        a = std::min(a, DTypeTrait<dtype::QuantizedS4>::max());
-        a = std::max(a, DTypeTrait<dtype::QuantizedS4>::min());
-        b = std::min(b, DTypeTrait<dtype::QuantizedS4>::max());
-        b = std::max(b, DTypeTrait<dtype::QuantizedS4>::min());
-        out_ptr[i / 2] = (a & 0xF) | (b << 4);
+    const auto& ly = in.layout;
+    auto dim_in = ly.shape[ly.ndim - 1];
+    auto elems = ly.total_nr_elems();
+    auto dim_out = elems / dim_in;
+    auto stride_out = div_ceil(dim_in, 2_z);
+    for (size_t i = 0; i < dim_out; ++i) {
+        for (size_t j = 0; j < dim_in; j += 2) {
+            int8_t a = in_ptr[j];
+            int8_t b = 0;
+            if (j + 1 < dim_in)
+                b = in_ptr[j + 1];
+            a = std::min(a, DTypeTrait<dtype::QuantizedS4>::max());
+            a = std::max(a, DTypeTrait<dtype::QuantizedS4>::min());
+            b = std::min(b, DTypeTrait<dtype::QuantizedS4>::max());
+            b = std::max(b, DTypeTrait<dtype::QuantizedS4>::min());
+            out_ptr[j / 2] = (a & 0xF) | (b << 4);
+        }
+        in_ptr += dim_in;
+        out_ptr += stride_out;
     }
 }
-- 
GitLab
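
All four converters in dnn/src/naive/lowbit_utils.cpp now share one row-wise
scheme: the innermost dimension is packed two 4-bit values per byte, low
nibble first, each packed row occupying div_ceil(dim_in, 2) bytes, and an odd
row length leaves the last high nibble unused. The standalone sketch below
illustrates that packing and the shift-based sign extension for the signed
case; pack_int4_row and unpack_int4_row are hypothetical names for
illustration, not megdnn API.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Pack one row of int8 values into nibbles, low nibble first; values are
// clamped to the int4 range [-8, 7], mirroring the DTypeTrait clamps in
// int8_to_int4. An odd-length row leaves the last high nibble zero.
static std::vector<uint8_t> pack_int4_row(const std::vector<int8_t>& row) {
    std::vector<uint8_t> packed((row.size() + 1) / 2, 0);
    for (size_t j = 0; j < row.size(); j += 2) {
        int8_t a = std::min(std::max(row[j], int8_t(-8)), int8_t(7));
        int8_t b = 0;
        if (j + 1 < row.size())
            b = std::min(std::max(row[j + 1], int8_t(-8)), int8_t(7));
        packed[j / 2] = (a & 0xF) | (b << 4);
    }
    return packed;
}

// Unpack n values. The low nibble is sign-extended with the same
// shift-left / arithmetic-shift-right trick as int4_to_int8 (the nibble is
// masked first so the left shift acts on a non-negative value); the high
// nibble comes out signed directly from the arithmetic shift.
static std::vector<int8_t> unpack_int4_row(
        const std::vector<uint8_t>& packed, size_t n) {
    std::vector<int8_t> row(n);
    for (size_t j = 0; j < n; j += 2) {
        int8_t cur = packed[j / 2];
        row[j] = int8_t((cur & 0xF) << 4) >> 4;
        if (j + 1 < n)
            row[j + 1] = cur >> 4;
    }
    return row;
}

int main() {
    std::vector<int8_t> row = {-8, 7, -1, 3, 5};  // odd length: 5 elements
    auto packed = pack_int4_row(row);             // ceil(5 / 2) = 3 bytes
    auto restored = unpack_int4_row(packed, row.size());
    for (size_t j = 0; j < row.size(); ++j)
        std::printf("%d -> %d\n", row[j], restored[j]);
    return 0;
}

A round trip over these five values prints each one unchanged, including the
odd trailing element whose pad nibble is never read back.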