#include "src/naive/resize/opr_impl.h"
#include "midout.h"
#include "src/common/cv/enums.h"
#include "src/common/resize.cuh"
#include "src/common/rounding_converter.cuh"
#include "src/common/utils.cuh"
#include "src/naive/handle.h"
#include "src/naive/resize/resize_cv.h"

#include <cstring>
#include <tuple>

MIDOUT_DECL(megdnn_naive_resize_layout)
MIDOUT_DECL(megdnn_naive_resize_nchw)

using namespace megdnn;
using namespace naive;
using namespace resize;

/*!
 * \brief Build the kernel parameter pack for a forward resize.
 *
 * The batch size is always taken from src shape[0]; channel and spatial
 * extents are read from the axes selected by \p format. For the blocked
 * formats the channel count is the block-count axis multiplied by the block
 * width (4 for NCHW4 / NCHW44 / NHWCD4, 8 for NCHW88).
 *
 * Source strides (s_in / s_ic / s_ih / s_iw) are only filled in for
 * Format::NCHW; the other formats leave them untouched.
 *
 * Asserts if \p format is none of the handled formats or if the source dtype
 * is not one of Float32, Float16 (when enabled), Int8, Uint8, QuantizedS8,
 * Quantized8Asymm.
 */
template <typename ctype>
ResizeImpl::KernParam<ctype> ResizeImpl::KernParam<ctype>::from_tensors(
        Format format, InterpolationMode imode, _megdnn_tensor_in src,
        _megdnn_tensor_out dst, _megdnn_workspace workspace) {
    KernParam<ctype> ret;
    ret.format = format;
    ret.imode = imode;
    ret.n = src.layout.shape[0];
    if (format == Format::NCHW) {
        ret.c = src.layout.shape[1];
        ret.ih = src.layout.shape[2];
        ret.iw = src.layout.shape[3];
        ret.oh = dst.layout.shape[2];
        ret.ow = dst.layout.shape[3];
        // NCHW is the only format whose kernel honours the source strides.
        ret.s_in = src.layout.stride[0];
        ret.s_ic = src.layout.stride[1];
        ret.s_ih = src.layout.stride[2];
        ret.s_iw = src.layout.stride[3];
    } else if (format == Format::NHWC) {
        ret.c = src.layout.shape[3];
        ret.ih = src.layout.shape[1];
        ret.iw = src.layout.shape[2];
        ret.oh = dst.layout.shape[1];
        ret.ow = dst.layout.shape[2];
    } else if (format == Format::NCHW4) {
        ret.c = src.layout.shape[1] * 4;
        ret.ih = src.layout.shape[2];
        ret.iw = src.layout.shape[3];
        ret.oh = dst.layout.shape[2];
        ret.ow = dst.layout.shape[3];
    } else if (format == Format::NCHW44) {
        ret.c = src.layout.shape[1] * 4;
        ret.ih = src.layout.shape[2];
        ret.iw = src.layout.shape[3];
        ret.oh = dst.layout.shape[2];
        ret.ow = dst.layout.shape[3];
    } else if (format == Format::NCHW88) {
        ret.c = src.layout.shape[1] * 8;
        ret.ih = src.layout.shape[2];
        ret.iw = src.layout.shape[3];
        ret.oh = dst.layout.shape[2];
        ret.ow = dst.layout.shape[3];
    } else {
        megdnn_assert(format == Format::NHWCD4);
        ret.c = src.layout.shape[2] * 4;
        ret.ih = src.layout.shape[1];
        ret.iw = src.layout.shape[3];
        ret.oh = dst.layout.shape[1];
        ret.ow = dst.layout.shape[3];
    }

    const auto src_enumv = src.layout.dtype.enumv();
    if (src_enumv == DTypeEnum::Float32 ||
        DNN_FLOAT16_SELECT(src_enumv == DTypeEnum::Float16, false) ||
        src_enumv == DTypeEnum::Int8 || src_enumv == DTypeEnum::Uint8 ||
        src_enumv == DTypeEnum::QuantizedS8 ||
        src_enumv == DTypeEnum::Quantized8Asymm) {
        ret.sptr = src.get_ref_ptr();
        ret.dptr = dst.get_ref_ptr();
    } else {
        megdnn_assert(
                0, "current do not support dtype %s in resize",
                src.layout.dtype.name());
    }
    ret.workspace = workspace;
    return ret;
}

// Explicit instantiations of KernParam for every element type dispatched by
// ResizeImpl::exec.
#define INST(_dtype) template struct ResizeImpl::KernParam<_dtype>;
INST(dt_float32);
#ifndef MEGDNN_DISABLE_FLOAT16
INST(dt_float16);
#endif
INST(dt_int8);
INST(dt_uint8);
INST(dt_qint8);
INST(dt_quint8);
#undef INST

/*!
 * \brief Forward resize for Format::NCHW.
 *
 * Supports NEAREST, INTER_LINEAR and INTER_CUBIC. The source is addressed
 * through the strides stored in \p kern_param; the destination is written as
 * a dense (C, OH, OW) block per batch item.
 *
 * \param kern_param parameter pack built by KernParam::from_tensors
 * \param imode interpolation mode selecting the per-pixel formula
 */
template <typename ctype>
void ResizeImpl::kern_nchw(
        const KernParam<ctype>& kern_param, InterpolationMode imode) {
    megdnn_assert(kern_param.format == Format::NCHW);
    UNPACK_RESIZE_FWD_KERN_PARAM_WITH_STRIDE(kern_param);
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;
    rounding::RoundingConverter<ctype> output_converter;

    rep(n, N) {
        rep(oh, OH) rep(ow, OW) {
            switch (imode) {
                case InterpolationMode::NEAREST: {
                    auto ih = get_nearest_src(scale_h, IH, oh);
                    auto iw = get_nearest_src(scale_w, IW, ow);

                    rep(c, static_cast<int>(C)) {
                        dptr[c * OH * OW + oh * OW + ow] =
                                sptr[c * S_IC + ih * S_IH + iw * S_IW];
                    }
                    break;
                }
                case InterpolationMode::INTER_LINEAR: {
                    int ih0, ih1, iw0, iw1;
                    float ah0, ah1, aw0, aw1;

                    std::tie(ah0, ih0, ah1, ih1) = get_nearest_linear_coord(
                            kern_param.imode, scale_h, IH, oh);
                    std::tie(aw0, iw0, aw1, iw1) = get_nearest_linear_coord(
                            kern_param.imode, scale_w, IW, ow);

                    // Weighted sum of the four neighbouring source samples.
                    rep(c, static_cast<int>(C)) {
                        dptr[c * OH * OW + oh * OW + ow] = output_converter(
                                sptr[c * S_IC + ih0 * S_IH + iw0 * S_IW] * ah0 * aw0 +
                                sptr[c * S_IC + ih0 * S_IH + iw1 * S_IW] * ah0 * aw1 +
                                sptr[c * S_IC + ih1 * S_IH + iw0 * S_IW] * ah1 * aw0 +
                                sptr[c * S_IC + ih1 * S_IH + iw1 * S_IW] * ah1 * aw1);
                    }
                    break;
                }
                case InterpolationMode::INTER_CUBIC: {
                    auto coord_h = get_cubic_coord(scale_h, oh);
                    auto coord_w = get_cubic_coord(scale_w, ow);

                    float alphah = coord_h.first;
                    float alphaw = coord_w.first;

                    // The 4x4 window starts one sample before the base index.
                    int ih0 = coord_h.second - 1;
                    int iw0 = coord_w.second - 1;
                    float h_coeff[4], w_coeff[4];
                    interpolate_cubic(alphah, h_coeff);
                    interpolate_cubic(alphaw, w_coeff);

                    rep(c, static_cast<int>(C)) {
                        constexpr int ksize = 4;
                        float ret = 0;
                        rep(kh, ksize) {
                            // Clamp window rows/columns to the valid image range.
                            // NOTE(review): explicit <int, int> restored from
                            // usage - confirm against saturate's declaration.
                            int h = saturate<int, int>(ih0 + kh, 0, IH - 1);
                            rep(kw, ksize) {
                                int w = saturate<int, int>(iw0 + kw, 0, IW - 1);
                                ret += sptr[c * S_IC + h * S_IH + w * S_IW] *
                                       h_coeff[kh] * w_coeff[kw];
                            }
                        }
                        dptr[c * OH * OW + oh * OW + ow] = output_converter(ret);
                    }
                    break;
                }
                default:
                    // This is the forward kernel; name it correctly in the error.
                    megdnn_throw("unsupported mode in ResizeImpl");
                    break;
            }
        }
        sptr += S_IN;
        dptr += C * OH * OW;
    }
}

/*!
 * \brief Dispatch the forward resize to the kernel matching
 * kern_param.format (every format except NCHW, which has its own entry).
 *
 * Throws when the format has no kernel here instead of returning with the
 * destination left unwritten.
 */
template <typename ctype>
void ResizeImpl::kern_naive(const KernParam<ctype>& kern_param) {
    if (kern_param.format == Format::NHWC) {
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(0)) {
            kern_naive_nhwc(kern_param);
        }
        MIDOUT_END();
        return;
    } else if (kern_param.format == Format::NHWCD4) {
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(1)) {
            kern_naive_nhwcd4(kern_param);
        }
        MIDOUT_END();
        return;
    } else if (kern_param.format == Format::NCHW4) {
        // NOTE(review): pack sizes restored from the channel multipliers used
        // in from_tensors (4 / 4 / 8) - confirm against the header.
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(2)) {
            kern_naive_nchwx<ctype, 4>(kern_param);
        }
        MIDOUT_END();
        return;
    } else if (kern_param.format == Format::NCHW44) {
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(3)) {
            kern_naive_nchwx<ctype, 4>(kern_param);
        }
        MIDOUT_END();
        return;
    } else if (kern_param.format == Format::NCHW88) {
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(4)) {
            kern_naive_nchwx<ctype, 8>(kern_param);
        }
        MIDOUT_END();
        return;
    }
    megdnn_throw("unsupported format in ResizeImpl::kern_naive");
}

/*!
 * \brief Forward resize for Format::NHWC on dense tensors.
 *
 * Each output pixel is the weighted sum of four source pixels whose indices
 * and weights come from get_nearest_linear_coord.
 */
template <typename ctype>
void ResizeImpl::kern_naive_nhwc(const KernParam<ctype>& kern_param) {
    UNPACK_RESIZE_FWD_KERN_PARAM(kern_param);
    rounding::RoundingConverter<ctype> output_converter;
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;

    rep(n, N) {
        rep(oh, OH) rep(ow, OW) {
            int ih0, ih1, iw0, iw1;
            float ah0, ah1, aw0, aw1;

            std::tie(ah0, ih0, ah1, ih1) =
                    get_nearest_linear_coord(kern_param.imode, scale_h, IH, oh);
            std::tie(aw0, iw0, aw1, iw1) =
                    get_nearest_linear_coord(kern_param.imode, scale_w, IW, ow);

            rep(c, C) {
                dptr[(oh * OW + ow) * C + c] = output_converter(
                        sptr[(ih0 * IW + iw0) * C + c] * ah0 * aw0 +
                        sptr[(ih0 * IW + iw1) * C + c] * ah0 * aw1 +
                        sptr[(ih1 * IW + iw0) * C + c] * ah1 * aw0 +
                        sptr[(ih1 * IW + iw1) * C + c] * ah1 * aw1);
            }
        }
        sptr += C * IH * IW;
        dptr += C * OH * OW;
    }
}

/*!
 * \brief Forward resize for Format::NHWCD4.
 *
 * Element (h, w, c) lives at
 * h * W * CBLK * 4 + (c / 4) * W * 4 + w * 4 + (c % 4), with CBLK = C / 4.
 */
template <typename ctype>
void ResizeImpl::kern_naive_nhwcd4(const KernParam<ctype>& kern_param) {
    UNPACK_RESIZE_FWD_KERN_PARAM(kern_param);
    rounding::RoundingConverter<ctype> output_converter;
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;

    auto get_tensor_addr = [&](size_t h, size_t w, size_t c, size_t W,
                               size_t C) -> size_t {
        megdnn_assert((C & 0x3) == 0);
        size_t CBLK = (C >> 2);
        return (h * W * CBLK * 4 + (c >> 2) * W * 4 + w * 4 + (c & 0x3));
    };

    rep(n, N) {
        rep(oh, OH) rep(ow, OW) {
            int ih0, ih1, iw0, iw1;
            float ah0, ah1, aw0, aw1;

            std::tie(ah0, ih0, ah1, ih1) =
                    get_nearest_linear_coord(kern_param.imode, scale_h, IH, oh);
            std::tie(aw0, iw0, aw1, iw1) =
                    get_nearest_linear_coord(kern_param.imode, scale_w, IW, ow);

            rep(c, C) {
                dptr[get_tensor_addr(oh, ow, c, OW, C)] = output_converter(
                        sptr[get_tensor_addr(ih0, iw0, c, IW, C)] * ah0 * aw0 +
                        sptr[get_tensor_addr(ih0, iw1, c, IW, C)] * ah0 * aw1 +
                        sptr[get_tensor_addr(ih1, iw0, c, IW, C)] * ah1 * aw0 +
                        sptr[get_tensor_addr(ih1, iw1, c, IW, C)] * ah1 * aw1);
            }
        }
        sptr += IH * (C / 4) * IW * 4;
        dptr += OH * (C / 4) * OW * 4;
    }
}

/*!
 * \brief Forward resize for channel-blocked NCHWx formats.
 *
 * \tparam pack_size channel block width; must be 4 or 8.
 *
 * Element (h, w, c) lives at
 * ((c / pack_size) * H * W + h * W + w) * pack_size + (c % pack_size).
 */
template <typename ctype, size_t pack_size>
void ResizeImpl::kern_naive_nchwx(const KernParam<ctype>& kern_param) {
    UNPACK_RESIZE_FWD_KERN_PARAM(kern_param);
    rounding::RoundingConverter<ctype> output_converter;
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;

    megdnn_assert(pack_size == 4 || pack_size == 8);
    // log2(pack_size), so the address math can use shifts and masks.
    size_t log_pack_size = 2;
    if (pack_size == 8) {
        log_pack_size = 3;
    }

    auto get_tensor_addr = [&](size_t h, size_t w, size_t c, size_t H, size_t W,
                               size_t C) -> size_t {
        megdnn_assert((C & (pack_size - 1)) == 0);
        return (((c >> log_pack_size) * H * W + h * W + w) << log_pack_size) +
               (c & (pack_size - 1));
    };

    rep(n, N) {
        rep(oh, OH) rep(ow, OW) {
            int ih0, ih1, iw0, iw1;
            float ah0, ah1, aw0, aw1;

            std::tie(ah0, ih0, ah1, ih1) =
                    get_nearest_linear_coord(kern_param.imode, scale_h, IH, oh);
            std::tie(aw0, iw0, aw1, iw1) =
                    get_nearest_linear_coord(kern_param.imode, scale_w, IW, ow);

            rep(c, C) {
                dptr[get_tensor_addr(oh, ow, c, OH, OW, C)] = output_converter(
                        sptr[get_tensor_addr(ih0, iw0, c, IH, IW, C)] * ah0 * aw0 +
                        sptr[get_tensor_addr(ih0, iw1, c, IH, IW, C)] * ah0 * aw1 +
                        sptr[get_tensor_addr(ih1, iw0, c, IH, IW, C)] * ah1 * aw0 +
                        sptr[get_tensor_addr(ih1, iw1, c, IH, IW, C)] * ah1 * aw1);
            }
        }
        sptr += IH * IW * C;
        dptr += OH * OW * C;
    }
}

/*!
 * \brief Forward resize entry point.
 *
 * Dispatch order:
 *  1. Format::NCHW always goes to kern_nchw, selected by source dtype.
 *  2. Otherwise kern_naive is used when the last source axis is neither 1
 *     nor 3, or the layout fails is_nhwc_contig_wc, or the mode is LINEAR.
 *  3. Everything else must be NHWC and is forwarded to resize_cv_exec.
 *
 * Throws on a source dtype that has no kernel.
 */
void ResizeImpl::exec(
        _megdnn_tensor_in src, _megdnn_tensor_in dst, _megdnn_workspace workspace) {
    check_exec(src.layout, dst.layout, workspace.size);
    if (param().format == param::Resize::Format::NCHW) {
#define cb(dt, ct, _midout_iv)                                                \
    case DTypeTrait<dt>::enumv: {                                             \
        MIDOUT_BEGIN(megdnn_naive_resize_nchw, midout_iv(_midout_iv)) {       \
            auto kparam = KernParam<ct>::from_tensors(                        \
                    param().format, param().imode, src, dst, workspace);      \
            MEGDNN_DISPATCH_CPU_KERN_OPR(kern_nchw(kparam, param().imode));   \
        }                                                                     \
        MIDOUT_END();                                                         \
        return;                                                               \
    }

        switch (src.layout.dtype.enumv()) {
            cb(dtype::Float32, float, 0);
            DNN_INC_FLOAT16(cb(dtype::Float16, dt_float16, 1));
            cb(dtype::Int8, int8_t, 2);
            cb(dtype::QuantizedS8, int8_t, 3);
            cb(dtype::Uint8, uint8_t, 4);
            cb(dtype::Quantized8Asymm, uint8_t, 5);
            default:
                // This path serves every interpolation mode, not just NEAREST.
                megdnn_throw(ssprintf(
                                     "Unsupported input DType in Resize "
                                     "NCHW format: %s",
                                     src.layout.dtype.name())
                                     .c_str());
                return;
        }

#undef cb
    }
    if (((src.layout[3] != 1 && src.layout[3] != 3) ||
         !is_nhwc_contig_wc(src.layout)) ||
        (param().imode == param::Resize::InterpolationMode::LINEAR)) {
#define cb(dt, ct, _midout_iv)                                                \
    case DTypeTrait<dt>::enumv: {                                             \
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(_midout_iv)) {     \
            auto kparam = KernParam<ct>::from_tensors(                        \
                    param().format, param().imode, src, dst, workspace);      \
            MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                 \
        }                                                                     \
        MIDOUT_END();                                                         \
        return;                                                               \
    }

        switch (src.layout.dtype.enumv()) {
            cb(dtype::Float32, float, 0);
            DNN_INC_FLOAT16(cb(dtype::Float16, dt_float16, 1));
            cb(dtype::Int8, int8_t, 2);
            cb(dtype::QuantizedS8, int8_t, 3);
            cb(dtype::Uint8, uint8_t, 4);
            cb(dtype::Quantized8Asymm, uint8_t, 5);
            default:
                megdnn_throw(ssprintf(
                                     "Unsupported input DType in Resize: %s",
                                     src.layout.dtype.name())
                                     .c_str());
                return;
        }

#undef cb
    } else {
        megdnn_assert(
                param().format == param::Resize::Format::NHWC,
                "invalid resize format");
        MEGDNN_DISPATCH_CPU_KERN_OPR(resize_cv_exec(src, dst, param().imode));
    }
}

/*!
 * \brief Backward resize: scatter the output gradient \p diff back onto the
 * input gradient \p grad.
 *
 * Only Format::NCHW is accepted. \p grad is zero-filled first and every
 * output-gradient element is then accumulated into the source positions the
 * forward pass would have read, using the same weights. Both tensors are
 * indexed as dense float buffers.
 */
void ResizeBackwardImpl::exec(
        _megdnn_tensor_in diff, _megdnn_tensor_out grad, _megdnn_workspace workspace) {
    check_exec(diff.layout, grad.layout, workspace.size);
    megdnn_assert(
            param().format == param::Resize::Format::NCHW, "invalid resize format");
    const int N = grad.layout.shape[0], C = grad.layout.shape[1],
              IH = grad.layout.shape[2], IW = grad.layout.shape[3];
    const int OH = diff.layout.shape[2], OW = diff.layout.shape[3];
    // NOTE(review): element type argument restored from the float pointers
    // these are assigned to - confirm against the original.
    const float* hptr_ = diff.ptr<dt_float32>();
    float* sptr_ = grad.ptr<dt_float32>();
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;
    auto kern = [=]() {
        auto hptr = hptr_;
        auto sptr = sptr_;
        // Gradients are accumulated with +=, so start from zero.
        std::memset(sptr, 0, sizeof(float) * N * C * IH * IW);
        rep(n, N) {
            rep(oh, OH) rep(ow, OW) {
                switch (param().imode) {
                    case InterpolationMode::INTER_LINEAR: {
                        int ih0, ih1, iw0, iw1;
                        float ah0, ah1, aw0, aw1;

                        std::tie(ah0, ih0, ah1, ih1) = get_nearest_linear_coord(
                                param().imode, scale_h, IH, oh);
                        std::tie(aw0, iw0, aw1, iw1) = get_nearest_linear_coord(
                                param().imode, scale_w, IW, ow);

                        rep(c, C) {
                            float hidden = hptr[c * OH * OW + oh * OW + ow];
                            sptr[c * IH * IW + ih0 * IW + iw0] += ah0 * aw0 * hidden;
                            sptr[c * IH * IW + ih1 * IW + iw0] += ah1 * aw0 * hidden;
                            sptr[c * IH * IW + ih0 * IW + iw1] += ah0 * aw1 * hidden;
                            sptr[c * IH * IW + ih1 * IW + iw1] += ah1 * aw1 * hidden;
                        }
                        break;
                    }
                    case InterpolationMode::NEAREST: {
                        auto ih = get_nearest_src(scale_h, IH, oh);
                        auto iw = get_nearest_src(scale_w, IW, ow);
                        rep(c, static_cast<int>(C)) {
                            sptr[c * IH * IW + ih * IW + iw] +=
                                    hptr[c * OH * OW + oh * OW + ow];
                        }
                        break;
                    }
                    case InterpolationMode::INTER_CUBIC: {
                        auto coord_h = get_cubic_coord(scale_h, oh);
                        auto coord_w = get_cubic_coord(scale_w, ow);

                        float alphah = coord_h.first;
                        float alphaw = coord_w.first;

                        // The 4x4 window starts one sample before the base index.
                        int ih0 = coord_h.second - 1;
                        int iw0 = coord_w.second - 1;
                        float h_coeff[4], w_coeff[4];
                        interpolate_cubic(alphah, h_coeff);
                        interpolate_cubic(alphaw, w_coeff);

                        rep(c, static_cast<int>(C)) {
                            constexpr int ksize = 4;
                            rep(kh, ksize) {
                                // NOTE(review): explicit <int, int> restored
                                // from usage - confirm against saturate.
                                int h = saturate<int, int>(ih0 + kh, 0, IH - 1);
                                rep(kw, ksize) {
                                    int w = saturate<int, int>(iw0 + kw, 0, IW - 1);
                                    sptr[c * IH * IW + h * IW + w] +=
                                            hptr[c * OH * OW + oh * OW + ow] *
                                            h_coeff[kh] * w_coeff[kw];
                                }
                            }
                        }
                        break;
                    }
                    default: {
                        megdnn_throw("unsupported mode in ResizeBackwardImpl");
                        break;
                    }
                }
            }
            sptr += C * IH * IW;
            hptr += C * OH * OW;
        }
    };
    MEGDNN_DISPATCH_CPU_KERN_OPR(kern());
}

// vim: syntax=cpp.doxygen