/**
 * \file dnn/src/naive/resize/opr_impl.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "src/common/rounding_converter.cuh"
#include "src/naive/handle.h"
#include "src/naive/resize/opr_impl.h"
#include "src/naive/resize/resize_cv.h"

#include "midout.h"

#include <cstddef>
#include <cstring>

MIDOUT_DECL(megdnn_naive_resize_layout)

using namespace megdnn;
using namespace naive;

/**
 * Collect everything the naive kernels need from the operator arguments.
 *
 * The batch size is always taken from src.layout.shape[0]; channel count and
 * spatial extents are picked from the shape axes that match \p format
 * (channel counts of the 4-packed formats are multiplied by 4). Source strides
 * are only filled in for Format::NCHW, the only kernel that reads them.
 *
 * \param format tensor format of both \p src and \p dst
 * \param src input tensor
 * \param dst output tensor
 * \param workspace stored unchanged in the returned struct
 * \return the populated kernel parameter struct
 *
 * Asserts if \p format is not one of NCHW / NHWC / NCHW4 / NHWCD4, or if the
 * dtype of \p src is not in the supported list below.
 */
template <typename ctype>
ResizeImpl::KernParam<ctype> ResizeImpl::KernParam<ctype>::from_tensors(
        Format format, _megdnn_tensor_in src, _megdnn_tensor_out dst,
        _megdnn_workspace workspace) {
    KernParam<ctype> ret;
    ret.format = format;
    ret.n = src.layout.shape[0];
    if (format == Format::NCHW) {
        ret.c = src.layout.shape[1];
        ret.ih = src.layout.shape[2];
        ret.iw = src.layout.shape[3];
        ret.oh = dst.layout.shape[2];
        ret.ow = dst.layout.shape[3];
        ret.s_in = src.layout.stride[0];
        ret.s_ic = src.layout.stride[1];
        ret.s_ih = src.layout.stride[2];
        ret.s_iw = src.layout.stride[3];
    } else if (format == Format::NHWC) {
        ret.c = src.layout.shape[3];
        ret.ih = src.layout.shape[1];
        ret.iw = src.layout.shape[2];
        ret.oh = dst.layout.shape[1];
        ret.ow = dst.layout.shape[2];
    } else if (format == Format::NCHW4) {
        ret.c = src.layout.shape[1] * 4;
        ret.ih = src.layout.shape[2];
        ret.iw = src.layout.shape[3];
        ret.oh = dst.layout.shape[2];
        ret.ow = dst.layout.shape[3];
    } else {
        megdnn_assert(format == Format::NHWCD4);
        ret.c = src.layout.shape[2] * 4;
        ret.ih = src.layout.shape[1];
        ret.iw = src.layout.shape[3];
        ret.oh = dst.layout.shape[1];
        ret.ow = dst.layout.shape[3];
    }

    const auto src_enum = src.layout.dtype.enumv();
    if (src_enum == DTypeEnum::Float32 ||
        MEGDNN_FLOAT16_SELECT(src_enum == DTypeEnum::Float16, false) ||
        src_enum == DTypeEnum::Int8 || src_enum == DTypeEnum::Uint8 ||
        src_enum == DTypeEnum::QuantizedS8 ||
        src_enum == DTypeEnum::Quantized8Asymm) {
        ret.sptr = src.compatible_ptr<ctype>();
        ret.dptr = dst.compatible_ptr<ctype>();
    } else {
        megdnn_assert(0, "current do not support dtype %s in resize",
                      src.layout.dtype.name());
    }
    ret.workspace = workspace;
    return ret;
}

// Explicit instantiations of KernParam for every element type handled here.
#define INST(_dtype) template struct ResizeImpl::KernParam<_dtype>;
INST(dt_float32);
#ifndef MEGDNN_DISABLE_FLOAT16
INST(dt_float16);
#endif
INST(dt_int8);
INST(dt_uint8);
INST(dt_qint8);
INST(dt_quint8);
#undef INST

/**
 * Naive forward kernel entry point.
 *
 * NHWC, NHWCD4 and NCHW4 are forwarded to their dedicated kernels; anything
 * else must be NCHW and is handled inline. For every output pixel the four
 * neighbouring source samples (ih0/ih1 x iw0/iw1) are blended with the weights
 * derived from get_origin_coord() and the result is passed through
 * rounding::RoundingConverter<ctype>.
 *
 * The NCHW path addresses the source through the strides stored in
 * \p kern_param, while the destination is written as a dense C*OH*OW block per
 * batch.
 *
 * NOTE(review): all kernels in this file read index (ih0 + 1) / (iw0 + 1)
 * unconditionally, so they rely on get_origin_coord() (defined elsewhere)
 * keeping that neighbour inside the input -- confirm for IH == 1 / IW == 1.
 */
template <typename ctype>
void ResizeImpl::kern_naive(const KernParam<ctype>& kern_param) {
    if (kern_param.format == Format::NHWC) {
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(0)) {
            kern_naive_nhwc(kern_param);
        }
        MIDOUT_END();
        return;
    } else if (kern_param.format == Format::NHWCD4) {
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(1)) {
            kern_naive_nhwcd4(kern_param);
        }
        MIDOUT_END();
        return;
    } else if (kern_param.format == Format::NCHW4) {
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(2)) {
            kern_naive_nchw4(kern_param);
        }
        MIDOUT_END();
        return;
    }
    megdnn_assert(kern_param.format == Format::NCHW);
    UNPACK_RESIZE_FWD_KERN_PARAM_WITH_STRIDE(kern_param);
    rounding::RoundingConverter<ctype> output_converter;
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;

    rep(n, N) {
        rep(oh, OH) rep(ow, OW) {
            auto coord_h = get_origin_coord(scale_h, IH, oh);
            auto coord_w = get_origin_coord(scale_w, IW, ow);

            // .first is the blend weight towards the "+1" neighbour,
            // .second is the lower source index.
            float alphah = coord_h.first;
            float alphaw = coord_w.first;

            int ih0 = coord_h.second;
            int ih1 = ih0 + 1;
            int iw0 = coord_w.second;
            int iw1 = iw0 + 1;

            rep(c, static_cast<ptrdiff_t>(C)) {
                dptr[c * OH * OW + oh * OW + ow] = output_converter(
                        sptr[c * S_IC + ih0 * S_IH + iw0 * S_IW] *
                                (1.0f - alphaw) * (1.0f - alphah) +
                        sptr[c * S_IC + ih0 * S_IH + iw1 * S_IW] * alphaw *
                                (1.0f - alphah) +
                        sptr[c * S_IC + ih1 * S_IH + iw0 * S_IW] *
                                (1.0f - alphaw) * alphah +
                        sptr[c * S_IC + ih1 * S_IH + iw1 * S_IW] * alphaw *
                                alphah);
            }
        }
        // advance to the next batch: strided source, dense destination
        sptr += S_IN;
        dptr += C * OH * OW;
    }
}

/**
 * Naive forward kernel for Format::NHWC.
 *
 * Both tensors are indexed as dense (h * W + w) * C + c within a batch; the
 * interpolation is the same four-sample blend as in kern_naive().
 */
template <typename ctype>
void ResizeImpl::kern_naive_nhwc(const KernParam<ctype>& kern_param) {
    UNPACK_RESIZE_FWD_KERN_PARAM(kern_param);
    rounding::RoundingConverter<ctype> output_converter;
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;

    rep(n, N) {
        rep(oh, OH) rep(ow, OW) {
            auto coord_h = get_origin_coord(scale_h, IH, oh);
            auto coord_w = get_origin_coord(scale_w, IW, ow);

            float alphah = coord_h.first;
            float alphaw = coord_w.first;

            int ih0 = coord_h.second;
            int ih1 = ih0 + 1;
            int iw0 = coord_w.second;
            int iw1 = iw0 + 1;

            rep(c, C) {
                dptr[(oh * OW + ow) * C + c] = output_converter(
                        sptr[(ih0 * IW + iw0) * C + c] * (1.0f - alphaw) *
                                (1.0f - alphah) +
                        sptr[(ih0 * IW + iw1) * C + c] * alphaw *
                                (1.0f - alphah) +
                        sptr[(ih1 * IW + iw0) * C + c] * (1.0f - alphaw) *
                                alphah +
                        sptr[(ih1 * IW + iw1) * C + c] * alphaw * alphah);
            }
        }
        sptr += C * IH * IW;
        dptr += C * OH * OW;
    }
}

/**
 * Naive forward kernel for Format::NHWCD4.
 *
 * Channels are stored in groups of four; get_tensor_addr() maps a logical
 * (h, w, c) triple to the element offset h * W * CBLK * 4 + (c / 4) * W * 4 +
 * w * 4 + (c % 4) inside one batch, and asserts that the channel count is a
 * multiple of 4.
 */
template <typename ctype>
void ResizeImpl::kern_naive_nhwcd4(const KernParam<ctype>& kern_param) {
    UNPACK_RESIZE_FWD_KERN_PARAM(kern_param);
    rounding::RoundingConverter<ctype> output_converter;
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;

    auto get_tensor_addr = [&](size_t h, size_t w, size_t c, size_t W,
                               size_t C) -> size_t {
        megdnn_assert((C & 0x3) == 0);
        size_t CBLK = (C >> 2);
        return (h * W * CBLK * 4 + (c >> 2) * W * 4 + w * 4 + (c & 0x3));
    };

    rep(n, N) {
        rep(oh, OH) rep(ow, OW) {
            auto coord_h = get_origin_coord(scale_h, IH, oh);
            auto coord_w = get_origin_coord(scale_w, IW, ow);

            float alphah = coord_h.first;
            float alphaw = coord_w.first;

            int ih0 = coord_h.second;
            int ih1 = ih0 + 1;
            int iw0 = coord_w.second;
            int iw1 = iw0 + 1;

            rep(c, C) {
                dptr[get_tensor_addr(oh, ow, c, OW, C)] = output_converter(
                        sptr[get_tensor_addr(ih0, iw0, c, IW, C)] *
                                (1.0f - alphaw) * (1.0f - alphah) +
                        sptr[get_tensor_addr(ih0, iw1, c, IW, C)] * alphaw *
                                (1.0f - alphah) +
                        sptr[get_tensor_addr(ih1, iw0, c, IW, C)] *
                                (1.0f - alphaw) * alphah +
                        sptr[get_tensor_addr(ih1, iw1, c, IW, C)] * alphaw *
                                alphah);
            }
        }
        sptr += IH * (C / 4) * IW * 4;
        dptr += OH * (C / 4) * OW * 4;
    }
}

/**
 * Naive forward kernel for Format::NCHW4.
 *
 * get_tensor_addr() maps a logical (h, w, c) triple to the element offset
 * (((c / 4) * H * W + h * W + w) * 4) + (c % 4) inside one batch, and asserts
 * that the channel count is a multiple of 4.
 */
template <typename ctype>
void ResizeImpl::kern_naive_nchw4(const KernParam<ctype>& kern_param) {
    UNPACK_RESIZE_FWD_KERN_PARAM(kern_param);
    rounding::RoundingConverter<ctype> output_converter;
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;

    auto get_tensor_addr = [&](size_t h, size_t w, size_t c, size_t H,
                               size_t W, size_t C) -> size_t {
        megdnn_assert((C & 0x3) == 0);
        return (((c >> 2) * H * W + h * W + w) << 2) + (c & 0b11);
    };

    rep(n, N) {
        rep(oh, OH) rep(ow, OW) {
            auto coord_h = get_origin_coord(scale_h, IH, oh);
            auto coord_w = get_origin_coord(scale_w, IW, ow);

            float alphah = coord_h.first;
            float alphaw = coord_w.first;

            int ih0 = coord_h.second;
            int ih1 = ih0 + 1;
            int iw0 = coord_w.second;
            int iw1 = iw0 + 1;

            rep(c, C) {
                dptr[get_tensor_addr(oh, ow, c, OH, OW, C)] = output_converter(
                        sptr[get_tensor_addr(ih0, iw0, c, IH, IW, C)] *
                                (1.0f - alphaw) * (1.0f - alphah) +
                        sptr[get_tensor_addr(ih0, iw1, c, IH, IW, C)] * alphaw *
                                (1.0f - alphah) +
                        sptr[get_tensor_addr(ih1, iw0, c, IH, IW, C)] *
                                (1.0f - alphaw) * alphah +
                        sptr[get_tensor_addr(ih1, iw1, c, IH, IW, C)] * alphaw *
                                alphah);
            }
        }
        sptr += IH * IW * C;
        dptr += OH * OW * C;
    }
}

/**
 * Forward resize.
 *
 * After check_exec(), the naive kernels are used when any of the following
 * holds: the format is NCHW, src.layout[3] is neither 1 nor 3, the source is
 * not is_nhwc_contig_wc(), or the interpolation mode is LINEAR. The kernel is
 * instantiated per source dtype; an unsupported dtype throws.
 *
 * Otherwise the format must be NHWC (asserted) and the call is forwarded to
 * resize_cv_exec() with the configured interpolation mode.
 */
void ResizeImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
                      _megdnn_workspace workspace) {
    check_exec(src.layout, dst.layout, workspace.size);
    if ((param().format == param::Resize::Format::NCHW ||
         (src.layout[3] != 1 && src.layout[3] != 3) ||
         !is_nhwc_contig_wc(src.layout)) ||
        (param().imode == param::Resize::InterpolationMode::LINEAR)) {
// One switch case per dtype: build the typed KernParam and dispatch kern_naive.
#define cb(dt, ct, _midout_iv)                                              \
    case DTypeTrait<dt>::enumv: {                                           \
        MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(_midout_iv)) {   \
            auto kparam = KernParam<ct>::from_tensors(param().format, src,  \
                                                      dst, workspace);      \
            MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));               \
        }                                                                   \
        MIDOUT_END();                                                       \
        return;                                                             \
    }

        switch (src.layout.dtype.enumv()) {
            cb(dtype::Float32, float, 0);
            MEGDNN_INC_FLOAT16(cb(dtype::Float16, dt_float16, 1));
            cb(dtype::Int8, int8_t, 2);
            cb(dtype::QuantizedS8, int8_t, 3);
            cb(dtype::Uint8, uint8_t, 4);
            cb(dtype::Quantized8Asymm, uint8_t, 5);
            default:
                megdnn_throw(ssprintf("Unsupported input DType in Resize: %s",
                                      src.layout.dtype.name())
                                     .c_str());
                return;
        }

#undef cb
    } else {
        megdnn_assert(param().format == param::Resize::Format::NHWC,
                      "invalid resize format");
        MEGDNN_DISPATCH_CPU_KERN_OPR(resize_cv_exec(src, dst, param().imode));
    }
}

/**
 * Backward resize (NCHW, float32 pointers only).
 *
 * \p grad is zero-filled and then, for every element of \p diff, the incoming
 * gradient is accumulated into the four source positions that the forward
 * pass would have read, scaled by the same four blend weights.
 *
 * Both tensors are indexed as dense N x C x H x W blocks.
 */
void ResizeBackwardImpl::exec(_megdnn_tensor_in diff, _megdnn_tensor_out grad,
                              _megdnn_workspace workspace) {
    check_exec(diff.layout, grad.layout, workspace.size);
    // This is the Resize operator: check against its own Format enum and
    // report a resize error (previously a WarpPerspective copy-paste).
    megdnn_assert(param().format == param::Resize::Format::NCHW,
                  "invalid resize format");
    const int N = grad.layout.shape[0], C = grad.layout.shape[1],
              IH = grad.layout.shape[2], IW = grad.layout.shape[3];
    const int OH = diff.layout.shape[2], OW = diff.layout.shape[3];
    const float* hptr_ = diff.ptr<dt_float32>();
    float* sptr_ = grad.ptr<dt_float32>();
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;
    // Everything is captured by value so the kernel can be dispatched later.
    auto kern = [=]() {
        auto hptr = hptr_;
        auto sptr = sptr_;
        std::memset(sptr, 0, sizeof(float) * N * C * IH * IW);
        rep(n, N) {
            rep(oh, OH) rep(ow, OW) {
                auto coord_h = get_origin_coord(scale_h, IH, oh);
                auto coord_w = get_origin_coord(scale_w, IW, ow);

                float alphah = coord_h.first;
                float alphaw = coord_w.first;

                int ih0 = coord_h.second;
                int ih1 = ih0 + 1;
                int iw0 = coord_w.second;
                int iw1 = iw0 + 1;

                rep(c, C) {
                    float hidden = hptr[c * OH * OW + oh * OW + ow];
                    sptr[c * IH * IW + ih0 * IW + iw0] +=
                            (1.0f - alphaw) * (1.0f - alphah) * hidden;
                    sptr[c * IH * IW + ih1 * IW + iw0] +=
                            (1.0f - alphaw) * alphah * hidden;
                    sptr[c * IH * IW + ih0 * IW + iw1] +=
                            alphaw * (1.0f - alphah) * hidden;
                    sptr[c * IH * IW + ih1 * IW + iw1] +=
                            alphaw * alphah * hidden;
                }
            }
            sptr += C * IH * IW;
            hptr += C * OH * OW;
        }
    };
    MEGDNN_DISPATCH_CPU_KERN_OPR(kern());
}

// vim: syntax=cpp.doxygen