提交 bc9cfc27 编写于 作者: M Megvii Engine Team

feat(mgb): add arm resize nchwxx and naive nearest interp

GitOrigin-RevId: d5fbd59a3021ac82527b1f093d97a828b3d725a4
上级 589b427e
......@@ -197,7 +197,11 @@ public:
protected:
//! get origin coord
std::pair<float, int> get_origin_coord(float scale, int size, int idx, bool cubic=false);
std::pair<float, int> get_cubic_coord(float scale, int idx);
std::tuple<float, int, float, int> get_nearest_linear_coord(
InterpolationMode imode, float scale, int size, int idx);
//! get nearest index in src
int get_nearest_src(float scale, int size, int idx);
......
......@@ -6,12 +6,14 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/resize/opr_impl.h"
#include "src/arm_common/handle.h"
#include "src/arm_common/resize/resize_cv.h"
#include "src/arm_common/simd_macro/marm_neon.h"
using namespace megdnn;
using namespace arm_common;
......@@ -19,9 +21,58 @@ using namespace arm_common;
void ResizeImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
_megdnn_workspace workspace) {
check_exec(src.layout, dst.layout, workspace.size);
if (param().format == param::Resize::Format::NCHW ||
(src.layout[3] != 1 && src.layout[3] != 3) ||
!is_nhwc_contig_wc(src.layout)) {
if (param().format == param::Resize::Format::NCHW44 ||
param().format == param::Resize::Format::NCHW88) {
bool is_contiguous =
src.layout.is_contiguous() && dst.layout.is_contiguous();
bool dtype_same = src.layout.dtype == dst.layout.dtype;
bool nchw44_enable = param().format == param::Resize::Format::NCHW44 &&
src.layout.dtype == dtype::Float32();
bool nchw88_enable =
param().format == param::Resize::Format::NCHW88 &&
DNN_FLOAT16_SELECT(src.layout.dtype == dtype::Float16(), false);
bool interp_supported =
param().imode ==
param::Resize::InterpolationMode::INTER_NEAREST ||
param().imode == param::Resize::InterpolationMode::INTER_LINEAR;
bool is_upsample2 =
param().imode ==
param::Resize::InterpolationMode::INTER_NEAREST &&
src.layout.shape[2] * 2 == dst.layout.shape[2] &&
src.layout.shape[3] * 2 == dst.layout.shape[3];
bool need_fallback = !is_contiguous || !dtype_same ||
!interp_supported ||
(!nchw44_enable && !nchw88_enable);
if (need_fallback) {
fallback::ResizeImpl::exec(src, dst, workspace);
} else if (nchw44_enable) {
auto kern_param = KernParam<float>::from_tensors(
param().format, param().imode, src, dst, workspace);
if (is_upsample2) {
MEGDNN_DISPATCH_CPU_KERN_OPR(
kern_nearest_upsample2_pack_simd_width(src, dst));
} else {
MEGDNN_DISPATCH_CPU_KERN_OPR(kern_nchw44_fp32(kern_param));
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
} else if (nchw88_enable) {
auto kern_param = KernParam<dt_float16>::from_tensors(
param().format, param().imode, src, dst, workspace);
if (is_upsample2) {
MEGDNN_DISPATCH_CPU_KERN_OPR(
kern_nearest_upsample2_pack_simd_width(src, dst));
} else {
MEGDNN_DISPATCH_CPU_KERN_OPR(kern_nchw88_fp16(kern_param));
}
#endif
} else {
fallback::ResizeImpl::exec(src, dst, workspace);
}
} else if (param().format == param::Resize::Format::NCHW ||
(src.layout[3] != 1 && src.layout[3] != 3) ||
!is_nhwc_contig_wc(src.layout)) {
fallback::ResizeImpl::exec(src, dst, workspace);
} else {
megdnn_assert(param().format == param::Resize::Format::NHWC,
......@@ -30,4 +81,143 @@ void ResizeImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
}
}
template <typename ctype>
void ResizeImpl::kern_nchw44_fp32(const KernParam<ctype>& kern_param) {
UNPACK_RESIZE_FWD_KERN_PARAM(kern_param);
float scale_h = static_cast<float>(OH) / IH;
float scale_w = static_cast<float>(OW) / IW;
for (size_t n = 0; n < N; ++n) {
for (size_t c = 0; c < C / 4; ++c) {
for (size_t oh = 0; oh < OH; ++oh) {
for (size_t ow = 0; ow < OW; ++ow) {
int ih0, ih1, iw0, iw1;
float ah0, ah1, aw0, aw1;
std::tie(ah0, ih0, ah1, ih1) = get_nearest_linear_coord(
kern_param.imode, scale_h, IH, oh);
std::tie(aw0, iw0, aw1, iw1) = get_nearest_linear_coord(
kern_param.imode, scale_w, IW, ow);
#define SRC_ADDRESS(ih, iw) \
(sptr + n * C * IH * IW + (c * IH * IW + ih * IW + iw) * 4)
#define DST_ADDRESS(oh, ow) \
(dptr + n * C * OH * OW + (c * OH * OW + oh * OW + ow) * 4)
float32x4_t r0 = vld1q_f32(SRC_ADDRESS(ih0, iw0));
float32_t a0 = ah0 * aw0;
float32x4_t r1 = vld1q_f32(SRC_ADDRESS(ih0, iw1));
float32_t a1 = ah0 * aw1;
float32x4_t r2 = vld1q_f32(SRC_ADDRESS(ih1, iw0));
float32_t a2 = ah1 * aw0;
float32x4_t r3 = vld1q_f32(SRC_ADDRESS(ih1, iw1));
float32_t a3 = ah1 * aw1;
r0 = vmulq_n_f32(r0, a0);
#if defined(__ARM_FEATURE_FMA) && defined(__aarch64__)
r0 = vfmaq_n_f32(r0, r1, a1);
r0 = vfmaq_n_f32(r0, r2, a2);
r0 = vfmaq_n_f32(r0, r3, a3);
#else
r0 = vaddq_f32(r0, vmulq_n_f32(r1, a1));
r0 = vaddq_f32(r0, vmulq_n_f32(r2, a2));
r0 = vaddq_f32(r0, vmulq_n_f32(r3, a3));
#endif
vst1q_f32(DST_ADDRESS(oh, ow), r0);
#undef SRC_ADDRESS
#undef DST_ADDRESS
}
}
}
}
}
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
//! Resize kernel for the NCHW88 layout using 8-lane float16 NEON vectors.
//!
//! Every output pixel (one group of 8 packed channels) is the weighted sum of
//! four source pixels. The source indices and weights come from
//! get_nearest_linear_coord(), which this kernel calls with the interpolation
//! mode stored in \p kern_param. The float weights are narrowed to float16
//! when passed to the *_n_f16 intrinsics.
//!
//! Addressing, in elements: n * C * H * W + ((c * H + h) * W + w) * 8, where
//! C is the unpacked channel count and c iterates over C / 8 channel groups.
//!
//! \param kern_param unpacked by UNPACK_RESIZE_FWD_KERN_PARAM into the sizes
//!        (N, C, IH, IW, OH, OW) and the sptr / dptr data pointers.
template <typename ctype>
void ResizeImpl::kern_nchw88_fp16(const KernParam<ctype>& kern_param) {
    UNPACK_RESIZE_FWD_KERN_PARAM(kern_param);
    float scale_h = static_cast<float>(OH) / IH;
    float scale_w = static_cast<float>(OW) / IW;
    const float16_t* src_ptr = reinterpret_cast<float16_t*>(sptr);
    float16_t* dst_ptr = reinterpret_cast<float16_t*>(dptr);

    for (size_t n = 0; n < N; ++n) {
        for (size_t c = 0; c < C / 8; ++c) {
            // Start of the current (batch, channel-group) plane.
            const float16_t* src_plane =
                    src_ptr + n * C * IH * IW + c * IH * IW * 8;
            float16_t* dst_plane = dst_ptr + n * C * OH * OW + c * OH * OW * 8;

            for (size_t oh = 0; oh < OH; ++oh) {
                // The vertical coordinates depend only on oh, so they are
                // computed once per output row instead of once per pixel.
                int ih0, ih1;
                float ah0, ah1;
                std::tie(ah0, ih0, ah1, ih1) = get_nearest_linear_coord(
                        kern_param.imode, scale_h, IH, oh);
                const float16_t* src_row0 = src_plane + ih0 * IW * 8;
                const float16_t* src_row1 = src_plane + ih1 * IW * 8;
                float16_t* dst_row = dst_plane + oh * OW * 8;

                for (size_t ow = 0; ow < OW; ++ow) {
                    int iw0, iw1;
                    float aw0, aw1;
                    std::tie(aw0, iw0, aw1, iw1) = get_nearest_linear_coord(
                            kern_param.imode, scale_w, IW, ow);
                    const size_t off0 = static_cast<size_t>(iw0) * 8;
                    const size_t off1 = static_cast<size_t>(iw1) * 8;

                    float16x8_t r0 = vld1q_f16(src_row0 + off0);
                    float32_t a0 = ah0 * aw0;
                    float16x8_t r1 = vld1q_f16(src_row0 + off1);
                    float32_t a1 = ah0 * aw1;
                    float16x8_t r2 = vld1q_f16(src_row1 + off0);
                    float32_t a2 = ah1 * aw0;
                    float16x8_t r3 = vld1q_f16(src_row1 + off1);
                    float32_t a3 = ah1 * aw1;

                    // Accumulation order is kept as r0*a0 + r1*a1 + r2*a2 +
                    // r3*a3 so that results stay bit-identical.
                    r0 = vmulq_n_f16(r0, a0);
#if defined(__ARM_FEATURE_FMA) && defined(__aarch64__)
                    r0 = vfmaq_n_f16(r0, r1, a1);
                    r0 = vfmaq_n_f16(r0, r2, a2);
                    r0 = vfmaq_n_f16(r0, r3, a3);
#else
                    r0 = vaddq_f16(r0, vmulq_n_f16(r1, a1));
                    r0 = vaddq_f16(r0, vmulq_n_f16(r2, a2));
                    r0 = vaddq_f16(r0, vmulq_n_f16(r3, a3));
#endif

                    vst1q_f16(dst_row + ow * 8, r0);
                }
            }
        }
    }
}
#endif
//! Nearest-neighbour 2x upsampling for packed layouts whose pixel occupies
//! exactly one 16-byte NEON register.
//!
//! Each source pixel is loaded once as 16 raw bytes and stored to the 2x2
//! block of destination pixels starting at (2 * ih, 2 * iw). Data is handled
//! as bytes, so the element type is irrelevant as long as one packed pixel is
//! 16 bytes wide.
//!
//! \param src source tensor; shape[0..3] are read as N, packed channel
//!        groups, IH and IW.
//! \param dst destination tensor; shape[2] and shape[3] are read as OH and OW.
void ResizeImpl::kern_nearest_upsample2_pack_simd_width(
        _megdnn_tensor_in src, _megdnn_tensor_out dst) {
    const size_t factor = 2;
    const size_t pixel_bytes = 16;

    const uint8_t* in_base = reinterpret_cast<uint8_t*>(src.raw_ptr);
    uint8_t* out_base = reinterpret_cast<uint8_t*>(dst.raw_ptr);

    const size_t batch = src.layout.shape[0];
    const size_t groups = src.layout.shape[1];
    const size_t in_h = src.layout.shape[2];
    const size_t in_w = src.layout.shape[3];
    const size_t out_h = dst.layout.shape[2];
    const size_t out_w = dst.layout.shape[3];

    for (size_t plane = 0; plane < batch * groups; ++plane) {
        const uint8_t* in_plane = in_base + plane * in_h * in_w * pixel_bytes;
        uint8_t* out_plane = out_base + plane * out_h * out_w * pixel_bytes;

        for (size_t y = 0; y < in_h; ++y) {
            for (size_t x = 0; x < in_w; ++x) {
                const uint8x16_t pixel = vld1q_u8(
                        in_plane + y * in_w * pixel_bytes + x * pixel_bytes);

                // Replicate the pixel into its factor x factor output block.
                for (size_t dy = 0; dy < factor; ++dy) {
                    uint8_t* out_row = out_plane +
                                       (y * factor + dy) * out_w * pixel_bytes;
                    for (size_t dx = 0; dx < factor; ++dx) {
                        vst1q_u8(out_row + (x * factor + dx) * pixel_bytes,
                                 pixel);
                    }
                }
            }
        }
    }
}
// vim: syntax=cpp.doxygen
......@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"
......@@ -25,6 +26,16 @@ public:
const TensorLayout&) override {
return 0;
}
private:
template <typename ctype>
void kern_nchw44_fp32(const KernParam<ctype>& kern_param);
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
template <typename ctype>
void kern_nchw88_fp16(const KernParam<ctype>& kern_param);
#endif
void kern_nearest_upsample2_pack_simd_width(_megdnn_tensor_in src,
_megdnn_tensor_out dst);
};
} // namespace arm_common
......
......@@ -40,11 +40,29 @@ void ResizeBase::check_layout_fwd(const TensorLayout& src,
megdnn_assert(src.dtype.enumv() == DTypeEnum::QuantizedS8);
megdnn_assert(src.shape[4] == 4);
megdnn_assert(dst.shape[4] == 4);
} else if (param().format == Param::Format::NCHW44) {
megdnn_assert(src.ndim == 5);
megdnn_assert(src.shape[4] == 4);
megdnn_assert(dst.shape[4] == 4);
megdnn_assert(param().imode ==
param::Resize::InterpolationMode::INTER_LINEAR ||
param().imode ==
param::Resize::InterpolationMode::INTER_NEAREST);
} else if (param().format == Param::Format::NCHW88) {
megdnn_assert(src.ndim == 5);
megdnn_assert(src.shape[4] == 8);
megdnn_assert(dst.shape[4] == 8);
megdnn_assert(param().imode ==
param::Resize::InterpolationMode::INTER_LINEAR ||
param().imode ==
param::Resize::InterpolationMode::INTER_NEAREST);
} else {
megdnn_assert(param().format == Param::Format::NHWCD4,
"invalid resize tensor format");
megdnn_assert(param().imode ==
param::Resize::InterpolationMode::INTER_LINEAR);
param::Resize::InterpolationMode::INTER_LINEAR ||
param().imode ==
param::Resize::InterpolationMode::INTER_NEAREST);
megdnn_assert(dst.shape[2] == src.shape[2], "%s", errmsg().c_str());
}
}
......@@ -67,24 +85,39 @@ void ResizeBackward::check_exec(const TensorLayout& diff,
"Backward resize only supports Float32 and NCHW.");
}
std::pair<float, int> ResizeBase::get_origin_coord(float scale, int size,
int idx, bool cubic) {
//! copy from resize_cv.cpp
std::pair<float, int> ResizeBase::get_cubic_coord(float scale, int idx) {
float alpha = (idx + 0.5f) / scale - 0.5f;
int origin_idx = static_cast<int>(floor(alpha));
alpha -= origin_idx;
if (!cubic) {
if (origin_idx < 0) {
origin_idx = 0;
alpha = 0;
} else if (origin_idx + 1 >= size) {
origin_idx = size - 2;
alpha = 1;
}
}
return {alpha, origin_idx};
}
//! Map output index \p idx to two neighbouring source indices and weights.
//!
//! \return (w0, i0, w1, i1): the output sample equals
//!         w0 * src[i0] + w1 * src[i1].
//!
//! - size == 1: both indices are 0 and all weight is on the first.
//! - INTER_NEAREST: the index comes from get_nearest_src() with all weight on
//!   it, before the clamping below.
//! - otherwise: the position is (idx + 0.5) / scale - 0.5, split into its
//!   floor and fractional part.
//!
//! The lower index is then clamped so that both i0 and i0 + 1 lie inside
//! [0, size): below 0 it becomes 0 with weight on i0, and at the upper border
//! it becomes size - 2 with weight on i1 (= size - 1).
std::tuple<float, int, float, int> ResizeBase::get_nearest_linear_coord(
        InterpolationMode imode, float scale, int size, int idx) {
    if (size == 1) {
        return std::make_tuple(1.0f, 0, 0.0f, 0);
    }

    int lower;
    float frac;
    if (imode == InterpolationMode::INTER_NEAREST) {
        lower = get_nearest_src(scale, size, idx);
        frac = 0;
    } else {
        float pos = (idx + 0.5f) / scale - 0.5f;
        lower = static_cast<int>(floor(pos));
        frac = pos - lower;
    }

    const bool below_first = lower < 0;
    const bool at_or_past_last = !below_first && lower + 1 >= size;
    if (below_first) {
        lower = 0;
        frac = 0;
    }
    if (at_or_past_last) {
        lower = size - 2;
        frac = 1;
    }

    const int upper = lower + 1;
    return std::make_tuple(1 - frac, lower, frac, upper);
}
//! Nearest source index for output index \p idx: idx / scale truncated to
//! int, capped at the last valid index size - 1.
int ResizeBase::get_nearest_src(float scale, int size, int idx) {
    const int truncated = static_cast<int>(idx / scale);
    const int last = size - 1;
    return last < truncated ? last : truncated;
}
......
......@@ -6,13 +6,14 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/fallback/resize/opr_impl.h"
#include <vector>
#include "src/fallback/handle.h"
#include "src/common/rounding_converter.cuh"
#include "src/fallback/handle.h"
using namespace megdnn;
using namespace fallback;
......@@ -30,37 +31,36 @@ void ResizeImpl::kern_fallback(const KernParam<ctype>& kern_param) {
float scale_h = static_cast<float>(OH) / IH;
float scale_w = static_cast<float>(OW) / IW;
auto build_table = [this](float scale, int isize,
int osize) -> std::vector<std::pair<float, int>> {
std::vector<std::pair<float, int>> table;
rep(i, osize) { table.push_back(get_origin_coord(scale, isize, i)); }
auto build_table = [this](InterpolationMode imode, float scale, int isize,
int osize) {
std::vector<std::tuple<float, int, float, int>> table;
rep(i, osize) {
table.push_back(get_nearest_linear_coord(imode, scale, isize, i));
}
return table;
};
auto table_h = build_table(scale_h, IH, OH);
auto table_w = build_table(scale_w, IW, OW);
auto table_h = build_table(kern_param.imode, scale_h, IH, OH);
auto table_w = build_table(kern_param.imode, scale_w, IW, OW);
rep(n, N) {
rep(c, static_cast<int>(C)) {
rep(oh, OH) {
auto coord_h = table_h[oh];
float alphah = coord_h.first;
int ih0 = coord_h.second;
int ih1 = ih0 + 1;
float ah0, ah1, aw0, aw1;
int ih0, ih1, iw0, iw1;
std::tie(ah0, ih0, ah1, ih1) = table_h[oh];
rep(ow, OW) {
auto coord_w = table_w[ow];
float alphaw = coord_w.first;
int iw0 = coord_w.second;
int iw1 = iw0 + 1;
std::tie(aw0, iw0, aw1, iw1) = table_w[ow];
dptr[c * OH * OW + oh * OW + ow] = output_converter(
sptr[c * S_IC + ih0 * S_IH + iw0 * S_IW] *
(1.0f - alphaw) * (1.0f - alphah) +
sptr[c * S_IC + ih0 * S_IH + iw1 * S_IW] *
alphaw * (1.0f - alphah) +
sptr[c * S_IC + ih1 * S_IH + iw0 * S_IW] *
(1.0f - alphaw) * alphah +
sptr[c * S_IC + ih1 * S_IH + iw1 * S_IW] *
alphaw * alphah);
sptr[c * S_IC + ih0 * S_IH + iw0 * S_IW] * ah0 *
aw0 +
sptr[c * S_IC + ih0 * S_IH + iw1 * S_IW] * ah0 *
aw1 +
sptr[c * S_IC + ih1 * S_IH + iw0 * S_IW] * ah1 *
aw0 +
sptr[c * S_IC + ih1 * S_IH + iw1 * S_IW] * ah1 *
aw1);
}
}
}
......@@ -76,35 +76,31 @@ void ResizeImpl::kern_fallback_nhwc(const KernParam<ctype>& kern_param) {
float scale_h = static_cast<float>(OH) / IH;
float scale_w = static_cast<float>(OW) / IW;
auto build_table = [this](float scale, int isize,
int osize) -> std::vector<std::pair<float, int>> {
std::vector<std::pair<float, int>> table;
rep(i, osize) { table.push_back(get_origin_coord(scale, isize, i)); }
auto build_table = [this](InterpolationMode imode, float scale, int isize,
int osize) {
std::vector<std::tuple<float, int, float, int>> table;
rep(i, osize) {
table.push_back(get_nearest_linear_coord(imode, scale, isize, i));
}
return table;
};
auto table_h = build_table(scale_h, IH, OH);
auto table_w = build_table(scale_w, IW, OW);
auto table_h = build_table(kern_param.imode, scale_h, IH, OH);
auto table_w = build_table(kern_param.imode, scale_w, IW, OW);
rep(n, N) {
rep(oh, OH) {
auto coord_h = table_h[oh];
float alphah = coord_h.first;
int ih0 = coord_h.second;
int ih1 = ih0 + 1;
float ah0, ah1, aw0, aw1;
int ih0, ih1, iw0, iw1;
std::tie(ah0, ih0, ah1, ih1) = table_h[oh];
rep(ow, OW) {
auto coord_w = table_w[ow];
float alphaw = coord_w.first;
int iw0 = coord_w.second;
int iw1 = iw0 + 1;
std::tie(aw0, iw0, aw1, iw1) = table_w[ow];
rep(c, C) {
dptr[(oh * OW + ow) * C + c] = output_converter(
sptr[(ih0 * IW + iw0) * C + c] * (1.0f - alphaw) *
(1.0f - alphah) +
sptr[(ih0 * IW + iw1) * C + c] * alphaw *
(1.0f - alphah) +
sptr[(ih1 * IW + iw0) * C + c] * (1.0f - alphaw) *
alphah +
sptr[(ih1 * IW + iw1) * C + c] * alphaw * alphah);
sptr[(ih0 * IW + iw0) * C + c] * ah0 * aw0 +
sptr[(ih0 * IW + iw1) * C + c] * ah0 * aw1 +
sptr[(ih1 * IW + iw0) * C + c] * ah1 * aw0 +
sptr[(ih1 * IW + iw1) * C + c] * ah1 * aw1);
}
}
}
......@@ -117,6 +113,8 @@ void ResizeImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
_megdnn_workspace workspace) {
check_exec(src.layout, dst.layout, workspace.size);
if (param().format == param::Resize::Format::NCHW4 ||
param().format == param::Resize::Format::NCHW44 ||
param().format == param::Resize::Format::NCHW88 ||
(param().format == param::Resize::Format::NCHW &&
param().imode != param::Resize::InterpolationMode::INTER_LINEAR)) {
naive::ResizeImpl::exec(src, dst, workspace);
......@@ -125,12 +123,12 @@ void ResizeImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
if ((param().format == param::Resize::Format::NCHW ||
(src.layout[3] != 1 && src.layout[3] != 3)) ||
(param().imode == param::Resize::InterpolationMode::LINEAR)) {
#define cb(dt, ct) \
case DTypeTrait<dt>::enumv: { \
auto kparam = KernParam<ct>::from_tensors(param().format, src, dst, \
workspace); \
MEGDNN_DISPATCH_CPU_KERN_OPR(kern_fallback(kparam)); \
return; \
#define cb(dt, ct) \
case DTypeTrait<dt>::enumv: { \
auto kparam = KernParam<ct>::from_tensors( \
param().format, param().imode, src, dst, workspace); \
MEGDNN_DISPATCH_CPU_KERN_OPR(kern_fallback(kparam)); \
return; \
}
switch (src.layout.dtype.enumv()) {
......@@ -141,10 +139,9 @@ void ResizeImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
cb(dtype::Uint8, uint8_t);
cb(dtype::Quantized8Asymm, uint8_t);
default:
megdnn_throw(
ssprintf("Unsupported input DType in Resize: %s",
src.layout.dtype.name())
.c_str());
megdnn_throw(ssprintf("Unsupported input DType in Resize: %s",
src.layout.dtype.name())
.c_str());
return;
}
......
......@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/naive/resize/opr_impl.h"
......@@ -27,10 +28,11 @@ using namespace resize;
template <typename ctype>
ResizeImpl::KernParam<ctype> ResizeImpl::KernParam<ctype>::from_tensors(
Format format, _megdnn_tensor_in src, _megdnn_tensor_out dst,
_megdnn_workspace workspace) {
Format format, InterpolationMode imode, _megdnn_tensor_in src,
_megdnn_tensor_out dst, _megdnn_workspace workspace) {
KernParam<ctype> ret;
ret.format = format;
ret.imode = imode;
ret.n = src.layout.shape[0];
if (format == Format::NCHW) {
ret.c = src.layout.shape[1];
......@@ -54,6 +56,18 @@ ResizeImpl::KernParam<ctype> ResizeImpl::KernParam<ctype>::from_tensors(
ret.iw = src.layout.shape[3];
ret.oh = dst.layout.shape[2];
ret.ow = dst.layout.shape[3];
} else if (format == Format::NCHW44) {
ret.c = src.layout.shape[1] * 4;
ret.ih = src.layout.shape[2];
ret.iw = src.layout.shape[3];
ret.oh = dst.layout.shape[2];
ret.ow = dst.layout.shape[3];
} else if (format == Format::NCHW88) {
ret.c = src.layout.shape[1] * 8;
ret.ih = src.layout.shape[2];
ret.iw = src.layout.shape[3];
ret.oh = dst.layout.shape[2];
ret.ow = dst.layout.shape[3];
} else {
megdnn_assert(format == Format::NHWCD4);
ret.c = src.layout.shape[2] * 4;
......@@ -115,33 +129,30 @@ void ResizeImpl::kern_nchw(const KernParam<ctype>& kern_param,
break;
}
case InterpolationMode::INTER_LINEAR: {
auto coord_h = get_origin_coord(scale_h, IH, oh);
auto coord_w = get_origin_coord(scale_w, IW, ow);
float alphah = coord_h.first;
float alphaw = coord_w.first;
int ih0, ih1, iw0, iw1;
float ah0, ah1, aw0, aw1;
int ih0 = coord_h.second;
int ih1 = ih0 + 1;
int iw0 = coord_w.second;
int iw1 = iw0 + 1;
std::tie(ah0, ih0, ah1, ih1) = get_nearest_linear_coord(
kern_param.imode, scale_h, IH, oh);
std::tie(aw0, iw0, aw1, iw1) = get_nearest_linear_coord(
kern_param.imode, scale_w, IW, ow);
rep(c, static_cast<int>(C)) {
dptr[c * OH * OW + oh * OW + ow] = output_converter(
sptr[c * S_IC + ih0 * S_IH + iw0 * S_IW] *
(1.0f - alphaw) * (1.0f - alphah) +
sptr[c * S_IC + ih0 * S_IH + iw1 * S_IW] *
alphaw * (1.0f - alphah) +
sptr[c * S_IC + ih1 * S_IH + iw0 * S_IW] *
(1.0f - alphaw) * alphah +
sptr[c * S_IC + ih1 * S_IH + iw1 * S_IW] *
alphaw * alphah);
sptr[c * S_IC + ih0 * S_IH + iw0 * S_IW] * ah0 *
aw0 +
sptr[c * S_IC + ih0 * S_IH + iw1 * S_IW] * ah0 *
aw1 +
sptr[c * S_IC + ih1 * S_IH + iw0 * S_IW] * ah1 *
aw0 +
sptr[c * S_IC + ih1 * S_IH + iw1 * S_IW] * ah1 *
aw1);
}
break;
}
case InterpolationMode::INTER_CUBIC: {
auto coord_h = get_origin_coord(scale_h, IH, oh, true);
auto coord_w = get_origin_coord(scale_w, IW, ow, true);
auto coord_h = get_cubic_coord(scale_h, oh);
auto coord_w = get_cubic_coord(scale_w, ow);
float alphah = coord_h.first;
float alphaw = coord_w.first;
......@@ -193,7 +204,19 @@ void ResizeImpl::kern_naive(const KernParam<ctype>& kern_param) {
return;
} else if (kern_param.format == Format::NCHW4) {
MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(2)) {
kern_naive_nchw4(kern_param);
kern_naive_nchwx<ctype, 4>(kern_param);
}
MIDOUT_END();
return;
} else if (kern_param.format == Format::NCHW44) {
MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(3)) {
kern_naive_nchwx<ctype, 4>(kern_param);
}
MIDOUT_END();
return;
} else if (kern_param.format == Format::NCHW88) {
MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(4)) {
kern_naive_nchwx<ctype, 8>(kern_param);
}
MIDOUT_END();
return;
......@@ -209,25 +232,20 @@ void ResizeImpl::kern_naive_nhwc(const KernParam<ctype>& kern_param) {
rep(n, N) {
rep(oh, OH) rep(ow, OW) {
auto coord_h = get_origin_coord(scale_h, IH, oh);
auto coord_w = get_origin_coord(scale_w, IW, ow);
int ih0, ih1, iw0, iw1;
float ah0, ah1, aw0, aw1;
float alphah = coord_h.first;
float alphaw = coord_w.first;
std::tie(ah0, ih0, ah1, ih1) =
get_nearest_linear_coord(kern_param.imode, scale_h, IH, oh);
std::tie(aw0, iw0, aw1, iw1) =
get_nearest_linear_coord(kern_param.imode, scale_w, IW, ow);
int ih0 = coord_h.second;
int ih1 = ih0 + 1;
int iw0 = coord_w.second;
int iw1 = iw0 + 1;
rep(c, C) {
dptr[(oh * OW + ow) * C + c] = output_converter(
sptr[(ih0 * IW + iw0) * C + c] * (1.0f - alphaw) *
(1.0f - alphah) +
sptr[(ih0 * IW + iw1) * C + c] * alphaw *
(1.0f - alphah) +
sptr[(ih1 * IW + iw0) * C + c] * (1.0f - alphaw) *
alphah +
sptr[(ih1 * IW + iw1) * C + c] * alphaw * alphah);
sptr[(ih0 * IW + iw0) * C + c] * ah0 * aw0 +
sptr[(ih0 * IW + iw1) * C + c] * ah0 * aw1 +
sptr[(ih1 * IW + iw0) * C + c] * ah1 * aw0 +
sptr[(ih1 * IW + iw1) * C + c] * ah1 * aw1);
}
}
sptr += C * IH * IW;
......@@ -251,26 +269,20 @@ void ResizeImpl::kern_naive_nhwcd4(const KernParam<ctype>& kern_param) {
rep(n, N) {
rep(oh, OH) rep(ow, OW) {
auto coord_h = get_origin_coord(scale_h, IH, oh);
auto coord_w = get_origin_coord(scale_w, IW, ow);
int ih0, ih1, iw0, iw1;
float ah0, ah1, aw0, aw1;
float alphah = coord_h.first;
float alphaw = coord_w.first;
std::tie(ah0, ih0, ah1, ih1) =
get_nearest_linear_coord(kern_param.imode, scale_h, IH, oh);
std::tie(aw0, iw0, aw1, iw1) =
get_nearest_linear_coord(kern_param.imode, scale_w, IW, ow);
int ih0 = coord_h.second;
int ih1 = ih0 + 1;
int iw0 = coord_w.second;
int iw1 = iw0 + 1;
rep(c, C) {
dptr[get_tensor_addr(oh, ow, c, OW, C)] = output_converter(
sptr[get_tensor_addr(ih0, iw0, c, IW, C)] *
(1.0f - alphaw) * (1.0f - alphah) +
sptr[get_tensor_addr(ih0, iw1, c, IW, C)] * alphaw *
(1.0f - alphah) +
sptr[get_tensor_addr(ih1, iw0, c, IW, C)] *
(1.0f - alphaw) * alphah +
sptr[get_tensor_addr(ih1, iw1, c, IW, C)] * alphaw *
alphah);
sptr[get_tensor_addr(ih0, iw0, c, IW, C)] * ah0 * aw0 +
sptr[get_tensor_addr(ih0, iw1, c, IW, C)] * ah0 * aw1 +
sptr[get_tensor_addr(ih1, iw0, c, IW, C)] * ah1 * aw0 +
sptr[get_tensor_addr(ih1, iw1, c, IW, C)] * ah1 * aw1);
}
}
sptr += IH * (C / 4) * IW * 4;
......@@ -278,41 +290,46 @@ void ResizeImpl::kern_naive_nhwcd4(const KernParam<ctype>& kern_param) {
}
}
template <typename ctype>
void ResizeImpl::kern_naive_nchw4(const KernParam<ctype>& kern_param) {
template <typename ctype, size_t pack_size>
void ResizeImpl::kern_naive_nchwx(const KernParam<ctype>& kern_param) {
UNPACK_RESIZE_FWD_KERN_PARAM(kern_param);
rounding::RoundingConverter<ctype> output_converter;
float scale_h = static_cast<float>(OH) / IH;
float scale_w = static_cast<float>(OW) / IW;
megdnn_assert(pack_size == 4 || pack_size == 8);
size_t log_pack_size = 2;
if (pack_size == 8) {
log_pack_size = 3;
}
auto get_tensor_addr = [&](size_t h, size_t w, size_t c, size_t H, size_t W,
size_t C) -> size_t {
megdnn_assert((C & 0x3) == 0);
return (((c >> 2) * H * W + h * W + w) << 2) + (c & 0b11);
megdnn_assert((C & (pack_size - 1)) == 0);
return (((c >> log_pack_size) * H * W + h * W + w) << log_pack_size) +
(c & (pack_size - 1));
};
rep(n, N) {
rep(oh, OH) rep(ow, OW) {
auto coord_h = get_origin_coord(scale_h, IH, oh);
auto coord_w = get_origin_coord(scale_w, IW, ow);
int ih0, ih1, iw0, iw1;
float ah0, ah1, aw0, aw1;
float alphah = coord_h.first;
float alphaw = coord_w.first;
std::tie(ah0, ih0, ah1, ih1) =
get_nearest_linear_coord(kern_param.imode, scale_h, IH, oh);
std::tie(aw0, iw0, aw1, iw1) =
get_nearest_linear_coord(kern_param.imode, scale_w, IW, ow);
int ih0 = coord_h.second;
int ih1 = ih0 + 1;
int iw0 = coord_w.second;
int iw1 = iw0 + 1;
rep(c, C) {
dptr[get_tensor_addr(oh, ow, c, OH, OW, C)] = output_converter(
sptr[get_tensor_addr(ih0, iw0, c, IH, IW, C)] *
(1.0f - alphaw) * (1.0f - alphah) +
sptr[get_tensor_addr(ih0, iw1, c, IH, IW, C)] * alphaw *
(1.0f - alphah) +
sptr[get_tensor_addr(ih1, iw0, c, IH, IW, C)] *
(1.0f - alphaw) * alphah +
sptr[get_tensor_addr(ih1, iw1, c, IH, IW, C)] * alphaw *
alphah);
sptr[get_tensor_addr(ih0, iw0, c, IH, IW, C)] * ah0 *
aw0 +
sptr[get_tensor_addr(ih0, iw1, c, IH, IW, C)] * ah0 *
aw1 +
sptr[get_tensor_addr(ih1, iw0, c, IH, IW, C)] * ah1 *
aw0 +
sptr[get_tensor_addr(ih1, iw1, c, IH, IW, C)] * ah1 *
aw1);
}
}
sptr += IH * IW * C;
......@@ -327,8 +344,8 @@ void ResizeImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
#define cb(dt, ct, _midout_iv) \
case DTypeTrait<dt>::enumv: { \
MIDOUT_BEGIN(megdnn_naive_resize_nchw, midout_iv(_midout_iv)) { \
auto kparam = KernParam<ct>::from_tensors(param().format, src, \
dst, workspace); \
auto kparam = KernParam<ct>::from_tensors( \
param().format, param().imode, src, dst, workspace); \
MEGDNN_DISPATCH_CPU_KERN_OPR(kern_nchw(kparam, param().imode)); \
} \
MIDOUT_END(); \
......@@ -356,15 +373,15 @@ void ResizeImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
if (((src.layout[3] != 1 && src.layout[3] != 3) ||
!is_nhwc_contig_wc(src.layout)) ||
(param().imode == param::Resize::InterpolationMode::LINEAR)) {
#define cb(dt, ct, _midout_iv) \
case DTypeTrait<dt>::enumv: { \
MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(_midout_iv)) { \
auto kparam = KernParam<ct>::from_tensors(param().format, src, \
dst, workspace); \
MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam)); \
} \
MIDOUT_END(); \
return; \
#define cb(dt, ct, _midout_iv) \
case DTypeTrait<dt>::enumv: { \
MIDOUT_BEGIN(megdnn_naive_resize_layout, midout_iv(_midout_iv)) { \
auto kparam = KernParam<ct>::from_tensors( \
param().format, param().imode, src, dst, workspace); \
MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam)); \
} \
MIDOUT_END(); \
return; \
}
switch (src.layout.dtype.enumv()) {
......@@ -409,27 +426,24 @@ void ResizeBackwardImpl::exec(_megdnn_tensor_in diff, _megdnn_tensor_out grad,
rep(oh, OH) rep(ow, OW) {
switch (param().imode) {
case InterpolationMode::INTER_LINEAR: {
auto coord_h = get_origin_coord(scale_h, IH, oh);
auto coord_w = get_origin_coord(scale_w, IW, ow);
float alphah = coord_h.first;
float alphaw = coord_w.first;
int ih0, ih1, iw0, iw1;
float ah0, ah1, aw0, aw1;
int ih0 = coord_h.second;
int ih1 = ih0 + 1;
int iw0 = coord_w.second;
int iw1 = iw0 + 1;
std::tie(ah0, ih0, ah1, ih1) = get_nearest_linear_coord(
param().imode, scale_h, IH, oh);
std::tie(aw0, iw0, aw1, iw1) = get_nearest_linear_coord(
param().imode, scale_w, IW, ow);
rep(c, C) {
float hidden = hptr[c * OH * OW + oh * OW + ow];
sptr[c * IH * IW + ih0 * IW + iw0] +=
(1.0f - alphaw) * (1.0f - alphah) * hidden;
ah0 * aw0 * hidden;
sptr[c * IH * IW + ih1 * IW + iw0] +=
(1.0f - alphaw) * alphah * hidden;
ah1 * aw0 * hidden;
sptr[c * IH * IW + ih0 * IW + iw1] +=
alphaw * (1.0f - alphah) * hidden;
ah0 * aw1 * hidden;
sptr[c * IH * IW + ih1 * IW + iw1] +=
alphaw * alphah * hidden;
ah1 * aw1 * hidden;
}
break;
}
......@@ -443,8 +457,8 @@ void ResizeBackwardImpl::exec(_megdnn_tensor_in diff, _megdnn_tensor_out grad,
break;
}
case InterpolationMode::INTER_CUBIC: {
auto coord_h = get_origin_coord(scale_h, IH, oh, true);
auto coord_w = get_origin_coord(scale_w, IW, ow, true);
auto coord_h = get_cubic_coord(scale_h, oh);
auto coord_w = get_cubic_coord(scale_w, ow);
float alphah = coord_h.first;
float alphaw = coord_w.first;
......@@ -460,7 +474,8 @@ void ResizeBackwardImpl::exec(_megdnn_tensor_in diff, _megdnn_tensor_out grad,
rep(kh, ksize) {
int h = saturate<int, int>(ih0 + kh, 0, IH - 1);
rep(kw, ksize) {
int w = saturate<int, int>(iw0 + kw, 0, IW - 1);
int w = saturate<int, int>(iw0 + kw, 0,
IW - 1);
sptr[c * IH * IW + h * IW + w] +=
hptr[c * OH * OW + oh * OW + ow] *
h_coeff[kh] * w_coeff[kw];
......
......@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
......@@ -19,15 +20,18 @@ namespace naive {
class ResizeImpl : public Resize {
public:
using Format = Param::Format;
using InterpolationMode = Param::InterpolationMode;
template <typename ctype>
struct KernParam {
Format format;
InterpolationMode imode;
size_t n, c, ih, iw, oh, ow;
ptrdiff_t s_in, s_ic, s_ih, s_iw;
ctype *sptr, *dptr;
Workspace workspace;
static KernParam from_tensors(Format format, _megdnn_tensor_in src,
static KernParam from_tensors(Format format, InterpolationMode imode,
_megdnn_tensor_in src,
_megdnn_tensor_out dst,
_megdnn_workspace workspace);
};
......@@ -41,6 +45,7 @@ public:
const TensorLayout&) override {
return 0;
}
private:
// ctype: C type of input data type.
template <typename ctype>
......@@ -55,8 +60,8 @@ private:
template <typename ctype>
void kern_naive_nhwcd4(const KernParam<ctype>& kern_param);
template <typename ctype>
void kern_naive_nchw4(const KernParam<ctype>& kern_param);
template <typename ctype, size_t pack_size>
void kern_naive_nchwx(const KernParam<ctype>& kern_param);
}; // class ResizeImpl
......@@ -65,15 +70,15 @@ private:
ctype* __restrict sptr = p.sptr; \
ctype* __restrict dptr = p.dptr;
#define UNPACK_RESIZE_FWD_KERN_PARAM_WITH_STRIDE(p) \
UNPACK_RESIZE_FWD_KERN_PARAM(p) \
#define UNPACK_RESIZE_FWD_KERN_PARAM_WITH_STRIDE(p) \
UNPACK_RESIZE_FWD_KERN_PARAM(p) \
auto S_IN = p.s_in, S_IC = p.s_ic, S_IH = p.s_ih, S_IW = p.s_iw;
class ResizeBackwardImpl: public ResizeBackward {
class ResizeBackwardImpl : public ResizeBackward {
public:
using ResizeBackward::ResizeBackward;
void exec(_megdnn_tensor_in diff,
_megdnn_tensor_out grad, _megdnn_workspace workspace) override;
void exec(_megdnn_tensor_in diff, _megdnn_tensor_out grad,
_megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout&,
const TensorLayout&) override {
return 0;
......
......@@ -6,40 +6,66 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "test/arm_common/fixture.h"
#include "test/common/resize.h"
#include "test/arm_common/fixture.h"
#include "test/common/checker.h"
namespace megdnn {
namespace test {
TEST_F(ARM_COMMON, RESIZE_CV)
{
TEST_F(ARM_COMMON, RESIZE_CV) {
using namespace resize;
std::vector<TestArg> args = get_cv_args();
Checker<Resize> checker(handle());
for (auto &&arg: args) {
for (auto&& arg : args) {
checker.set_param(arg.param)
.set_epsilon(1 + 1e-3)
.set_dtype(0, dtype::Uint8())
.set_dtype(1, dtype::Uint8())
.execs({arg.src, arg.dst});
.set_epsilon(1 + 1e-3)
.set_dtype(0, dtype::Uint8())
.set_dtype(1, dtype::Uint8())
.execs({arg.src, arg.dst});
}
for (auto &&arg: args) {
for (auto&& arg : args) {
checker.set_param(arg.param)
.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.execs({arg.src, arg.dst});
.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.execs({arg.src, arg.dst});
}
}
//! Runs the Resize checker on every case from get_nchw44_args() with
//! Float32 source and destination tensors.
TEST_F(ARM_COMMON, RESIZE_NCHW44) {
    using namespace resize;
    std::vector<TestArg> cases = get_nchw44_args();
    Checker<Resize> checker(handle());

    for (size_t i = 0; i < cases.size(); ++i) {
        TestArg& tc = cases[i];
        checker.set_param(tc.param)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .execs({tc.src, tc.dst});
    }
}
//! Runs the Resize checker on every case from get_nchw88_args() with
//! Float16 source and destination tensors and an epsilon of 0.01.
TEST_F(ARM_COMMON, RESIZE_NCHW88) {
    using namespace resize;
    std::vector<TestArg> cases = get_nchw88_args();
    Checker<Resize> checker(handle());

    for (size_t i = 0; i < cases.size(); ++i) {
        TestArg& tc = cases[i];
        checker.set_param(tc.param)
                .set_epsilon(0.01)
                .set_dtype(0, dtype::Float16())
                .set_dtype(1, dtype::Float16())
                .execs({tc.src, tc.dst});
    }
}
} // namespace test
} // namespace megdnn
} // namespace test
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -6,12 +6,13 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/opr_param_defs.h"
#include "megdnn/basic_types.h"
#include <iostream>
#include "megdnn/basic_types.h"
#include "megdnn/opr_param_defs.h"
#include "./rng.h"
namespace megdnn {
......@@ -68,13 +69,15 @@ static inline std::vector<TestArg> get_args(IMode imode = IMode::INTER_LINEAR) {
std::vector<TestArg> args;
set_nchw_args(args);
if(imode == IMode::INTER_LINEAR) {
//! test NHWC with ch != 1 or ch != 3
if (imode == IMode::INTER_LINEAR) {
//! test NHWC with ch != 1 or ch != 3
param::Resize param;
param.format = param::Resize::Format::NHWC;
param.imode = imode;
args.emplace_back(param, TensorShape{2, 2, 3, 4}, TensorShape{2, 4, 6, 4});
args.emplace_back(param, TensorShape{2, 4, 6, 4}, TensorShape{2, 2, 3, 4});
args.emplace_back(param, TensorShape{2, 2, 3, 4},
TensorShape{2, 4, 6, 4});
args.emplace_back(param, TensorShape{2, 4, 6, 4},
TensorShape{2, 2, 3, 4});
}
return args;
}
......@@ -108,6 +111,48 @@ static inline std::vector<TestArg> get_nchw4_args() {
return args;
}
static inline std::vector<TestArg> get_nchw44_args() {
std::vector<TestArg> args;
param::Resize param;
param.format = param::Resize::Format::NCHW44;
param.imode = param::Resize::InterpolationMode::LINEAR;
rep(n, 4ul) rep(c, 4ul) rep(ih, 4ul) rep(iw, 4ul) rep(oh, 4ul) rep(ow, 4ul)
args.emplace_back(
param,
TensorShape{n + 1ul, c + 1ul, ih + 1ul, iw + 1ul, 4ul},
TensorShape{n + 1ul, c + 1ul, oh + 1ul, ow + 1ul, 4ul});
param.imode = param::Resize::InterpolationMode::NEAREST;
rep(n, 4ul) rep(c, 4ul) rep(ih, 4ul) rep(iw, 4ul) rep(oh, 4ul) rep(ow, 4ul)
args.emplace_back(
param,
TensorShape{n + 1ul, c + 1ul, ih + 1ul, iw + 1ul, 4ul},
TensorShape{n + 1ul, c + 1ul, oh + 1ul, ow + 1ul, 4ul});
return args;
}
static inline std::vector<TestArg> get_nchw88_args() {
std::vector<TestArg> args;
param::Resize param;
param.format = param::Resize::Format::NCHW88;
param.imode = param::Resize::InterpolationMode::LINEAR;
rep(n, 4ul) rep(c, 4ul) rep(ih, 4ul) rep(iw, 4ul) rep(oh, 4ul) rep(ow, 4ul)
args.emplace_back(
param,
TensorShape{n + 1ul, c + 1ul, ih + 1ul, iw + 1ul, 8ul},
TensorShape{n + 1ul, c + 1ul, oh + 1ul, ow + 1ul, 8ul});
param.imode = param::Resize::InterpolationMode::NEAREST;
rep(n, 4ul) rep(c, 4ul) rep(ih, 4ul) rep(iw, 4ul) rep(oh, 4ul) rep(ow, 4ul)
args.emplace_back(
param,
TensorShape{n + 1ul, c + 1ul, ih + 1ul, iw + 1ul, 8ul},
TensorShape{n + 1ul, c + 1ul, oh + 1ul, ow + 1ul, 8ul});
return args;
}
static inline std::vector<TestArg> get_cv_args() {
std::vector<TestArg> args;
......
此差异已折叠。
......@@ -10,9 +10,9 @@
* implied.
*/
#include "megbrain/opr/imgproc.h"
#include "./internal/megdnn_opr_wrapper.inl"
#include "megbrain/graph/grad_impl.h"
#include "megbrain/opr/imgproc.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/utility.h"
......@@ -340,7 +340,9 @@ void ResizeForward::outshape_by_symvar_do_get_output_shape(
//! The index of height, e.g.,[b, h, w, c], the height_idx = 1
size_t height_idx = 0;
if (param().format == Param::Format::NCHW ||
param().format == Param::Format::NCHW4) {
param().format == Param::Format::NCHW4 ||
param().format == Param::Format::NCHW44 ||
param().format == Param::Format::NCHW88) {
height_idx = 2;
} else {
height_idx = 1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册