提交 a7e28ebe 编写于 作者: M Megvii Engine Team

fix(dnn): fix winograd load error and cpuinfo test error

GitOrigin-RevId: 32c9ee58d1cffe1a6668b0c5871b026e128761af
上级 41b9db85
...@@ -339,6 +339,9 @@ void winograd_2x3_8x8_s8::input( ...@@ -339,6 +339,9 @@ void winograd_2x3_8x8_s8::input(
size_t nr_units_in_tile) { size_t nr_units_in_tile) {
megdnn_assert(IC % 8 == 0); megdnn_assert(IC % 8 == 0);
constexpr int alpha = 3 + 2 - 1; constexpr int alpha = 3 + 2 - 1;
constexpr int SIMD_WIDTH = 4;
//! the input is loaded as int8; this width is used to keep the border load valid
constexpr int board_security_width = std::max(2 * SIMD_WIDTH, alpha);
// OW = IW + 2 * PW - KERNEL_SIZE + 1 // OW = IW + 2 * PW - KERNEL_SIZE + 1
auto units_w = div_ceil<size_t>(IW + 2 * PW - KERNEL_SIZE + 1, OUTPUT_BLOCK_SIZE); auto units_w = div_ceil<size_t>(IW + 2 * PW - KERNEL_SIZE + 1, OUTPUT_BLOCK_SIZE);
...@@ -353,7 +356,8 @@ void winograd_2x3_8x8_s8::input( ...@@ -353,7 +356,8 @@ void winograd_2x3_8x8_s8::input(
int ih_start = nh * OUTPUT_BLOCK_SIZE - PH; int ih_start = nh * OUTPUT_BLOCK_SIZE - PH;
int iw_start = nw * OUTPUT_BLOCK_SIZE - PW; int iw_start = nw * OUTPUT_BLOCK_SIZE - PW;
if (ih_start >= 0 && ih_start + alpha <= static_cast<int>(IH) && if (ih_start >= 0 && ih_start + alpha <= static_cast<int>(IH) &&
iw_start >= 0 && iw_start + alpha <= static_cast<int>(IW)) { iw_start >= 0 &&
iw_start + board_security_width <= static_cast<int>(IW)) {
InputTransform2X3_qs8::prepare<true>( InputTransform2X3_qs8::prepare<true>(
input, patch, patchT, ih_start, iw_start, IH, IW, ic, IC); input, patch, patchT, ih_start, iw_start, IH, IW, ic, IC);
InputTransform2X3_qs8::transform( InputTransform2X3_qs8::transform(
......
...@@ -202,9 +202,9 @@ TEST(ARM_RUNTIME, CPUINFO_SDM8GEN1) { ...@@ -202,9 +202,9 @@ TEST(ARM_RUNTIME, CPUINFO_SDM8GEN1) {
ASSERT_TRUE(cpuinfo_has_arm_neon_dot()); ASSERT_TRUE(cpuinfo_has_arm_neon_dot());
ASSERT_FALSE(cpuinfo_has_arm_sve2()); ASSERT_FALSE(cpuinfo_has_arm_sve2());
#if MEGDNN_AARCH64
ASSERT_TRUE(cpuinfo_has_arm_i8mm()); ASSERT_TRUE(cpuinfo_has_arm_i8mm());
#endif
for (uint32_t i = 0; i < cpuinfo_get_processors_count(); i++) { for (uint32_t i = 0; i < cpuinfo_get_processors_count(); i++) {
ASSERT_EQ(cpuinfo_get_core(i), cpuinfo_get_processor(i)->core); ASSERT_EQ(cpuinfo_get_core(i), cpuinfo_get_processor(i)->core);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册