diff --git a/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32.cc b/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32.cc index 9de59d2185debc30f8f9a002f977f29cbbf300d0..66d61413fc43fd518e0b34c7bc8d7b7bf5cc72a7 100644 --- a/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32.cc +++ b/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32.cc @@ -617,7 +617,7 @@ void conv_depthwise_3x3s1_fp32(const float *din, "fcmge v18.4s, v12.4s, %[vzero].4s \n" /* vcgeq_f32 */ \ "fcmge v19.4s, v13.4s, %[vzero].4s \n" /* vcgeq_f32 */ \ "fmul v20.4s, v12.4s, %[vscale].4s \n" /* mul */ \ - "fmul v21.4s, v12.4s, %[vscale].4s \n" /* mul */ \ + "fmul v21.4s, v13.4s, %[vscale].4s \n" /* mul */ \ "ld1 {v8.4s}, [%[din_ptr4]], #16 \n" /*vld1q_f32(din_ptr0)*/ \ \ "fmla v15.4s , v16.4s, %[w1].s[0]\n" /* outr00 += din2_0123 * w0[1]*/ \ @@ -1627,7 +1627,7 @@ void conv_depthwise_3x3s1_fp32(const float *din, \ "vbif q4, q6, q15 @ choose \n" \ "vcge.f32 q7, q5, %q[vzero] @ q0 > 0 \n" \ - "vmul.f32 q6, q4, q14 \n" \ + "vmul.f32 q6, q5, q14 \n" \ "vst1.32 {d8-d9}, [%[dout_ptr1]]! @ store result, add pointer\n" \ "vld1.32 {d28-d29}, [%[din3_ptr]]! 
@ load din r0\n" \ \ @@ -1815,8 +1815,8 @@ void conv_depthwise_3x3s1_fp32(const float *din, "vmul.f32 q12, q14, q9 \n" \ "vmul.f32 q13, q15, q9 \n" \ \ - "vbif q14, q10, q12 \n" \ - "vbif q15, q11, q13 \n" \ + "vbif q14, q12, q10 \n" \ + "vbif q15, q13, q11 \n" \ \ "vst1.32 {d28-d29}, [%[out1]]\n" \ "vst1.32 {d30-d31}, [%[out2]]\n" diff --git a/lite/backends/arm/math/conv5x5s2_depthwise_fp32.cc b/lite/backends/arm/math/conv5x5s2_depthwise_fp32.cc index 5524732029f07a0cd4d31f3c28a2435d45b50d67..a72b7553e0c8fddcb9028b0e6125281a07e65387 100644 --- a/lite/backends/arm/math/conv5x5s2_depthwise_fp32.cc +++ b/lite/backends/arm/math/conv5x5s2_depthwise_fp32.cc @@ -209,9 +209,9 @@ namespace math { "fcmge v7.4s, v22.4s, v0.4s \n" /* vcgeq_f32 */ \ "fmul v8.4s, v22.4s, %[vscale].4s \n" /* mul */ \ "bif v19.16b, v2.16b, v1.16b \n" /* choose*/ \ - "bif v19.16b, v4.16b, v3.16b \n" /* choose*/ \ - "bif v19.16b, v6.16b, v5.16b \n" /* choose*/ \ - "bif v19.16b, v8.16b, v7.16b \n" /* choose*/ + "bif v20.16b, v4.16b, v3.16b \n" /* choose*/ \ + "bif v21.16b, v6.16b, v5.16b \n" /* choose*/ \ + "bif v22.16b, v8.16b, v7.16b \n" /* choose*/ #define STORE /* save result */ \ "str q19, [%[outc0]], #16\n" \ "str q20, [%[outc1]], #16\n" \ diff --git a/lite/tests/math/conv_compute_test.cc b/lite/tests/math/conv_compute_test.cc index 53a9a00ccf2ad80e5ccd9d9b3a7244be769c9d7a..df238ceae9e39541fb954d9262832d01cd9d3b7f 100644 --- a/lite/tests/math/conv_compute_test.cc +++ b/lite/tests/math/conv_compute_test.cc @@ -306,8 +306,8 @@ void test_conv_fp32(const std::vector& input_dims, const float leakey_relu_scale) {} #endif // LITE_WITH_ARM -// TODO(chenjiaoAngel): fix me, diff: 3x3 depthwise conv -#if 0 /// 3x3dw +// TODO(chenjiaoAngel): fix multi-threads, diff: 3x3 depthwise conv +#if 1 /// 3x3dw TEST(TestConv3x3DW, test_conv3x3_depthwise) { if (FLAGS_basic_test) { for (auto& stride : {1, 2}) { @@ -334,7 +334,7 @@ TEST(TestConv3x3DW, test_conv3x3_depthwise) { {1, 1}, flag_bias, flag_act, - {1, 2, 4}, 
+ {1}, {FLAGS_power_mode}, leakey_relu_scale); } @@ -352,12 +352,7 @@ TEST(TestConv3x3DW, test_conv3x3_depthwise) { #if 1 /// 5x5dw TEST(TestConv5x5DW, test_conv5x5_depthwise) { if (FLAGS_basic_test) { -#ifdef __aarch64__ - // TODO(chenjiaoAngel): fix me, diff: arm64 5x5s2 depthwise conv - for (auto& stride : {1}) { -#else for (auto& stride : {1, 2}) { -#endif for (auto& pad_left : {0, 1, 2}) { for (auto& pad_right : {0, 1, 2}) { for (auto& pad_top : {0, 1, 2}) {