test=develop

e2df8071 · chenjiaoAngel · ff8c95d8 · e2df8071 · e2df8071 · e2df8071
4 changed files
--- a/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32.cc
+++ b/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32.cc
@@ -2630,8 +2630,8 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout,
        int cnt = cnt_col;
        asm volatile(INIT_S1 LEFT_COMPUTE_S1 LEFT_RESULT_S1_LEAKY_RELU
-                         MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU RIGHT_COMPUTE_S1
+                         MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU
-                             RIGHT_RESULT_S1_LEAKY_RELU
+                             RIGHT_COMPUTE_S1 RIGHT_RESULT_S1_LEAKY_RELU
                     : [cnt] "+r"(cnt),
                       [din_ptr0] "+r"(din_ptr0),
                       [din_ptr1] "+r"(din_ptr1),
@@ -2728,8 +2728,8 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout,
        unsigned int *rmask_ptr = rmask;
        unsigned int *vmask_ptr = vmask;
        asm volatile(INIT_S1 LEFT_COMPUTE_S1 LEFT_RESULT_S1_LEAKY_RELU
-                         MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU RIGHT_COMPUTE_S1
+                         MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU
-                             RIGHT_RESULT_S1_LEAKY_RELU
+                             RIGHT_COMPUTE_S1 RIGHT_RESULT_S1_LEAKY_RELU
                     : [dout_ptr1] "+r"(doutr0),
                       [dout_ptr2] "+r"(doutr1),
                       [din0_ptr] "+r"(din_ptr0),

--- a/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32_relu.cc
+++ b/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32_relu.cc
@@ -1202,7 +1202,7 @@ namespace math {
 * \brief depthwise convolution, kernel size 3x3, stride 1, pad 1, with bias,
 * width > 4
 */
- void conv_depthwise_3x3s1p1_bias_no_relu(float *dout,
+void conv_depthwise_3x3s1p1_bias_no_relu(float *dout,
                                         const float *din,
                                         const float *weights,
                                         const float *bias,
@@ -1670,7 +1670,7 @@ void conv_depthwise_3x3s1p1_bias_relu(float *dout,
              [din_ptr5] "+r"(din_ptr5),
              [doutr0] "+r"(doutr0),
              [doutr1] "+r"(doutr1),
-              [doutr2] "+r"(doutr2)，
+              [doutr2] "+r"(doutr2),
              [doutr3] "+r"(doutr3)
            : [w0] "w"(wr0),
              [w1] "w"(wr1),
@@ -2609,17 +2609,17 @@ void conv_depthwise_3x3s1p0_bias_relu(float *dout,
        int cnt = tile_w;
        unsigned int *rmask_ptr = rmask;
        unsigned int *vmask_ptr = vmask;
-        asm volatile(INIT_S1
+        asm volatile(
+            INIT_S1
            "sub %[din0_ptr], #8 @ 0pad + 2 float data overlap\n"
            "sub %[din1_ptr], #8 @ 0pad + 2 float data overlap\n"
            "sub %[din2_ptr], #8 @ 0pad + 2 float data overlap\n"
            "sub %[din3_ptr], #8 @ 0pad + 2 float data overlap\n"
            "vext.32  q6, q8, q9, #1     @ 0012\n"
-                     "vext.32  q7, q8, q9, #2     @ 1234\n" MID_COMPUTE_S1
+            "vext.32  q7, q8, q9, #2     @ 1234\n" MID_COMPUTE_S1 MID_RESULT_S1_RELU
-                         MID_RESULT_S1_RELU
            "cmp  %[remain], #1             \n"
-                     "blt 0f                         \n" RIGHT_COMPUTE_S1
+            "blt 0f                         \n" RIGHT_COMPUTE_S1 RIGHT_RESULT_S1_RELU
-                         RIGHT_RESULT_S1_RELU "0:                         \n"
+            "0:                         \n"
            : [dout_ptr1] "+r"(doutr0),
              [dout_ptr2] "+r"(doutr1),
              [din0_ptr] "+r"(din_ptr0),

--- a/lite/backends/arm/math/conv3x3s2p01_depthwise_fp32.cc
+++ b/lite/backends/arm/math/conv3x3s2p01_depthwise_fp32.cc
--- a/lite/backends/arm/math/conv3x3s2p01_depthwise_fp32_relu.cc
+++ b/lite/backends/arm/math/conv3x3s2p01_depthwise_fp32_relu.cc
@@ -1663,8 +1663,8 @@ void conv_depthwise_3x3s2p0_bias_relu(float* dout,
        }
        int cnt = tile_w;
        unsigned int* mask_ptr = dmask;
-        asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2_RELU
+        asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2_RELU RIGHT_COMPUTE_S2
-                        RIGHT_COMPUTE_S2 RIGHT_RESULT_S2_RELU
+                         RIGHT_RESULT_S2_RELU
                     : [din0_ptr] "+r"(din0_ptr),
                       [din1_ptr] "+r"(din1_ptr),
                       [din2_ptr] "+r"(din2_ptr),
@@ -1838,8 +1838,7 @@ void conv_depthwise_3x3s2p0_bias_no_relu(float* dout,
            MID_COMPUTE_S2 MID_RESULT_S2
            "cmp %w[remain], #1                           \n"
            "blt 4f                                     \n" RIGHT_COMPUTE_S2
-                  RIGHT_RESULT_S2
+                 RIGHT_RESULT_S2 "4:                                          \n"
-              "4:                                          \n"
            : [inptr0] "+r"(din0_ptr),
              [inptr1] "+r"(din1_ptr),
              [inptr2] "+r"(din2_ptr),
@@ -1908,8 +1907,8 @@ void conv_depthwise_3x3s2p0_bias_no_relu(float* dout,
        }
        int cnt = tile_w;
        unsigned int* mask_ptr = dmask;
-        asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2
+        asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2 RIGHT_COMPUTE_S2
-                        RIGHT_COMPUTE_S2 RIGHT_RESULT_S2
+                         RIGHT_RESULT_S2
                     : [din0_ptr] "+r"(din0_ptr),
                       [din1_ptr] "+r"(din1_ptr),
                       [din2_ptr] "+r"(din2_ptr),