提交 e2df8071 作者: chenjiaoAngel

test=develop

上级 ff8c95d8
......@@ -2630,8 +2630,8 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout,
int cnt = cnt_col;
asm volatile(INIT_S1 LEFT_COMPUTE_S1 LEFT_RESULT_S1_LEAKY_RELU
MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU RIGHT_COMPUTE_S1
RIGHT_RESULT_S1_LEAKY_RELU
MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU
RIGHT_COMPUTE_S1 RIGHT_RESULT_S1_LEAKY_RELU
: [cnt] "+r"(cnt),
[din_ptr0] "+r"(din_ptr0),
[din_ptr1] "+r"(din_ptr1),
......@@ -2728,8 +2728,8 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout,
unsigned int *rmask_ptr = rmask;
unsigned int *vmask_ptr = vmask;
asm volatile(INIT_S1 LEFT_COMPUTE_S1 LEFT_RESULT_S1_LEAKY_RELU
MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU RIGHT_COMPUTE_S1
RIGHT_RESULT_S1_LEAKY_RELU
MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU
RIGHT_COMPUTE_S1 RIGHT_RESULT_S1_LEAKY_RELU
: [dout_ptr1] "+r"(doutr0),
[dout_ptr2] "+r"(doutr1),
[din0_ptr] "+r"(din_ptr0),
......
......@@ -1202,7 +1202,7 @@ namespace math {
* \brief depthwise convolution, kernel size 3x3, stride 1, pad 1, with bias,
* width > 4
*/
void conv_depthwise_3x3s1p1_bias_no_relu(float *dout,
void conv_depthwise_3x3s1p1_bias_no_relu(float *dout,
const float *din,
const float *weights,
const float *bias,
......@@ -1670,7 +1670,7 @@ void conv_depthwise_3x3s1p1_bias_relu(float *dout,
[din_ptr5] "+r"(din_ptr5),
[doutr0] "+r"(doutr0),
[doutr1] "+r"(doutr1),
[doutr2] "+r"(doutr2)
[doutr2] "+r"(doutr2),
[doutr3] "+r"(doutr3)
: [w0] "w"(wr0),
[w1] "w"(wr1),
......@@ -2609,17 +2609,17 @@ void conv_depthwise_3x3s1p0_bias_relu(float *dout,
int cnt = tile_w;
unsigned int *rmask_ptr = rmask;
unsigned int *vmask_ptr = vmask;
asm volatile(INIT_S1
asm volatile(
INIT_S1
"sub %[din0_ptr], #8 @ 0pad + 2 float data overlap\n"
"sub %[din1_ptr], #8 @ 0pad + 2 float data overlap\n"
"sub %[din2_ptr], #8 @ 0pad + 2 float data overlap\n"
"sub %[din3_ptr], #8 @ 0pad + 2 float data overlap\n"
"vext.32 q6, q8, q9, #1 @ 0012\n"
"vext.32 q7, q8, q9, #2 @ 1234\n" MID_COMPUTE_S1
MID_RESULT_S1_RELU
"vext.32 q7, q8, q9, #2 @ 1234\n" MID_COMPUTE_S1 MID_RESULT_S1_RELU
"cmp %[remain], #1 \n"
"blt 0f \n" RIGHT_COMPUTE_S1
RIGHT_RESULT_S1_RELU "0: \n"
"blt 0f \n" RIGHT_COMPUTE_S1 RIGHT_RESULT_S1_RELU
"0: \n"
: [dout_ptr1] "+r"(doutr0),
[dout_ptr2] "+r"(doutr1),
[din0_ptr] "+r"(din_ptr0),
......
......@@ -1663,8 +1663,8 @@ void conv_depthwise_3x3s2p0_bias_relu(float* dout,
}
int cnt = tile_w;
unsigned int* mask_ptr = dmask;
asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2_RELU
RIGHT_COMPUTE_S2 RIGHT_RESULT_S2_RELU
asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2_RELU RIGHT_COMPUTE_S2
RIGHT_RESULT_S2_RELU
: [din0_ptr] "+r"(din0_ptr),
[din1_ptr] "+r"(din1_ptr),
[din2_ptr] "+r"(din2_ptr),
......@@ -1838,8 +1838,7 @@ void conv_depthwise_3x3s2p0_bias_no_relu(float* dout,
MID_COMPUTE_S2 MID_RESULT_S2
"cmp %w[remain], #1 \n"
"blt 4f \n" RIGHT_COMPUTE_S2
RIGHT_RESULT_S2
"4: \n"
RIGHT_RESULT_S2 "4: \n"
: [inptr0] "+r"(din0_ptr),
[inptr1] "+r"(din1_ptr),
[inptr2] "+r"(din2_ptr),
......@@ -1908,8 +1907,8 @@ void conv_depthwise_3x3s2p0_bias_no_relu(float* dout,
}
int cnt = tile_w;
unsigned int* mask_ptr = dmask;
asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2
RIGHT_COMPUTE_S2 RIGHT_RESULT_S2
asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2 RIGHT_COMPUTE_S2
RIGHT_RESULT_S2
: [din0_ptr] "+r"(din0_ptr),
[din1_ptr] "+r"(din1_ptr),
[din2_ptr] "+r"(din2_ptr),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册