提交 ff8c95d8 编写于 作者: C chenjiaoAngel

fix format test=develop

上级 7b282a0a
...@@ -2631,7 +2631,7 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout, ...@@ -2631,7 +2631,7 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout,
int cnt = cnt_col; int cnt = cnt_col;
asm volatile(INIT_S1 LEFT_COMPUTE_S1 LEFT_RESULT_S1_LEAKY_RELU asm volatile(INIT_S1 LEFT_COMPUTE_S1 LEFT_RESULT_S1_LEAKY_RELU
MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU RIGHT_COMPUTE_S1 MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU RIGHT_COMPUTE_S1
RIGHT_RESULT_S1_LEAKY_RELU RIGHT_RESULT_S1_LEAKY_RELU
: [cnt] "+r"(cnt), : [cnt] "+r"(cnt),
[din_ptr0] "+r"(din_ptr0), [din_ptr0] "+r"(din_ptr0),
[din_ptr1] "+r"(din_ptr1), [din_ptr1] "+r"(din_ptr1),
...@@ -2729,7 +2729,7 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout, ...@@ -2729,7 +2729,7 @@ void conv_depthwise_3x3s1p1_bias_leakyRelu(float *dout,
unsigned int *vmask_ptr = vmask; unsigned int *vmask_ptr = vmask;
asm volatile(INIT_S1 LEFT_COMPUTE_S1 LEFT_RESULT_S1_LEAKY_RELU asm volatile(INIT_S1 LEFT_COMPUTE_S1 LEFT_RESULT_S1_LEAKY_RELU
MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU RIGHT_COMPUTE_S1 MID_COMPUTE_S1 MID_RESULT_S1_LEAKY_RELU RIGHT_COMPUTE_S1
RIGHT_RESULT_S1_LEAKY_RELU RIGHT_RESULT_S1_LEAKY_RELU
: [dout_ptr1] "+r"(doutr0), : [dout_ptr1] "+r"(doutr0),
[dout_ptr2] "+r"(doutr1), [dout_ptr2] "+r"(doutr1),
[din0_ptr] "+r"(din_ptr0), [din0_ptr] "+r"(din_ptr0),
......
...@@ -1747,53 +1747,52 @@ void conv_depthwise_3x3s2p1_bias_leakyRelu(float* dout, ...@@ -1747,53 +1747,52 @@ void conv_depthwise_3x3s2p1_bias_leakyRelu(float* dout,
doutr1_ptr = write_ptr; doutr1_ptr = write_ptr;
} }
int cnt = cnt_col; int cnt = cnt_col;
asm volatile( asm volatile(INIT_S2 LEFT_COMPUTE_S2 LEFT_RESULT_S2_LEAKY_RELU
INIT_S2 LEFT_COMPUTE_S2 LEFT_RESULT_S2_LEAKY_RELU MID_COMPUTE_S2 MID_RESULT_S2_LEAKY_RELU
MID_COMPUTE_S2 MID_RESULT_S2_LEAKY_RELU RIGHT_COMPUTE_S2 RIGHT_RESULT_S2_LEAKY_RELU
RIGHT_COMPUTE_S2 RIGHT_RESULT_S2_LEAKY_RELU : [inptr0] "+r"(din0_ptr),
: [inptr0] "+r"(din0_ptr), [inptr1] "+r"(din1_ptr),
[inptr1] "+r"(din1_ptr), [inptr2] "+r"(din2_ptr),
[inptr2] "+r"(din2_ptr), [inptr3] "+r"(din3_ptr),
[inptr3] "+r"(din3_ptr), [inptr4] "+r"(din4_ptr),
[inptr4] "+r"(din4_ptr), [outptr0] "+r"(doutr0_ptr),
[outptr0] "+r"(doutr0_ptr), [outptr1] "+r"(doutr1_ptr),
[outptr1] "+r"(doutr1_ptr), [cnt] "+r"(cnt)
[cnt] "+r"(cnt) : [vzero] "w"(vzero),
: [vzero] "w"(vzero), [w0] "w"(wr0),
[w0] "w"(wr0), [w1] "w"(wr1),
[w1] "w"(wr1), [w2] "w"(wr2),
[w2] "w"(wr2), [remain] "r"(cnt_remain),
[remain] "r"(cnt_remain), [scale_ptr] "r"(scale),
[scale_ptr] "r"(scale), [mask1] "w"(vmask_rp1),
[mask1] "w"(vmask_rp1), [mask2] "w"(vmask_rp2),
[mask2] "w"(vmask_rp2), [wmask] "w"(wmask),
[wmask] "w"(wmask), [vbias] "w"(wbias)
[vbias] "w"(wbias) : "cc",
: "cc", "memory",
"memory", "v0",
"v0", "v1",
"v1", "v2",
"v2", "v3",
"v3", "v4",
"v4", "v5",
"v5", "v6",
"v6", "v7",
"v7", "v8",
"v8", "v9",
"v9", "v10",
"v10", "v11",
"v11", "v12",
"v12", "v13",
"v13", "v14",
"v14", "v15",
"v15", "v16",
"v16", "v17",
"v17", "v18",
"v18", "v19",
"v19", "v20",
"v20", "v21",
"v21", "v22");
"v22");
doutr0 = doutr0 + 2 * w_out; doutr0 = doutr0 + 2 * w_out;
} }
#else #else
...@@ -1830,36 +1829,36 @@ void conv_depthwise_3x3s2p1_bias_leakyRelu(float* dout, ...@@ -1830,36 +1829,36 @@ void conv_depthwise_3x3s2p1_bias_leakyRelu(float* dout,
} }
int cnt = cnt_col; int cnt = cnt_col;
unsigned int* mask_ptr = dmask; unsigned int* mask_ptr = dmask;
asm volatile( asm volatile(INIT_S2 LEFT_COMPUTE_S2 LEFT_RESULT_S2_LEAKY_RELU
INIT_S2 LEFT_COMPUTE_S2 LEFT_RESULT_S2_LEAKY_RELU MID_COMPUTE_S2 MID_COMPUTE_S2 MID_RESULT_S2_LEAKY_RELU
MID_RESULT_S2_LEAKY_RELU RIGHT_COMPUTE_S2 RIGHT_RESULT_S2_LEAKY_RELU RIGHT_COMPUTE_S2 RIGHT_RESULT_S2_LEAKY_RELU
: [din0_ptr] "+r"(din0_ptr), : [din0_ptr] "+r"(din0_ptr),
[din1_ptr] "+r"(din1_ptr), [din1_ptr] "+r"(din1_ptr),
[din2_ptr] "+r"(din2_ptr), [din2_ptr] "+r"(din2_ptr),
[outptr] "+r"(doutr0_ptr), [outptr] "+r"(doutr0_ptr),
[cnt] "+r"(cnt), [cnt] "+r"(cnt),
[mask_ptr] "+r"(mask_ptr) [mask_ptr] "+r"(mask_ptr)
: [remain] "r"(cnt_remain), : [remain] "r"(cnt_remain),
[wr0] "w"(wr0), [wr0] "w"(wr0),
[wr1] "w"(wr1), [wr1] "w"(wr1),
[wr2] "w"(wr2), [wr2] "w"(wr2),
[scale_ptr] "r"(scale), [scale_ptr] "r"(scale),
[bias] "r"(bias_c) [bias] "r"(bias_c)
: "cc", : "cc",
"memory", "memory",
"q3", "q3",
"q4", "q4",
"q5", "q5",
"q6", "q6",
"q7", "q7",
"q8", "q8",
"q9", "q9",
"q10", "q10",
"q11", "q11",
"q12", "q12",
"q13", "q13",
"q14", "q14",
"q15"); "q15");
doutr0 = doutr0 + w_out; doutr0 = doutr0 + w_out;
} }
#endif #endif
...@@ -2349,8 +2348,8 @@ void conv_depthwise_3x3s2p0_bias_relu6(float* dout, ...@@ -2349,8 +2348,8 @@ void conv_depthwise_3x3s2p0_bias_relu6(float* dout,
} }
int cnt = tile_w; int cnt = tile_w;
unsigned int* mask_ptr = dmask; unsigned int* mask_ptr = dmask;
asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2_RELU6 asm volatile(INIT_S2 MID_COMPUTE_S2 MID_RESULT_S2_RELU6 RIGHT_COMPUTE_S2
RIGHT_COMPUTE_S2 RIGHT_RESULT_S2_RELU6 RIGHT_RESULT_S2_RELU6
: [din0_ptr] "+r"(din0_ptr), : [din0_ptr] "+r"(din0_ptr),
[din1_ptr] "+r"(din1_ptr), [din1_ptr] "+r"(din1_ptr),
[din2_ptr] "+r"(din2_ptr), [din2_ptr] "+r"(din2_ptr),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册