Commit 149870a2, authored by eclipsess

dws2 if pad

Parent 6e90d1a1
@@ -1275,7 +1275,8 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
   const int inhxw = in_h * in_w;
   const int outhxw = out_h * out_w;
   /// todo : fix if_pad when w != h
-  const int if_pad = in_w - 1 == (out_w - 1) * 2 ? 1 : 0;
+  const int if_pad_r = in_w - 1 == (out_w - 1) * 2 ? 1 : 0;
+  const int if_pad_b = in_h - 1 == (out_h - 1) * 2 ? 1 : 0;
   const int batch_size = static_cast<int>(input->dims()[0]);
   const int c = static_cast<int>(input->dims()[1]);
   const float *input_row_ptr;
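The split flags record, per axis, whether the last output column (if_pad_r) or last output row (if_pad_b) has its 3x3 window centered on the last input column or row, so the taps that reach into the right or bottom zero padding can be masked out. A minimal standalone sketch of the same arithmetic for stride 2, pad 1 (the shapes below are illustrative, not from this commit):

#include <cstdio>

// Output extent of a 3x3, stride-2, pad-1 convolution along one axis.
static int ConvOutSize(int in) { return (in + 2 * 1 - 3) / 2 + 1; }

int main() {
  // Illustrative shapes; any in_h != in_w exercises the two flags separately.
  const int in_h = 7, in_w = 8;
  const int out_h = ConvOutSize(in_h);  // 4
  const int out_w = ConvOutSize(in_w);  // 4
  // Flag is 1 when the last output column/row is centered on the last input
  // column/row, i.e. its window reaches into the zero padding on that side.
  const int if_pad_r = (in_w - 1 == (out_w - 1) * 2) ? 1 : 0;  // 0 (even width)
  const int if_pad_b = (in_h - 1 == (out_h - 1) * 2) ? 1 : 0;  // 1 (odd height)
  printf("out_h=%d out_w=%d if_pad_r=%d if_pad_b=%d\n", out_h, out_w,
         if_pad_r, if_pad_b);
  return 0;
}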
@@ -1366,7 +1367,7 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
           elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10);
           elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
-          if (!if_pad) {
+          if (!if_pad_b) {
             elewise_res1 =
                 vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21);
             elewise_res0 =
@@ -1401,7 +1402,7 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
           w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
           w20 * input_const[out2in_mid + in_w - 1] +
           w21 * input_const[out2in_mid + in_w] +
-          (1 - if_pad) * (w12 * input_const[out2in_mid + 1] +
+          (1 - if_pad_r) * (w12 * input_const[out2in_mid + 1] +
                           w22 * input_const[out2in_mid + in_w + 1]);
       out2in_mid = (out_h - 1) * 2 * in_w;
@@ -1410,7 +1411,7 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
           w01 * input_const[out2in_mid - in_w] +
           w02 * input_const[out2in_mid - in_w + 1] +
           w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid + 1] +
-          (1 - if_pad) * (w21 * input_const[out2in_mid + in_w] +
+          (1 - if_pad_b) * (w21 * input_const[out2in_mid + in_w] +
                           w22 * input_const[out2in_mid + in_w + 1]);
       out2in_mid = (out_h - 1) * 2 * in_w + (out_w - 1) * 2;
@@ -1418,11 +1419,12 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
           w00 * input_const[out2in_mid - in_w - 1] +
           w01 * input_const[out2in_mid - in_w] +
           w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
-          (1 - if_pad) * (w20 * input_const[out2in_mid + in_w - 1] +
-                          w21 * input_const[out2in_mid + in_w] +
-                          w02 * input_const[out2in_mid - in_w + 1] +
-                          w12 * input_const[out2in_mid + 1] +
-                          w22 * input_const[out2in_mid + in_w + 1]);
+          (1 - if_pad_r) * (w20 * input_const[out2in_mid + in_w - 1] +
+                            w21 * input_const[out2in_mid + in_w]) +
+          (1 - if_pad_b) * (w02 * input_const[out2in_mid - in_w + 1] +
+                            w12 * input_const[out2in_mid + 1]) +
+          (1 - if_pad_r) * (1 - if_pad_b) * w22 *
+              input_const[out2in_mid + in_w + 1];
       if (if_bias) {
         output_data_tmp[0] += bias_data[j];
         output_data_tmp[out_w - 1] += bias_data[j];
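The corner element combines both flags. For reference, the bottom-right computation written as a plain scalar sketch (the function name and the w[3][3] layout are illustrative, not from this commit); the branches take the place of the (1 - if_pad_r) / (1 - if_pad_b) masks, with if_pad_r dropping the window's right column (w02, w12, w22) and if_pad_b its bottom row (w20, w21, w22), as in the single-edge cases above:

// Bottom-right output element of a 3x3, stride-2, pad-1 depthwise conv,
// written as straight scalar code. `in` is one input channel of size
// in_h x in_w (both assumed >= 3), w is the 3x3 filter for that channel.
float BottomRightCorner(const float *in, int in_h, int in_w, int out_h,
                        int out_w, const float w[3][3]) {
  const int if_pad_r = (in_w - 1 == (out_w - 1) * 2) ? 1 : 0;
  const int if_pad_b = (in_h - 1 == (out_h - 1) * 2) ? 1 : 0;
  // Input index of the window center for the last output row and column.
  const int mid = (out_h - 1) * 2 * in_w + (out_w - 1) * 2;
  float acc = w[0][0] * in[mid - in_w - 1] + w[0][1] * in[mid - in_w] +
              w[1][0] * in[mid - 1] + w[1][1] * in[mid];
  if (!if_pad_r) {  // right column of the window lies inside the input
    acc += w[0][2] * in[mid - in_w + 1] + w[1][2] * in[mid + 1];
  }
  if (!if_pad_b) {  // bottom row of the window lies inside the input
    acc += w[2][0] * in[mid + in_w - 1] + w[2][1] * in[mid + in_w];
  }
  if (!if_pad_r && !if_pad_b) {  // bottom-right tap needs both sides valid
    acc += w[2][2] * in[mid + in_w + 1];
  }
  return acc;
}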
@@ -1445,7 +1447,7 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
           w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
           w20 * input_const[out2in_mid + in_w - 1] +
           w21 * input_const[out2in_mid + in_w] +
-          (1 - if_pad) * (w02 * input_const[out2in_mid - in_w + 1] +
+          (1 - if_pad_r) * (w02 * input_const[out2in_mid - in_w + 1] +
                           w12 * input_const[out2in_mid + 1] +
                           w22 * input_const[out2in_mid + in_w + 1]);
       if (if_bias) {
@@ -1662,7 +1664,8 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
   const int inhxw = in_h * in_w;
   const int outhxw = out_h * out_w;
   /// todo : fix if_pad when w != h
-  const int if_pad = in_w - 1 == (out_w - 1) * 2 ? 1 : 0;
+  const int if_pad_r = in_w - 1 == (out_w - 1) * 2 ? 1 : 0;
+  const int if_pad_b = in_h - 1 == (out_h - 1) * 2 ? 1 : 0;
   const int batch_size = static_cast<int>(input->dims()[0]);
   const int c = static_cast<int>(input->dims()[1]);
   const int w_times = (out_w - 2) / 3;
@@ -1756,7 +1759,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
           elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10);
           elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
-          if (!if_pad) {
+          if (!if_pad_b) {
             elewise_res1 =
                 vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21);
             elewise_res0 =
@@ -1796,7 +1799,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
           w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
           w20 * input_const[out2in_mid + in_w - 1] +
           w21 * input_const[out2in_mid + in_w] +
-          (1 - if_pad) * (w12 * input_const[out2in_mid + 1] +
+          (1 - if_pad_r) * (w12 * input_const[out2in_mid + 1] +
                           w22 * input_const[out2in_mid + in_w + 1]);
       out2in_mid = (out_h - 1) * 2 * in_w;
@@ -1805,7 +1808,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
           w01 * input_const[out2in_mid - in_w] +
           w02 * input_const[out2in_mid - in_w + 1] +
           w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid + 1] +
-          (1 - if_pad) * (w21 * input_const[out2in_mid + in_w] +
+          (1 - if_pad_b) * (w21 * input_const[out2in_mid + in_w] +
                           w22 * input_const[out2in_mid + in_w + 1]);
       out2in_mid = (out_h - 1) * 2 * in_w + (out_w - 1) * 2;
@@ -1813,11 +1816,12 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
           w00 * input_const[out2in_mid - in_w - 1] +
           w01 * input_const[out2in_mid - in_w] +
           w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
-          (1 - if_pad) * (w20 * input_const[out2in_mid + in_w - 1] +
-                          w21 * input_const[out2in_mid + in_w] +
-                          w02 * input_const[out2in_mid - in_w + 1] +
-                          w12 * input_const[out2in_mid + 1] +
-                          w22 * input_const[out2in_mid + in_w + 1]);
+          (1 - if_pad_r) * (w20 * input_const[out2in_mid + in_w - 1] +
+                            w21 * input_const[out2in_mid + in_w]) +
+          (1 - if_pad_b) * (w02 * input_const[out2in_mid - in_w + 1] +
+                            w12 * input_const[out2in_mid + 1]) +
+          (1 - if_pad_r) * (1 - if_pad_b) * w22 *
+              input_const[out2in_mid + in_w + 1];
       output_data_tmp[0] =
           output_data_tmp[0] * newscale_data[j] + newbias_data[j];
       output_data_tmp[out_w - 1] =
@@ -1857,7 +1861,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
           w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
           w20 * input_const[out2in_mid + in_w - 1] +
           w21 * input_const[out2in_mid + in_w] +
-          (1 - if_pad) * (w02 * input_const[out2in_mid - in_w + 1] +
+          (1 - if_pad_r) * (w02 * input_const[out2in_mid - in_w + 1] +
                           w12 * input_const[out2in_mid + 1] +
                           w22 * input_const[out2in_mid + in_w + 1]);
       output_data_tmp[i * out_w] =
......