diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp index 1ca3797882807ae5f12b16483d90e359da6dfb99..c93278a661f72152debcef7066bdd751bccc5b4e 100644 --- a/src/operators/math/depthwise_conv_3x3.cpp +++ b/src/operators/math/depthwise_conv_3x3.cpp @@ -699,7 +699,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter, : output_data[(output_height - 1) * output_width + j]; } } - #pragma omp parallel for +#pragma omp parallel for for (int i = 1; i < output_height - 1; i++) { for (int m = 1; (m + 3) < output_width - 1; m = m + 4) { float *output_ptr = output_data + i * output_width + m; @@ -1466,6 +1466,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, Tensor *output, const Tensor *new_scale, const Tensor *new_bias, bool if_relu) { #if __ARM_NEON +#ifdef _OPENMP const float *input_data = input->data(); const float *filter_data = filter->data(); float *output_data = output->data(); @@ -1642,251 +1643,239 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, } } - // const float *input_data = input->data(); - // const float *filter_data = filter->data(); - // float *output_data = output->data(); - // const float *newscale_data = new_scale->data(); - // const float *newbias_data = new_bias->data(); - // - // float32x4_t vnewbias = vdupq_n_f32(0.0); - // float32x4_t vnewscale = vdupq_n_f32(1.0); - // - // const int in_h = static_cast(input->dims()[2]); - // const int in_w = static_cast(input->dims()[3]); - // const int out_h = static_cast(output->dims()[2]); - // const int out_w = static_cast(output->dims()[3]); - // const int out_l = out_h; - // const int in_l = in_h; - // const int inhxw = in_h * in_w; - // const int outhxw = out_h * out_w; - // const int if_pad = in_l - 1 == (out_l - 1) * 2 ? 
1 : 0; - // const int batch_size = static_cast(input->dims()[0]); - // const int c = static_cast(input->dims()[1]); - // const float *input_row_ptr; - // float *output_row_ptr; - // - // const int w_times = (out_w - 2) / 3; - // - // float32x4x2_t input_buff_mid{}, input_buff_bottom[w_times + 1]; - // float32x4_t elewise_res0, elewise_res1, elewise_res2, res3; - // int out2in_mid; - // float32x4_t zero = vdupq_n_f32(0.0); - // for (int b = batch_size; b > 0; --b) { - // const float *filter_data_tmp = filter_data; - // for (int j = 0; j < c; ++j) { - // auto output_data_tmp = output_data + j * out_h * out_w; - // auto input_data_tmp = input_data + j * in_h * in_w; - // auto input_const = input_data_tmp; - // - // vnewbias = vdupq_n_f32(newbias_data[j]); - // vnewscale = vdupq_n_f32(newscale_data[j]); - // - // float w00 = filter_data_tmp[0]; - // float w01 = filter_data_tmp[1]; - // float w02 = filter_data_tmp[2]; - // float w10 = filter_data_tmp[3]; - // float w11 = filter_data_tmp[4]; - // float w12 = filter_data_tmp[5]; - // float w20 = filter_data_tmp[6]; - // float w21 = filter_data_tmp[7]; - // float w22 = filter_data_tmp[8]; - // - // int h_mid = 0; - // - // for (; h_mid < out_h - 1; h_mid++) { - // input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w; - // output_row_ptr = output_data_tmp + 1 + h_mid * out_w; - // - // for (int w4 = 0; w4 < w_times + 1; w4++) { - // if (h_mid == 0) { - // elewise_res1 = zero; - // elewise_res0 = zero; - // elewise_res2 = zero; - // } else { - // elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01); - // elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00); - // elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02); - // } - // input_buff_mid = vld2q_f32(input_row_ptr); - // input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w); - // - // elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], - // w11); elewise_res0 = vmlaq_n_f32(elewise_res0, - // input_buff_mid.val[0], w10); 
elewise_res2 = - // vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12); - // - // elewise_res1 = - // vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], - // w21); - // elewise_res0 = - // vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], - // w20); - // elewise_res2 = - // vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], - // w22); - // - // res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1), - // vaddq_f32(elewise_res0, elewise_res1)); - // res3 = vmlaq_f32(vnewbias, vnewscale, res3); - // - // if (if_relu) { - // res3 = vmaxq_f32(res3, zero); - // } - // vst1q_f32(output_row_ptr, res3); - // - // input_row_ptr += 6; - // output_row_ptr += 3; - // } - // } - // clock(); - // - // input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w; - // output_row_ptr = output_data_tmp + 1 + h_mid * out_w; - // - // for (int w4 = 0; w4 < w_times + 1; w4++) { - // elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01); - // elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00); - // elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02); - // - // input_buff_mid = vld2q_f32(input_row_ptr); - // input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w); - // - // elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], - // w11); elewise_res0 = vmlaq_n_f32(elewise_res0, - // input_buff_mid.val[0], w10); elewise_res2 = - // vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12); - // - // if (!if_pad) { - // elewise_res1 = - // vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], - // w21); - // elewise_res0 = - // vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], - // w20); - // elewise_res2 = - // vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], - // w22); - // } - // res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1), - // vaddq_f32(elewise_res0, elewise_res1)); - // res3 = vmlaq_f32(vnewbias, vnewscale, res3); - // - // if (if_relu) { - // res3 = vmaxq_f32(res3, zero); - // } - // if ((w4 != w_times)) { - // 
vst1q_f32(output_row_ptr, res3); - // } else { - // if (out_l - 2 - w_times * 3 == 1) { - // vst1q_lane_f32(output_row_ptr, res3, 0); - // } else if (out_l - 2 - w_times * 3 == 2) { - // vst1q_lane_f32(output_row_ptr, res3, 0); - // vst1q_lane_f32(output_row_ptr + 1, res3, 1); - // } - // } - // input_row_ptr += 6; - // output_row_ptr += 3; - // } - // - // output_data_tmp[0] = input_const[0] * w11 + input_const[1] * w12 + - // input_const[in_l] * w21 + - // input_const[in_l + 1] * w22; - // - // out2in_mid = (out_l - 1) * 2; - // output_data_tmp[out_l - 1] = - // w10 * input_const[out2in_mid - 1] + w11 * - // input_const[out2in_mid] + w20 * input_const[out2in_mid + in_w - - // 1] + w21 * input_const[out2in_mid + in_w] + (1 - if_pad) * (w12 - // * input_const[out2in_mid + 1] + - // w22 * input_const[out2in_mid + in_w + 1]); - // - // out2in_mid = (out_l - 1) * 2 * in_w; - // - // output_data_tmp[out_l * (out_l - 1)] = - // w01 * input_const[out2in_mid - in_w] + - // w02 * input_const[out2in_mid - in_w + 1] + - // w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid + - // 1] + (1 - if_pad) * (w21 * input_const[out2in_mid + in_w] + - // w22 * input_const[out2in_mid + in_w + 1]); - // out2in_mid = (out_l - 1) * 2 * in_w + (out_l - 1) * 2; - // - // output_data_tmp[out_l * out_l - 1] = - // w00 * input_const[out2in_mid - in_w - 1] + - // w01 * input_const[out2in_mid - in_w] + - // w10 * input_const[out2in_mid - 1] + w11 * - // input_const[out2in_mid] + (1 - if_pad) * (w20 * - // input_const[out2in_mid + in_w - 1] + - // w21 * input_const[out2in_mid + in_w] + - // w02 * input_const[out2in_mid - in_w + 1] + - // w12 * input_const[out2in_mid + 1] + - // w22 * input_const[out2in_mid + in_w + 1]); - // output_data_tmp[0] = - // output_data_tmp[0] * newscale_data[j] + newbias_data[j]; - // output_data_tmp[out_l - 1] = - // output_data_tmp[out_l - 1] * newscale_data[j] + newbias_data[j]; - // output_data_tmp[out_l * (out_l - 1)] = - // output_data_tmp[out_l * (out_l 
- 1)] * newscale_data[j] + - // newbias_data[j]; - // output_data_tmp[out_l * out_l - 1] = - // output_data_tmp[out_l * out_l - 1] * newscale_data[j] + - // newbias_data[j]; - // if (if_relu) { - // output_data_tmp[0] = output_data_tmp[0] < 0 ? 0 : - // output_data_tmp[0]; output_data_tmp[out_l - 1] = - // output_data_tmp[out_l - 1] < 0 ? 0 : output_data_tmp[out_l - - // 1]; - // output_data_tmp[out_l * (out_l - 1)] = - // output_data_tmp[out_l * (out_l - 1)] < 0 - // ? 0 - // : output_data_tmp[out_l * (out_l - 1)]; - // output_data_tmp[out_l * out_l - 1] = - // output_data_tmp[out_l * out_l - 1] < 0 - // ? 0 - // : output_data_tmp[out_l * out_l - 1]; - // } - // for (int i = 1; i < out_h - 1; i++) { - // out2in_mid = i * 2 * in_w; - // output_data_tmp[i * out_l] = w01 * input_const[out2in_mid - in_w] - // + - // w02 * input_const[out2in_mid - in_w + - // 1] + w11 * input_const[out2in_mid] + - // w12 * input_const[out2in_mid + 1] + - // w21 * input_const[out2in_mid + in_w] - // + w22 * input_const[out2in_mid + in_w - // + 1]; - // - // out2in_mid = i * 2 * in_w + (out_l - 1) * 2; - // output_data_tmp[i * out_l + out_l - 1] = - // w00 * input_const[out2in_mid - in_w - 1] + - // w01 * input_const[out2in_mid - in_w] + - // w10 * input_const[out2in_mid - 1] + w11 * - // input_const[out2in_mid] + w20 * input_const[out2in_mid + in_w - // - 1] + w21 * input_const[out2in_mid + in_w] + (1 - if_pad) * - // (w02 * input_const[out2in_mid - in_w + 1] + - // w12 * input_const[out2in_mid + 1] + - // w22 * input_const[out2in_mid + in_w + 1]); - // output_data_tmp[i * out_l] = - // output_data_tmp[i * out_l] * newscale_data[j] + - // newbias_data[j]; - // output_data_tmp[i * out_l + out_l - 1] = - // output_data_tmp[i * out_l + out_l - 1] * newscale_data[j] + - // newbias_data[j]; - // if (if_relu) { - // output_data_tmp[i * out_l] = - // output_data_tmp[i * out_l] < 0 ? 
0 : output_data_tmp[i * - // out_l]; - // output_data_tmp[i * out_l + out_l - 1] = - // output_data_tmp[i * out_l + out_l - 1] < 0 - // ? 0 - // : output_data_tmp[i * out_l + out_l - 1]; - // } - // } - // filter_data_tmp += 9; - // } - // input_data += inhxw * c; - // output_data += outhxw * c; - // } +#else + + const float *input_data = input->data(); + const float *filter_data = filter->data(); + float *output_data = output->data(); + const float *newscale_data = new_scale->data(); + const float *newbias_data = new_bias->data(); + + float32x4_t vnewbias = vdupq_n_f32(0.0); + float32x4_t vnewscale = vdupq_n_f32(1.0); + + const int in_h = static_cast(input->dims()[2]); + const int in_w = static_cast(input->dims()[3]); + const int out_h = static_cast(output->dims()[2]); + const int out_w = static_cast(output->dims()[3]); + const int out_l = out_h; + const int in_l = in_h; + const int inhxw = in_h * in_w; + const int outhxw = out_h * out_w; + const int if_pad = in_l - 1 == (out_l - 1) * 2 ? 
1 : 0; + const int batch_size = static_cast(input->dims()[0]); + const int c = static_cast(input->dims()[1]); + const float *input_row_ptr; + float *output_row_ptr; + + const int w_times = (out_w - 2) / 3; + + float32x4x2_t input_buff_mid{}, input_buff_bottom[w_times + 1]; + float32x4_t elewise_res0, elewise_res1, elewise_res2, res3; + int out2in_mid; + float32x4_t zero = vdupq_n_f32(0.0); + for (int b = batch_size; b > 0; --b) { + const float *filter_data_tmp = filter_data; + for (int j = 0; j < c; ++j) { + auto output_data_tmp = output_data + j * out_h * out_w; + auto input_data_tmp = input_data + j * in_h * in_w; + auto input_const = input_data_tmp; + + vnewbias = vdupq_n_f32(newbias_data[j]); + vnewscale = vdupq_n_f32(newscale_data[j]); + + float w00 = filter_data_tmp[0]; + float w01 = filter_data_tmp[1]; + float w02 = filter_data_tmp[2]; + float w10 = filter_data_tmp[3]; + float w11 = filter_data_tmp[4]; + float w12 = filter_data_tmp[5]; + float w20 = filter_data_tmp[6]; + float w21 = filter_data_tmp[7]; + float w22 = filter_data_tmp[8]; + + int h_mid = 0; + + for (; h_mid < out_h - 1; h_mid++) { + input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w; + output_row_ptr = output_data_tmp + 1 + h_mid * out_w; + + for (int w4 = 0; w4 < w_times + 1; w4++) { + if (h_mid == 0) { + elewise_res1 = zero; + elewise_res0 = zero; + elewise_res2 = zero; + } else { + elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01); + elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00); + elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02); + } + input_buff_mid = vld2q_f32(input_row_ptr); + input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w); + + elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], w11); + elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10); + elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12); + + elewise_res1 = + vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21); + elewise_res0 = 
+ vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], w20); + elewise_res2 = + vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], w22); + + res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1), + vaddq_f32(elewise_res0, elewise_res1)); + res3 = vmlaq_f32(vnewbias, vnewscale, res3); + + if (if_relu) { + res3 = vmaxq_f32(res3, zero); + } + vst1q_f32(output_row_ptr, res3); + + input_row_ptr += 6; + output_row_ptr += 3; + } + } + clock(); + + input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w; + output_row_ptr = output_data_tmp + 1 + h_mid * out_w; + + for (int w4 = 0; w4 < w_times + 1; w4++) { + elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01); + elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00); + elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02); + + input_buff_mid = vld2q_f32(input_row_ptr); + input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w); + + elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], w11); + elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10); + elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12); + + if (!if_pad) { + elewise_res1 = + vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21); + elewise_res0 = + vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], w20); + elewise_res2 = + vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], w22); + } + res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1), + vaddq_f32(elewise_res0, elewise_res1)); + res3 = vmlaq_f32(vnewbias, vnewscale, res3); + + if (if_relu) { + res3 = vmaxq_f32(res3, zero); + } + if ((w4 != w_times)) { + vst1q_f32(output_row_ptr, res3); + } else { + if (out_l - 2 - w_times * 3 == 1) { + vst1q_lane_f32(output_row_ptr, res3, 0); + } else if (out_l - 2 - w_times * 3 == 2) { + vst1q_lane_f32(output_row_ptr, res3, 0); + vst1q_lane_f32(output_row_ptr + 1, res3, 1); + } + } + input_row_ptr += 6; + output_row_ptr += 3; + } + + output_data_tmp[0] = input_const[0] * w11 + input_const[1] * 
w12 + + input_const[in_l] * w21 + + input_const[in_l + 1] * w22; + + out2in_mid = (out_l - 1) * 2; + output_data_tmp[out_l - 1] = + w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] + + w20 * input_const[out2in_mid + in_w - 1] + + w21 * input_const[out2in_mid + in_w] + + (1 - if_pad) * (w12 * input_const[out2in_mid + 1] + + w22 * input_const[out2in_mid + in_w + 1]); + + out2in_mid = (out_l - 1) * 2 * in_w; + + output_data_tmp[out_l * (out_l - 1)] = + w01 * input_const[out2in_mid - in_w] + + w02 * input_const[out2in_mid - in_w + 1] + + w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid + 1] + + (1 - if_pad) * (w21 * input_const[out2in_mid + in_w] + + w22 * input_const[out2in_mid + in_w + 1]); + out2in_mid = (out_l - 1) * 2 * in_w + (out_l - 1) * 2; + + output_data_tmp[out_l * out_l - 1] = + w00 * input_const[out2in_mid - in_w - 1] + + w01 * input_const[out2in_mid - in_w] + + w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] + + (1 - if_pad) * (w20 * input_const[out2in_mid + in_w - 1] + + w21 * input_const[out2in_mid + in_w] + + w02 * input_const[out2in_mid - in_w + 1] + + w12 * input_const[out2in_mid + 1] + + w22 * input_const[out2in_mid + in_w + 1]); + output_data_tmp[0] = + output_data_tmp[0] * newscale_data[j] + newbias_data[j]; + output_data_tmp[out_l - 1] = + output_data_tmp[out_l - 1] * newscale_data[j] + newbias_data[j]; + output_data_tmp[out_l * (out_l - 1)] = + output_data_tmp[out_l * (out_l - 1)] * newscale_data[j] + + newbias_data[j]; + output_data_tmp[out_l * out_l - 1] = + output_data_tmp[out_l * out_l - 1] * newscale_data[j] + + newbias_data[j]; + if (if_relu) { + output_data_tmp[0] = output_data_tmp[0] < 0 ? 0 : output_data_tmp[0]; + output_data_tmp[out_l - 1] = + output_data_tmp[out_l - 1] < 0 ? 0 : output_data_tmp[out_l - 1]; + output_data_tmp[out_l * (out_l - 1)] = + output_data_tmp[out_l * (out_l - 1)] < 0 + ? 
0 + : output_data_tmp[out_l * (out_l - 1)]; + output_data_tmp[out_l * out_l - 1] = + output_data_tmp[out_l * out_l - 1] < 0 + ? 0 + : output_data_tmp[out_l * out_l - 1]; + } + for (int i = 1; i < out_h - 1; i++) { + out2in_mid = i * 2 * in_w; + output_data_tmp[i * out_l] = w01 * input_const[out2in_mid - in_w] + + w02 * input_const[out2in_mid - in_w + 1] + + w11 * input_const[out2in_mid] + + w12 * input_const[out2in_mid + 1] + + w21 * input_const[out2in_mid + in_w] + + w22 * input_const[out2in_mid + in_w + 1]; + out2in_mid = i * 2 * in_w + (out_l - 1) * 2; + output_data_tmp[i * out_l + out_l - 1] = + w00 * input_const[out2in_mid - in_w - 1] + + w01 * input_const[out2in_mid - in_w] + + w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] + + w20 * input_const[out2in_mid + in_w - 1] + + w21 * input_const[out2in_mid + in_w] + + (1 - if_pad) * (w02 * input_const[out2in_mid - in_w + 1] + + w12 * input_const[out2in_mid + 1] + + w22 * input_const[out2in_mid + in_w + 1]); + output_data_tmp[i * out_l] = + output_data_tmp[i * out_l] * newscale_data[j] + newbias_data[j]; + output_data_tmp[i * out_l + out_l - 1] = + output_data_tmp[i * out_l + out_l - 1] * newscale_data[j] + + newbias_data[j]; + if (if_relu) { + output_data_tmp[i * out_l] = + output_data_tmp[i * out_l] < 0 ? 0 : output_data_tmp[i * out_l]; + output_data_tmp[i * out_l + out_l - 1] = + output_data_tmp[i * out_l + out_l - 1] < 0 + ? 
0 + : output_data_tmp[i * out_l + out_l - 1]; + } + } + filter_data_tmp += 9; + } + input_data += inhxw * c; + output_data += outhxw * c; + } +#endif #endif } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f4a14f1bc4197051594a0f8609b4662ad4c7cefb..468cbd4ed6d579f7b39f8628a3e052e90ae26644 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -9,6 +9,11 @@ elseif ("mobilenet" IN_LIST NET) # gen test ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-mobilenet paddle-mobile) + + # gen test + ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-mobilenet-combine paddle-mobile) + elseif ("yolo" IN_LIST NET) # gen test ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) @@ -138,6 +143,10 @@ else () ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-mobilenetssd paddle-mobile) + # gen test + ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h) + target_link_libraries(test-mobilenet-combine paddle-mobile) + # gen test ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) target_link_libraries(test-sigmoid paddle-mobile) diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp index d7793f729866024e2560ad13ac5613172eecc4dd..5a3cc43a552ccec34817af2409af98e8db0ec9e5 100644 --- a/test/net/test_mobilenet.cpp +++ b/test/net/test_mobilenet.cpp @@ -44,5 +44,8 @@ int main() { << std::endl; } + std::cout + << "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?" 
+ << std::endl; return 0; } diff --git a/test/net/test_mobilenet_combine.cpp b/test/net/test_mobilenet_combine.cpp new file mode 100644 index 0000000000000000000000000000000000000000..af93d105ea0c290b1dd3a80310a39e0f52c8abaa --- /dev/null +++ b/test/net/test_mobilenet_combine.cpp @@ -0,0 +1,51 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "../test_helper.h" +#include "../test_include.h" + +int main() { + paddle_mobile::PaddleMobile paddle_mobile; + paddle_mobile.SetThreadNum(4); + auto time1 = time(); + if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model", + std::string(g_mobilenet_combined) + "/params", true)) { + auto time2 = time(); + std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl; + + std::vector input; + std::vector dims{1, 3, 224, 224}; + GetInput(g_test_image_1x3x224x224_banana, &input, dims); + + // Warm-up run, excluded from the timed loop below; prints the largest output value and its index. + auto vec_result = paddle_mobile.Predict(input, dims); + std::vector::iterator biggest = + std::max_element(std::begin(vec_result), std::end(vec_result)); + std::cout << " Max element is " << *biggest << " at position " + << std::distance(std::begin(vec_result), biggest) << std::endl; + + auto time3 = time(); + for (int i = 0; i < 10; ++i) { + auto vec_result = paddle_mobile.Predict(input, dims); + } + auto time4 = time(); + std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" + << std::endl; + } + std::cout + << "如果结果Nan请查看: 
test/images/test_image_1x3x224x224_float 是否存在?" + << std::endl; + return 0; +} diff --git a/test/test_helper.h b/test/test_helper.h index 658af447d6cfcd85c68ff350b104c2468d442e40..f6ad597ab122f4abda2ed255f0ec957c56d3cb46 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -27,6 +27,7 @@ limitations under the License. */ static const char *g_ocr = "../models/ocr"; static const char *g_mobilenet_ssd = "../models/mobilenet+ssd"; static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture"; +static const char *g_mobilenet_combined = "../models/mobilenet_combine"; static const char *g_squeezenet = "../models/squeezenet"; static const char *g_googlenet = "../models/googlenet"; static const char *g_mobilenet = "../models/mobilenet"; diff --git a/tools/op.cmake b/tools/op.cmake index 6b6cb13dbc49b2a6cd672ea4e637f6650d60f8d2..0eab67267032d3956a52b80ab7494c6572df7074 100644 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -21,7 +21,7 @@ if ("mobilenet" IN_LIST NET) set(ELEMENTWISEADD_OP ON) set(RELU_OP ON) set(SOFTMAX_OP ON) - set(SOFTMAX_OP ON) + set(MUL_OP ON) set(DEPTHWISECONV_OP ON) set(BATCHNORM_OP ON) set(POOL_OP ON)