未验证 提交 ac3e7709 编写于 作者: qnqinan's avatar qnqinan 提交者: GitHub

Merge branch 'develop' into develop

...@@ -699,7 +699,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter, ...@@ -699,7 +699,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
: output_data[(output_height - 1) * output_width + j]; : output_data[(output_height - 1) * output_width + j];
} }
} }
#pragma omp parallel for #pragma omp parallel for
for (int i = 1; i < output_height - 1; i++) { for (int i = 1; i < output_height - 1; i++) {
for (int m = 1; (m + 3) < output_width - 1; m = m + 4) { for (int m = 1; (m + 3) < output_width - 1; m = m + 4) {
float *output_ptr = output_data + i * output_width + m; float *output_ptr = output_data + i * output_width + m;
...@@ -1466,6 +1466,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, ...@@ -1466,6 +1466,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
Tensor *output, const Tensor *new_scale, Tensor *output, const Tensor *new_scale,
const Tensor *new_bias, bool if_relu) { const Tensor *new_bias, bool if_relu) {
#if __ARM_NEON #if __ARM_NEON
#ifdef _OPENMP
const float *input_data = input->data<float>(); const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>(); const float *filter_data = filter->data<float>();
float *output_data = output->data<float>(); float *output_data = output->data<float>();
...@@ -1642,251 +1643,239 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter, ...@@ -1642,251 +1643,239 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
} }
} }
// const float *input_data = input->data<float>(); #else
// const float *filter_data = filter->data<float>();
// float *output_data = output->data<float>(); const float *input_data = input->data<float>();
// const float *newscale_data = new_scale->data<float>(); const float *filter_data = filter->data<float>();
// const float *newbias_data = new_bias->data<float>(); float *output_data = output->data<float>();
// const float *newscale_data = new_scale->data<float>();
// float32x4_t vnewbias = vdupq_n_f32(0.0); const float *newbias_data = new_bias->data<float>();
// float32x4_t vnewscale = vdupq_n_f32(1.0);
// float32x4_t vnewbias = vdupq_n_f32(0.0);
// const int in_h = static_cast<int>(input->dims()[2]); float32x4_t vnewscale = vdupq_n_f32(1.0);
// const int in_w = static_cast<int>(input->dims()[3]);
// const int out_h = static_cast<int>(output->dims()[2]); const int in_h = static_cast<int>(input->dims()[2]);
// const int out_w = static_cast<int>(output->dims()[3]); const int in_w = static_cast<int>(input->dims()[3]);
// const int out_l = out_h; const int out_h = static_cast<int>(output->dims()[2]);
// const int in_l = in_h; const int out_w = static_cast<int>(output->dims()[3]);
// const int inhxw = in_h * in_w; const int out_l = out_h;
// const int outhxw = out_h * out_w; const int in_l = in_h;
// const int if_pad = in_l - 1 == (out_l - 1) * 2 ? 1 : 0; const int inhxw = in_h * in_w;
// const int batch_size = static_cast<int>(input->dims()[0]); const int outhxw = out_h * out_w;
// const int c = static_cast<int>(input->dims()[1]); const int if_pad = in_l - 1 == (out_l - 1) * 2 ? 1 : 0;
// const float *input_row_ptr; const int batch_size = static_cast<int>(input->dims()[0]);
// float *output_row_ptr; const int c = static_cast<int>(input->dims()[1]);
// const float *input_row_ptr;
// const int w_times = (out_w - 2) / 3; float *output_row_ptr;
//
// float32x4x2_t input_buff_mid{}, input_buff_bottom[w_times + 1]; const int w_times = (out_w - 2) / 3;
// float32x4_t elewise_res0, elewise_res1, elewise_res2, res3;
// int out2in_mid; float32x4x2_t input_buff_mid{}, input_buff_bottom[w_times + 1];
// float32x4_t zero = vdupq_n_f32(0.0); float32x4_t elewise_res0, elewise_res1, elewise_res2, res3;
// for (int b = batch_size; b > 0; --b) { int out2in_mid;
// const float *filter_data_tmp = filter_data; float32x4_t zero = vdupq_n_f32(0.0);
// for (int j = 0; j < c; ++j) { for (int b = batch_size; b > 0; --b) {
// auto output_data_tmp = output_data + j * out_h * out_w; const float *filter_data_tmp = filter_data;
// auto input_data_tmp = input_data + j * in_h * in_w; for (int j = 0; j < c; ++j) {
// auto input_const = input_data_tmp; auto output_data_tmp = output_data + j * out_h * out_w;
// auto input_data_tmp = input_data + j * in_h * in_w;
// vnewbias = vdupq_n_f32(newbias_data[j]); auto input_const = input_data_tmp;
// vnewscale = vdupq_n_f32(newscale_data[j]);
// vnewbias = vdupq_n_f32(newbias_data[j]);
// float w00 = filter_data_tmp[0]; vnewscale = vdupq_n_f32(newscale_data[j]);
// float w01 = filter_data_tmp[1];
// float w02 = filter_data_tmp[2]; float w00 = filter_data_tmp[0];
// float w10 = filter_data_tmp[3]; float w01 = filter_data_tmp[1];
// float w11 = filter_data_tmp[4]; float w02 = filter_data_tmp[2];
// float w12 = filter_data_tmp[5]; float w10 = filter_data_tmp[3];
// float w20 = filter_data_tmp[6]; float w11 = filter_data_tmp[4];
// float w21 = filter_data_tmp[7]; float w12 = filter_data_tmp[5];
// float w22 = filter_data_tmp[8]; float w20 = filter_data_tmp[6];
// float w21 = filter_data_tmp[7];
// int h_mid = 0; float w22 = filter_data_tmp[8];
//
// for (; h_mid < out_h - 1; h_mid++) { int h_mid = 0;
// input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
// output_row_ptr = output_data_tmp + 1 + h_mid * out_w; for (; h_mid < out_h - 1; h_mid++) {
// input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
// for (int w4 = 0; w4 < w_times + 1; w4++) { output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
// if (h_mid == 0) {
// elewise_res1 = zero; for (int w4 = 0; w4 < w_times + 1; w4++) {
// elewise_res0 = zero; if (h_mid == 0) {
// elewise_res2 = zero; elewise_res1 = zero;
// } else { elewise_res0 = zero;
// elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01); elewise_res2 = zero;
// elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00); } else {
// elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02); elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
// } elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
// input_buff_mid = vld2q_f32(input_row_ptr); elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
// input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w); }
// input_buff_mid = vld2q_f32(input_row_ptr);
// elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
// w11); elewise_res0 = vmlaq_n_f32(elewise_res0,
// input_buff_mid.val[0], w10); elewise_res2 = elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], w11);
// vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12); elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10);
// elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
// elewise_res1 =
// vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], elewise_res1 =
// w21); vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21);
// elewise_res0 = elewise_res0 =
// vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], w20);
// w20); elewise_res2 =
// elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], w22);
// vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0],
// w22); res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
// vaddq_f32(elewise_res0, elewise_res1));
// res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1), res3 = vmlaq_f32(vnewbias, vnewscale, res3);
// vaddq_f32(elewise_res0, elewise_res1));
// res3 = vmlaq_f32(vnewbias, vnewscale, res3); if (if_relu) {
// res3 = vmaxq_f32(res3, zero);
// if (if_relu) { }
// res3 = vmaxq_f32(res3, zero); vst1q_f32(output_row_ptr, res3);
// }
// vst1q_f32(output_row_ptr, res3); input_row_ptr += 6;
// output_row_ptr += 3;
// input_row_ptr += 6; }
// output_row_ptr += 3; }
// } clock();
// }
// clock(); input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
// output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
// input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
// output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
//
// for (int w4 = 0; w4 < w_times + 1; w4++) {
// elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
// elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
// elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
//
// input_buff_mid = vld2q_f32(input_row_ptr);
// input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
//
// elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1],
// w11); elewise_res0 = vmlaq_n_f32(elewise_res0,
// input_buff_mid.val[0], w10); elewise_res2 =
// vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
//
// if (!if_pad) {
// elewise_res1 =
// vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1],
// w21);
// elewise_res0 =
// vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0],
// w20);
// elewise_res2 =
// vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0],
// w22);
// }
// res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
// vaddq_f32(elewise_res0, elewise_res1));
// res3 = vmlaq_f32(vnewbias, vnewscale, res3);
//
// if (if_relu) {
// res3 = vmaxq_f32(res3, zero);
// }
// if ((w4 != w_times)) {
// vst1q_f32(output_row_ptr, res3);
// } else {
// if (out_l - 2 - w_times * 3 == 1) {
// vst1q_lane_f32(output_row_ptr, res3, 0);
// } else if (out_l - 2 - w_times * 3 == 2) {
// vst1q_lane_f32(output_row_ptr, res3, 0);
// vst1q_lane_f32(output_row_ptr + 1, res3, 1);
// }
// }
// input_row_ptr += 6;
// output_row_ptr += 3;
// }
//
// output_data_tmp[0] = input_const[0] * w11 + input_const[1] * w12 +
// input_const[in_l] * w21 +
// input_const[in_l + 1] * w22;
//
// out2in_mid = (out_l - 1) * 2;
// output_data_tmp[out_l - 1] =
// w10 * input_const[out2in_mid - 1] + w11 *
// input_const[out2in_mid] + w20 * input_const[out2in_mid + in_w -
// 1] + w21 * input_const[out2in_mid + in_w] + (1 - if_pad) * (w12
// * input_const[out2in_mid + 1] +
// w22 * input_const[out2in_mid + in_w + 1]);
//
// out2in_mid = (out_l - 1) * 2 * in_w;
//
// output_data_tmp[out_l * (out_l - 1)] =
// w01 * input_const[out2in_mid - in_w] +
// w02 * input_const[out2in_mid - in_w + 1] +
// w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid +
// 1] + (1 - if_pad) * (w21 * input_const[out2in_mid + in_w] +
// w22 * input_const[out2in_mid + in_w + 1]);
// out2in_mid = (out_l - 1) * 2 * in_w + (out_l - 1) * 2;
//
// output_data_tmp[out_l * out_l - 1] =
// w00 * input_const[out2in_mid - in_w - 1] +
// w01 * input_const[out2in_mid - in_w] +
// w10 * input_const[out2in_mid - 1] + w11 *
// input_const[out2in_mid] + (1 - if_pad) * (w20 *
// input_const[out2in_mid + in_w - 1] +
// w21 * input_const[out2in_mid + in_w] +
// w02 * input_const[out2in_mid - in_w + 1] +
// w12 * input_const[out2in_mid + 1] +
// w22 * input_const[out2in_mid + in_w + 1]);
// output_data_tmp[0] =
// output_data_tmp[0] * newscale_data[j] + newbias_data[j];
// output_data_tmp[out_l - 1] =
// output_data_tmp[out_l - 1] * newscale_data[j] + newbias_data[j];
// output_data_tmp[out_l * (out_l - 1)] =
// output_data_tmp[out_l * (out_l - 1)] * newscale_data[j] +
// newbias_data[j];
// output_data_tmp[out_l * out_l - 1] =
// output_data_tmp[out_l * out_l - 1] * newscale_data[j] +
// newbias_data[j];
// if (if_relu) {
// output_data_tmp[0] = output_data_tmp[0] < 0 ? 0 :
// output_data_tmp[0]; output_data_tmp[out_l - 1] =
// output_data_tmp[out_l - 1] < 0 ? 0 : output_data_tmp[out_l -
// 1];
// output_data_tmp[out_l * (out_l - 1)] =
// output_data_tmp[out_l * (out_l - 1)] < 0
// ? 0
// : output_data_tmp[out_l * (out_l - 1)];
// output_data_tmp[out_l * out_l - 1] =
// output_data_tmp[out_l * out_l - 1] < 0
// ? 0
// : output_data_tmp[out_l * out_l - 1];
// }
// for (int i = 1; i < out_h - 1; i++) {
// out2in_mid = i * 2 * in_w;
// output_data_tmp[i * out_l] = w01 * input_const[out2in_mid - in_w]
// +
// w02 * input_const[out2in_mid - in_w +
// 1] + w11 * input_const[out2in_mid] +
// w12 * input_const[out2in_mid + 1] +
// w21 * input_const[out2in_mid + in_w]
// + w22 * input_const[out2in_mid + in_w
// + 1];
//
// out2in_mid = i * 2 * in_w + (out_l - 1) * 2;
// output_data_tmp[i * out_l + out_l - 1] =
// w00 * input_const[out2in_mid - in_w - 1] +
// w01 * input_const[out2in_mid - in_w] +
// w10 * input_const[out2in_mid - 1] + w11 *
// input_const[out2in_mid] + w20 * input_const[out2in_mid + in_w
// - 1] + w21 * input_const[out2in_mid + in_w] + (1 - if_pad) *
// (w02 * input_const[out2in_mid - in_w + 1] +
// w12 * input_const[out2in_mid + 1] +
// w22 * input_const[out2in_mid + in_w + 1]);
// output_data_tmp[i * out_l] =
// output_data_tmp[i * out_l] * newscale_data[j] +
// newbias_data[j];
// output_data_tmp[i * out_l + out_l - 1] =
// output_data_tmp[i * out_l + out_l - 1] * newscale_data[j] +
// newbias_data[j];
// if (if_relu) {
// output_data_tmp[i * out_l] =
// output_data_tmp[i * out_l] < 0 ? 0 : output_data_tmp[i *
// out_l];
// output_data_tmp[i * out_l + out_l - 1] =
// output_data_tmp[i * out_l + out_l - 1] < 0
// ? 0
// : output_data_tmp[i * out_l + out_l - 1];
// }
// }
// filter_data_tmp += 9;
// }
// input_data += inhxw * c;
// output_data += outhxw * c;
// }
for (int w4 = 0; w4 < w_times + 1; w4++) {
elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
input_buff_mid = vld2q_f32(input_row_ptr);
input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], w11);
elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10);
elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
if (!if_pad) {
elewise_res1 =
vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21);
elewise_res0 =
vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], w20);
elewise_res2 =
vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], w22);
}
res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
vaddq_f32(elewise_res0, elewise_res1));
res3 = vmlaq_f32(vnewbias, vnewscale, res3);
if (if_relu) {
res3 = vmaxq_f32(res3, zero);
}
if ((w4 != w_times)) {
vst1q_f32(output_row_ptr, res3);
} else {
if (out_l - 2 - w_times * 3 == 1) {
vst1q_lane_f32(output_row_ptr, res3, 0);
} else if (out_l - 2 - w_times * 3 == 2) {
vst1q_lane_f32(output_row_ptr, res3, 0);
vst1q_lane_f32(output_row_ptr + 1, res3, 1);
}
}
input_row_ptr += 6;
output_row_ptr += 3;
}
output_data_tmp[0] = input_const[0] * w11 + input_const[1] * w12 +
input_const[in_l] * w21 +
input_const[in_l + 1] * w22;
out2in_mid = (out_l - 1) * 2;
output_data_tmp[out_l - 1] =
w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
w20 * input_const[out2in_mid + in_w - 1] +
w21 * input_const[out2in_mid + in_w] +
(1 - if_pad) * (w12 * input_const[out2in_mid + 1] +
w22 * input_const[out2in_mid + in_w + 1]);
out2in_mid = (out_l - 1) * 2 * in_w;
output_data_tmp[out_l * (out_l - 1)] =
w01 * input_const[out2in_mid - in_w] +
w02 * input_const[out2in_mid - in_w + 1] +
w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid + 1] +
(1 - if_pad) * (w21 * input_const[out2in_mid + in_w] +
w22 * input_const[out2in_mid + in_w + 1]);
out2in_mid = (out_l - 1) * 2 * in_w + (out_l - 1) * 2;
output_data_tmp[out_l * out_l - 1] =
w00 * input_const[out2in_mid - in_w - 1] +
w01 * input_const[out2in_mid - in_w] +
w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
(1 - if_pad) * (w20 * input_const[out2in_mid + in_w - 1] +
w21 * input_const[out2in_mid + in_w] +
w02 * input_const[out2in_mid - in_w + 1] +
w12 * input_const[out2in_mid + 1] +
w22 * input_const[out2in_mid + in_w + 1]);
output_data_tmp[0] =
output_data_tmp[0] * newscale_data[j] + newbias_data[j];
output_data_tmp[out_l - 1] =
output_data_tmp[out_l - 1] * newscale_data[j] + newbias_data[j];
output_data_tmp[out_l * (out_l - 1)] =
output_data_tmp[out_l * (out_l - 1)] * newscale_data[j] +
newbias_data[j];
output_data_tmp[out_l * out_l - 1] =
output_data_tmp[out_l * out_l - 1] * newscale_data[j] +
newbias_data[j];
if (if_relu) {
output_data_tmp[0] = output_data_tmp[0] < 0 ? 0 : output_data_tmp[0];
output_data_tmp[out_l - 1] =
output_data_tmp[out_l - 1] < 0 ? 0 : output_data_tmp[out_l - 1];
output_data_tmp[out_l * (out_l - 1)] =
output_data_tmp[out_l * (out_l - 1)] < 0
? 0
: output_data_tmp[out_l * (out_l - 1)];
output_data_tmp[out_l * out_l - 1] =
output_data_tmp[out_l * out_l - 1] < 0
? 0
: output_data_tmp[out_l * out_l - 1];
}
for (int i = 1; i < out_h - 1; i++) {
out2in_mid = i * 2 * in_w;
output_data_tmp[i * out_l] = w01 * input_const[out2in_mid - in_w] +
w02 * input_const[out2in_mid - in_w + 1] +
w11 * input_const[out2in_mid] +
w12 * input_const[out2in_mid + 1] +
w21 * input_const[out2in_mid + in_w] +
w22 * input_const[out2in_mid + in_w + 1];
out2in_mid = i * 2 * in_w + (out_l - 1) * 2;
output_data_tmp[i * out_l + out_l - 1] =
w00 * input_const[out2in_mid - in_w - 1] +
w01 * input_const[out2in_mid - in_w] +
w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
w20 * input_const[out2in_mid + in_w - 1] +
w21 * input_const[out2in_mid + in_w] +
(1 - if_pad) * (w02 * input_const[out2in_mid - in_w + 1] +
w12 * input_const[out2in_mid + 1] +
w22 * input_const[out2in_mid + in_w + 1]);
output_data_tmp[i * out_l] =
output_data_tmp[i * out_l] * newscale_data[j] + newbias_data[j];
output_data_tmp[i * out_l + out_l - 1] =
output_data_tmp[i * out_l + out_l - 1] * newscale_data[j] +
newbias_data[j];
if (if_relu) {
output_data_tmp[i * out_l] =
output_data_tmp[i * out_l] < 0 ? 0 : output_data_tmp[i * out_l];
output_data_tmp[i * out_l + out_l - 1] =
output_data_tmp[i * out_l + out_l - 1] < 0
? 0
: output_data_tmp[i * out_l + out_l - 1];
}
}
filter_data_tmp += 9;
}
input_data += inhxw * c;
output_data += outhxw * c;
}
#endif
#endif #endif
} }
......
...@@ -9,6 +9,11 @@ elseif ("mobilenet" IN_LIST NET) ...@@ -9,6 +9,11 @@ elseif ("mobilenet" IN_LIST NET)
# gen test # gen test
ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet paddle-mobile) target_link_libraries(test-mobilenet paddle-mobile)
# gen test
ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet-combine paddle-mobile)
elseif ("yolo" IN_LIST NET) elseif ("yolo" IN_LIST NET)
# gen test # gen test
ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
...@@ -138,6 +143,10 @@ else () ...@@ -138,6 +143,10 @@ else ()
ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h) ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenetssd paddle-mobile) target_link_libraries(test-mobilenetssd paddle-mobile)
# gen test
ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet-combine paddle-mobile)
# gen test # gen test
ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
target_link_libraries(test-sigmoid paddle-mobile) target_link_libraries(test-sigmoid paddle-mobile)
......
...@@ -44,5 +44,8 @@ int main() { ...@@ -44,5 +44,8 @@ int main() {
<< std::endl; << std::endl;
} }
std::cout
<< "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
<< std::endl;
return 0; return 0;
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
auto time1 = time();
if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model",
std::string(g_mobilenet_combined) + "/params", true)) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
// 预热一次
auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
auto time3 = time();
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
}
std::cout
<< "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
<< std::endl;
return 0;
}
...@@ -27,6 +27,7 @@ limitations under the License. */ ...@@ -27,6 +27,7 @@ limitations under the License. */
static const char *g_ocr = "../models/ocr"; static const char *g_ocr = "../models/ocr";
static const char *g_mobilenet_ssd = "../models/mobilenet+ssd"; static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture"; static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
static const char *g_mobilenet_combined = "../models/mobilenet_combine";
static const char *g_squeezenet = "../models/squeezenet"; static const char *g_squeezenet = "../models/squeezenet";
static const char *g_googlenet = "../models/googlenet"; static const char *g_googlenet = "../models/googlenet";
static const char *g_mobilenet = "../models/mobilenet"; static const char *g_mobilenet = "../models/mobilenet";
......
...@@ -21,7 +21,7 @@ if ("mobilenet" IN_LIST NET) ...@@ -21,7 +21,7 @@ if ("mobilenet" IN_LIST NET)
set(ELEMENTWISEADD_OP ON) set(ELEMENTWISEADD_OP ON)
set(RELU_OP ON) set(RELU_OP ON)
set(SOFTMAX_OP ON) set(SOFTMAX_OP ON)
set(SOFTMAX_OP ON) set(MUL_OP ON)
set(DEPTHWISECONV_OP ON) set(DEPTHWISECONV_OP ON)
set(BATCHNORM_OP ON) set(BATCHNORM_OP ON)
set(POOL_OP ON) set(POOL_OP ON)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册