diff --git a/python/tools/imagetools/imagetools.py b/python/tools/imagetools/imagetools.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a4432858007d6858f2728815670cfd1ed5ec786
--- /dev/null
+++ b/python/tools/imagetools/imagetools.py
@@ -0,0 +1,77 @@
+# coding=utf-8
+import cv2
+from array import array
+
+
+def resize_take_rgbs(path, shape_h_w):
+    """Read an image, resize it and split it into flat channel lists.
+
+    :param path: image file handed to cv2.imread.
+    :param shape_h_w: sequence whose items 0 and 1 are the target height
+        and width.
+    :return: (bs_, gs_, rs_) - values of channel 0, 1 and 2 of the resized
+        image, each flattened row by row.
+    :raises IOError: if cv2.imread could not load the file.
+    """
+    print '--------------resize_take_rgbs-----------------begin'
+    image = cv2.imread(path)
+    if image is None:
+        # cv2.imread signals failure by returning None, not by raising.
+        raise IOError('cannot read image: %s' % path)
+    # print image.shape
+    cv2.imshow("before", image)
+
+    print_rgb(image[0, 0])
+    # image len may be for .just check it
+    # image.resize(shape_h_w)
+
+    height = shape_h_w[0]
+    width = shape_h_w[1]
+    # cv2.resize expects dsize as (width, height), not (height, width);
+    # the result is indexed [row, column] as the loops below assume.
+    image = cv2.resize(image, (width, height))
+
+    cv2.imshow("after", image)
+    print image.shape
+
+    rs_ = []
+    gs_ = []
+    bs_ = []
+    for h in range(0, height):
+        for w in range(0, width):
+            bs_.append(image[h, w, 0])
+            gs_.append(image[h, w, 1])
+            rs_.append(image[h, w, 2])
+
+    # print image[2, 2, 0]/255.
+    print len(bs_)
+    print len(gs_)
+    print len(rs_)
+    print '--------------resize_take_rgbs-----------------end'
+    return bs_, gs_, rs_
+
+
+def print_rgb((b, g, r)):
+    """Print one pixel given as a (b, g, r) triple."""
+    print "像素 - R:%d,G:%d,B:%d" % (r, g, b)  # show the pixel value
+    #
+    # image[0, 0] = (100, 150, 200)  # change the pixel at position (0,0)
+    #
+    # (b, g, r) = image[0, 0]  # read pixel (0,0) again
+    # print "位置(0,0)处的像素 - 红:%d,绿:%d,蓝:%d" % (r, g, b)  # show the changed pixel value
+    #
+    # corner = image[0:100, 0:100]  # read a block of pixels
+    # cv2.imshow("Corner", corner)  # show the block that was read
+    #
+    # image[0:100, 0:100] = (0, 255, 0);  # overwrite the block that was read
+    #
+    # cv2.imshow("Updated", image)  # show the image
+    #
+    # cv2.waitKey(0)  # pause the program
+
+
+def save_to_file(to_file_name, array):
+    """Write `array` to the file `to_file_name` through array.tofile()."""
+    # `with` closes the file even when tofile() raises.
+    with open(to_file_name, "wb") as to_file:
+        array.tofile(to_file)
diff --git a/python/tools/imagetools/img2nchw.py b/python/tools/imagetools/img2nchw.py
new file mode 100644
index 0000000000000000000000000000000000000000..70ca456a1b1b5d20b92d0aaa51b01abb352c1d54
--- /dev/null
+++ b/python/tools/imagetools/img2nchw.py
@@ -0,0 +1,69 @@
+# coding=utf-8
+import cv2
+from array import array
+import imagetools as tools
+from enum import Enum
+
+
+class ChannelType(Enum):
+    RGB = 0  # no trailing comma: "0," would make the value the tuple (0,)
+    BGR = 1
+
+
+def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
+    print '--------------combine_bgrs_nchw-----------------begin'
+    print "scale: %f" % scale
+    print means_b_g_r
+    # print len(bgrs)
+    bs = bgrs[0]
+    gs = bgrs[1]
+    rs = bgrs[2]
+
+    assert len(bs) == len(gs) == len(rs)
+    print len(bs)
+    bgrs_float_array = array('f')
+
+    if channel_type == ChannelType.BGR:
+        print 'bgr'
+        for i in range(0, len(bs)):
+            bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
+        for i in range(0, len(gs)):
+            bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale)  # g
+        for i in range(0, len(rs)):
+            bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
+    elif channel_type == ChannelType.RGB:
+        print 'rgb'
+
+        for i in range(0, len(rs)):
+            bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
+        for i in range(0, len(gs)):
+            bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale)  # g
+        for i in range(0, len(bs)):
+            bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
+
+    print len(bgrs_float_array)
+
+    print '------------------'
+    print bgrs_float_array[0]
+    print bgrs_float_array[416 * 416 * 2 + 416 * 2 + 2]
+
+    # for i in range(0, 9):
+    #     print'bs %d' % i
+    #     print bs[i] / 255.
+
+    print bs[416 * 2 + 2] / 255.
+    print '--------------combine_bgrs_nchw-----------------end'
+
+    return bgrs_float_array
+
+
+# bgrs = tools.resize_take_rgbs('banana.jpeg', (224, 224, 3))
+# array = combine_bgrs_nchw(bgrs, (103.94, 116.78, 123.68), 0.017, array,ChannelType.BGR)
+# tools.save_to_file('banana_1_3_224_224_nchw_float')
+
+# cv2.waitKey(0)
+
+
+bgrs = tools.resize_take_rgbs('datas/newyolo.jpg', (416, 416, 3))
+array = combine_bgrs_nchw(bgrs, (0, 0, 0), 1. / 255, ChannelType.RGB)
+tools.save_to_file('datas/desktop_1_3_416_416_nchw_float', array)
diff --git a/python/tools/imagetools/img2nhwc.py b/python/tools/imagetools/img2nhwc.py
new file mode 100644
index 0000000000000000000000000000000000000000..c982fe303ecde08a9de1827ca67024567322d47f
--- /dev/null
+++ b/python/tools/imagetools/img2nhwc.py
@@ -0,0 +1,34 @@
+# coding=utf-8
+import cv2
+from array import array
+import imagetools as tools
+
+
+def combine_bgrs_nhwc(bgrs, means_b_g_r, scale):
+    print "scale: %f" % scale
+    print means_b_g_r
+    # print len(bgrs)
+    bs = bgrs[0]
+    gs = bgrs[1]
+    rs = bgrs[2]
+    assert len(bs) == len(gs) == len(rs)
+    # print len(bs)
+    bgrs_float_array = array('f')
+    for i in range(0, len(bs)):
+        bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
+        bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale)  # g
+        bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
+
+    print len(bgrs_float_array)
+
+    print '------------------'
+    print bgrs_float_array[0]
+    print bgrs_float_array[999]
+    return bgrs_float_array
+
+
+bgrs = tools.resize_take_rgbs('newyolo_1.jpg', (416, 416, 3))
+array = combine_bgrs_nhwc(bgrs, (0, 0, 0), 1.0 / 255)
+tools.save_to_file('desktop_1_3_416_416_nhwc_float', array)
+
+cv2.waitKey(0)
diff --git a/python/tools/imagetools/numpy2binary.py b/python/tools/imagetools/numpy2binary.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd4bc6e10074183b8dcee4122860c4140ff54229
--- /dev/null
+++ b/python/tools/imagetools/numpy2binary.py
@@ -0,0 +1,47 @@
+# coding=utf-8
+
+# This script dumps a numpy array into a binary file
+import cv2
+import numpy as np
+import imagetools as tools
+from array import array
+
+#
+# image = cv2.imread(path)
+# print image.shape
+#
+# print_rgb(image[0, 0])
+# # image len may be for .just check it
+# image.resize(shape_h_w)
+
+
+data = np.fromfile('datas/img.res')
+print data.size
+print data[0]
+
+data.reshape(1, 3, 416, 416)  # NOTE: result is discarded, so data stays flat for the flat indexing below
+out_array = array('f')
+print'--------------------'
+print data.size
+print data[0]
+
+print '如果是nhwc --------'
+# rgb rgb rgb rgb rgb
+print data[416 * 3 * 2 + 3 * 2 + 2]
+# print data[2]
+
+print '如果是nchw --------'
+# rgb rgb rgb rgb rgb
+print data[416 * 416 * 2 + 416 * 2 + 2]
+# print data[2]
+
+# it clearly is nchw
+
+for i in range(0, data.size):
+    out_array.append(data[i])
+
+print len(out_array)
+
+print out_array[416 * 416 * 2 + 416 * 2 + 2]
+
+tools.save_to_file('datas/in_put_1_3_416_416_2', out_array)
diff --git a/python/tools/mdl2fluid/model_combine.py b/python/tools/mdl2fluid/model_combine.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae3ca8a786dc0d4032deda35c33f44d3d96e983d
--- /dev/null
+++ b/python/tools/mdl2fluid/model_combine.py
@@ -0,0 +1,19 @@
+# coding=utf-8
+import os
+
+path = "yolo_v2_tofile_source/"  # source folder
+to_file_path = "yolo_v2_tofile_combined/params"
+files = os.listdir(path)  # names of all entries in the folder
+files.sort(cmp=None, key=str.lower)
+to_file = open(to_file_path, "wb")
+
+for file in files:  # walk the folder entries
+    name = path + "/" + file
+    # Test the joined path: the bare name would be checked against the cwd.
+    if not os.path.isdir(name):  # only entries that are not directories are opened
+        print 'name: ' + name
+        from_file = open(name, "rb")
+        to_file.write(from_file.read())
+        from_file.close()
+
+to_file.close()
diff --git a/python/tools/mdl2fluid/swicher.py b/python/tools/mdl2fluid/swicher.py
index 0cf39959ff59bee4495ca92f8276dcc49e094686..bfe0360fd5b32f5e6fa61f6f05a0a384fb3a1e9b 100644
--- a/python/tools/mdl2fluid/swicher.py
+++ b/python/tools/mdl2fluid/swicher.py
@@ -66,7 +66,7 @@ class Swichter:
 
     def read_head(self, head_file):
         from_file = open(head_file, "rb")
-        read = from_file.read(20)
+        read = from_file.read(24)
         # print read
         from_file.close()
         # print read
@@ -84,9 +84,37 @@ class Swichter:
         to_file.close()
         pass
 
+    def copy_padding_add_head(self, from_file_name, to_file_name, tmp_file_name, padding):
+        """Copy from_file_name to to_file_name, skipping the first `padding`
+        bytes and writing the head returned by read_head() in front.
+
+        tmp_file_name is accepted but not used.
+        """
+        print'padding = %d' % padding
+        from_file = open(from_file_name, "rb")
+        # print len(from_file.read())
+        from_file.seek(padding, 0)
+
+        read = from_file.read()
+        print len(read)
+
+        to_file = open(to_file_name, "wb")
+        # tmp_file = open(tmp_file_name, "wb")
+
+        head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases')
+        to_file.write(head)
+        to_file.write(read)
+        from_file.close()
+        to_file.close()
+        pass
+
+# Swichter().nhwc2nchw_one_slice_add_head(
+#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/conv1_0.bin',
+#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/conv1_0',
+#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/.tmp',
+#     32,
+#     3, 3, 3)
+
+# Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases')
 
-# Swichter().nhwc2nchw_one_slice(
-#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/conv5_6_dw_0.bin',
-#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/conv5_6_dw_0', 1,
-#     512, 3, 3)
-Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases')
+# Swichter().copy_add_head('datas/model.0.0.weight', 'datas/conv1_0', '')
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index 33a6ff359515b0cb6f8e9c2dd2c10af6001490e5..c8d8f52a427bb1ee2b9fa04c9ef09f8e626f11b0 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -233,6 +233,13 @@ void Executor::InitMemory() {
             Get_binary_data(program_.model_path + "/" + var_desc->Name());
         char *data = origin_data;
         LoadMemory(*var_desc, tensor, &data);
+
+        //        DLOG << "----- " << var_desc->Name();
+        //        DLOG << "----- " << tensor->dims();
+        //        float *pDouble = tensor->template data();
+        //        for (int i = 0; i < tensor->numel() && i < 30; ++i) {
+        //          std::cout << pDouble[i] << std::endl;
+        //        }
         delete origin_data;
       } else {
         if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
diff --git a/src/operators/kernel/central-arm-func/conv_add_arm_func.h b/src/operators/kernel/central-arm-func/conv_add_arm_func.h
index 643ee84529e01aebc33a144b4c7a8181ff39a1c9..d71bc235977236fbd0dd332df556ea4bd41eacf4 100644
--- a/src/operators/kernel/central-arm-func/conv_add_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_add_arm_func.h
@@ -129,10 +129,13 @@ void ConvAddCompute(const FusionConvAddParam &param) {
     //                                    param.Paddings(),
     //                                    param.Filter(), param.Bias(),
     //                                    param.Output(), false);
-
-    math::DepthwiseConv3x3s2p1v2(param.Input(), param.Filter(), param.Output(),
+    if (param.Paddings()[0] == 0) {
+      math::DepthwiseConv3x3s2p0(param.Input(), param.Filter(), param.Output(),
                                  *param.Bias(), true);
-
+    } else {
+      math::DepthwiseConv3x3s2p1v2(param.Input(), param.Filter(),
+                                   param.Output(), *param.Bias(), true);
+    }
   } else {
     ConvAddBasic(param);
   }
diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp
index 402b187f8f5e9d2fbb70fa6bcfb72c88aa53e3d3..716256a376a50f2ec1c4c62fa25703cabf3a0c66 100644
--- a/src/operators/math/depthwise_conv_3x3.cpp
+++ b/src/operators/math/depthwise_conv_3x3.cpp
@@ -1881,6 +1881,108 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
 #endif
 }
 
+// Depthwise 3x3 convolution, stride 2, no padding: output (i, j) is
+// computed from input rows 2i..2i+2 and columns 2j..2j+2.
+// NOTE(review): if_bias is not read; the bias is always added.
+void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
+                          Tensor *output, Tensor bias, bool if_bias) {
+#if __ARM_NEON
+
+  const int batch_size = static_cast(input->dims()[0]);
+  const int input_channel = static_cast(input->dims()[1]);
+
+  const int input_height = static_cast(input->dims()[2]);
+  const int input_width = static_cast(input->dims()[3]);
+  const int output_height = static_cast(output->dims()[2]);
+  const int output_width = static_cast(output->dims()[3]);
+  const int inhxw = input_height * input_width;
+  const int outhxw = output_height * output_width;
+
+  float32x4_t zero = vdupq_n_f32(0.0);
+  for (int b = 0; b < batch_size; b++) {
+#pragma omp parallel for
+    for (int c = 0; c < input_channel; c++) {
+      const float *filter_data = filter->data() + c * 9;
+      const float *input_data = input->data() + c * inhxw;
+      const float *bias_data = bias.data() + c;
+      float *output_data = output->data() + c * outhxw;
+      float w00 = filter_data[0];
+      float w01 = filter_data[1];
+      float w02 = filter_data[2];
+      float w10 = filter_data[3];
+      float w11 = filter_data[4];
+      float w12 = filter_data[5];
+      float w20 = filter_data[6];
+      float w21 = filter_data[7];
+      float w22 = filter_data[8];
+
+      float32x4_t biasv = vld1q_dup_f32(bias_data);
+
+      for (int i = 0; i < output_height; i += 1) {
+        for (int m = 0; m < output_width - 2; m += 3) {
+          float *output_ptr = output_data + i * output_width + m;
+          float32x4x2_t input_buff_top{}, input_buff_mid{}, input_buff_bottom{};
+          float32x4_t in0, in1, in2, in3, in4, in5, tmp0, tmp1, tmp2, tmp3,
+              tmp4, tmp5, out0;
+          input_buff_top =
+              vld2q_f32(input_data + (2 * i) * input_width + (2 * m));
+          input_buff_mid =
+              vld2q_f32(input_data + (2 * i + 1) * input_width + (2 * m));
+          input_buff_bottom =
+              vld2q_f32(input_data + (2 * i + 2) * input_width + (2 * m));
+
+          in0 = input_buff_top.val[0];
+          tmp0 = input_buff_top.val[1];
+          tmp1 = vextq_f32(in0, zero, 1);
+
+          in2 = input_buff_mid.val[0];
+          tmp2 = input_buff_mid.val[1];
+          tmp3 = vextq_f32(in2, zero, 1);
+
+          in4 = input_buff_bottom.val[0];
+          tmp4 = input_buff_bottom.val[1];
+          tmp5 = vextq_f32(in4, zero, 1);
+
+          out0 = vmulq_n_f32(in0, w00);
+          out0 = vmlaq_n_f32(out0, tmp0, w01);
+          out0 = vmlaq_n_f32(out0, tmp1, w02);
+          out0 = vmlaq_n_f32(out0, in2, w10);
+          out0 = vmlaq_n_f32(out0, tmp2, w11);
+          out0 = vmlaq_n_f32(out0, tmp3, w12);
+          out0 = vmlaq_n_f32(out0, in4, w20);
+          out0 = vmlaq_n_f32(out0, tmp4, w21);
+          out0 = vmlaq_n_f32(out0, tmp5, w22);
+          out0 = vaddq_f32(out0, biasv);
+
+          vst1q_lane_f32(output_ptr, out0, 0);
+          vst1q_lane_f32(output_ptr + 1, out0, 1);
+          vst1q_lane_f32(output_ptr + 2, out0, 2);
+        }
+        int m;
+        for (m = 0; m < output_width - 2; m += 3) {
+        }
+        // Scalar tail for the columns the 3-wide vector loop left over; it
+        // reads the same unpadded window (rows 2i..2i+2, cols 2j..2j+2).
+        for (int j = m; j < output_width; j++) {
+          output_data[i * output_width + j] =
+              input_data[(2 * i) * input_width + 2 * j] * w00 +
+              input_data[(2 * i) * input_width + 2 * j + 1] * w01 +
+              input_data[(2 * i) * input_width + 2 * j + 2] * w02 +
+              input_data[(2 * i + 1) * input_width + 2 * j] * w10 +
+              input_data[(2 * i + 1) * input_width + 2 * j + 1] * w11 +
+              input_data[(2 * i + 1) * input_width + 2 * j + 2] * w12 +
+              input_data[(2 * i + 2) * input_width + 2 * j] * w20 +
+              input_data[(2 * i + 2) * input_width + 2 * j + 1] * w21 +
+              input_data[(2 * i + 2) * input_width + 2 * j + 2] * w22;
+          output_data[i * output_width + j] += *bias_data;
+        }
+      }
+    }
+  }
+
+#endif
+}
+
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/math/depthwise_conv_3x3.h b/src/operators/math/depthwise_conv_3x3.h
index 60e979648f871e640924a3373c625c311c3dd067..b146b88e737a07ea08250315fc94653f63d2ad05 100644
--- a/src/operators/math/depthwise_conv_3x3.h
+++ b/src/operators/math/depthwise_conv_3x3.h
@@ -43,6 +43,9 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
 void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
                                      Tensor *output, const Tensor *new_scale,
                                      const Tensor *new_bias, bool if_relu);
+
+void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
+                          Tensor *output, Tensor bias, bool if_bias);
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index f34cb2e0bc7450f39d53bd6766e4d257af193867..f4dc1421a4cd0f1062e8ad1240caa237e58c9371 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -18,6 +18,9 @@ elseif ("yolo" IN_LIST NET)
     # gen test
     ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-yolo paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test_yolo_combined paddle-mobile)
 elseif ("squeezenet" IN_LIST NET)
     # gen test
     ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h)
@@ -95,6 +98,10 @@ else ()
     ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-yolo paddle-mobile)
 
+    # gen test
+    ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test_yolo_combined paddle-mobile)
+
     # gen test
     ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-googlenet paddle-mobile)
diff --git a/test/net/test_yolo_combined.cpp b/test/net/test_yolo_combined.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..88b889daa946cfaef1d86ff36f416b4643532c89
--- /dev/null
+++ b/test/net/test_yolo_combined.cpp
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  //  ../../../test/models/googlenet
+  //  ../../../test/models/mobilenet
+  auto time1 = time();
+
+  if (paddle_mobile.Load(std::string(g_yolo_combined) + "/model",
+                         std::string(g_yolo_combined) + "/params", true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    std::vector dims{1, 3, 416, 416};
+    std::vector input;
+
+    GetInput(g_test_image_desktop_1_3_416_416_nchw_float, &input, dims);
+    std::cout << "input.size(): " << input.size() << std::endl;
+    for (int j = 0; j < 100; ++j) {
+      std::cout << j << " :  " << input[j] << std::endl;
+    }
+    //    // warm up ten times
+    //    for (int i = 0; i < 10; ++i) {
+    //      paddle_mobile.Predict(input, dims);
+    //    }
+    auto time3 = time();
+    const vector vector_out = paddle_mobile.Predict(input, dims);
+    std::cout << "--------------------------------------------" << std::endl;
+
+    for (float i : vector_out) {
+      std::cout << i << std::endl;
+    }
+
+    std::cout << "--------------------------------------------" << std::endl;
+
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;  // Predict runs once above, so no averaging
+  }
+  return 0;
+}
diff --git a/test/test_helper.h b/test/test_helper.h
index a6898b2cc8d55c79e5bd82d661bbe07533e2ec4f..ecbc251a815e343f75b1247ffc430e9c52d6abfd 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -41,12 +41,15 @@ static const char *g_resnet_50 = "../models/resnet_50";
 static const char *g_resnet = "../models/resnet";
 static const char *g_googlenet_combine = "../models/googlenet_combine";
 static const char *g_yolo = "../models/yolo";
+static const char *g_yolo_combined = "../models/yolo_combined";
 static const char *g_fluid_fssd_new = "../models/fluid_fssd_new";
 
 static const char *g_test_image_1x3x224x224 =
     "../images/test_image_1x3x224x224_float";
 static const char *g_test_image_1x3x224x224_banana =
     "../images/input_3x224x224_banana";
+static const char *g_test_image_desktop_1_3_416_416_nchw_float =
+    "../images/in_put_1_3_416_416_2";
 static const char *g_hand = "../images/hand_image";
 static const char *g_imgfssd_ar = "../images/test_image_ssd_ar";
 static const char *g_imgfssd_ar1 = "../images/003_0001.txt";