Merge pull request #1008 from xiebaiyuan/develop

yolo mdl->fluid tools #995

Merge pull request #1008 from xiebaiyuan/develop
yolo mdl->fluid tools #995
3b9d9819 · xiebaiyuan · GitHub · bdd97ea6 · c1a578f1 · 3b9d9819
13 changed file
--- a/python/tools/imagetools/imagetools.py
+++ b/python/tools/imagetools/imagetools.py
+# coding=utf-8
+import cv2
+from array import array
+def resize_take_rgbs(path, shape_h_w):
+    print '--------------resize_take_rgbs-----------------begin'
+    image = cv2.imread(path)
+    # print image.shape
+    cv2.imshow("before", image)
+    print_rgb(image[0, 0])
+    # image len may be for .just check it
+    # image.resize(shape_h_w)
+    image = cv2.resize(image, (shape_h_w[0], shape_h_w[1]))
+    cv2.imshow("after", image)
+    print image.shape
+    height = shape_h_w[0]
+    width = shape_h_w[1]
+    rs_ = []
+    gs_ = []
+    bs_ = []
+    for h in range(0, height):
+        for w in range(0, width):
+            bs_.append(image[h, w, 0])
+            gs_.append(image[h, w, 1])
+            rs_.append(image[h, w, 2])
+    # print image[2, 2, 0]/255.
+    print len(bs_)
+    print len(gs_)
+    print len(rs_)
+    print '--------------resize_take_rgbs-----------------end'
+    return bs_, gs_, rs_
+def print_rgb((b, g, r)):
+    print "像素 - R:%d,G:%d,B:%d" % (r, g, b)  # print the pixel, received in (b, g, r) order, as R/G/B
+    # Disabled OpenCV pixel-access experiments, kept for reference:
+    # image[0, 0] = (100, 150, 200)  # overwrite the pixel at position (0, 0)
+    #
+    # (b, g, r) = image[0, 0]  # read the pixel at (0, 0) again
+    # print "位置(0,0)处的像素 - 红:%d,绿:%d,蓝:%d" % (r, g, b)  # show the modified pixel value
+    #
+    # corner = image[0:100, 0:100]  # read a block of pixels
+    # cv2.imshow("Corner", corner)  # show the block that was read
+    #
+    # image[0:100, 0:100] = (0, 255, 0);  # overwrite the block that was read
+    #
+    # cv2.imshow("Updated", image)  # show the image
+    #
+    # cv2.waitKey(0)  # pause the program
+def save_to_file(to_file_name, array):
+    to_file = open(to_file_name, "wb")
+    array.tofile(to_file)
+    to_file.close()
--- a/python/tools/imagetools/img2nchw.py
+++ b/python/tools/imagetools/img2nchw.py
+# coding=utf-8
+import cv2
+from array import array
+import imagetools as tools
+from enum import Enum
+class ChannelType(Enum):
+    RGB = 0,
+    BGR = 1
+def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
+    print '--------------combine_bgrs_nchw-----------------begin'
+    print "scale: %f" % scale
+    print means_b_g_r
+    # print len(bgrs)
+    bs = bgrs[0]
+    gs = bgrs[1]
+    rs = bgrs[2]
+    assert len(bs) == len(gs) == len(rs)
+    print len(bs)
+    bgrs_float_array = array('f')
+    if channel_type == ChannelType.BGR:
+        print 'bgr'
+        for i in range(0, len(bs)):
+            bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
+        for i in range(0, len(gs)):
+            bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale)  # g
+        for i in range(0, len(rs)):
+            bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
+    elif channel_type == ChannelType.RGB:
+        print 'rgb'
+        for i in range(0, len(rs)):
+            bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
+        for i in range(0, len(gs)):
+            bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale)  # g
+        for i in range(0, len(bs)):
+            bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
+    print len(bgrs_float_array)
+    print '------------------'
+    print bgrs_float_array[0]
+    print bgrs_float_array[416 * 416 * 2 + 416 * 2 + 2]
+    # for i in range(0, 9):
+    #     print'bs %d' % i
+    #     print bs[i] / 255.
+    print bs[416 * 2 + 2] / 255.
+    print '--------------combine_bgrs_nchw-----------------end'
+    return bgrs_float_array
+# bgrs = tools.resize_take_rgbs('banana.jpeg', (224, 224, 3))
+# array = combine_bgrs_nchw(bgrs, (103.94, 116.78, 123.68), 0.017, array,ChannelType.BGR)
+# tools.save_to_file('banana_1_3_224_224_nchw_float')
+# cv2.waitKey(0)
+bgrs = tools.resize_take_rgbs('datas/newyolo.jpg', (416, 416, 3))
+array = combine_bgrs_nchw(bgrs, (0, 0, 0), 1. / 255, ChannelType.RGB)
+tools.save_to_file('datas/desktop_1_3_416_416_nchw_float', array)
--- a/python/tools/imagetools/img2nhwc.py
+++ b/python/tools/imagetools/img2nhwc.py
+# coding=utf-8
+import cv2
+from array import array
+import imagetools as tools
+def combine_bgrs_nhwc(bgrs, means_b_g_r, scale):
+    print "scale: %f" % scale
+    print means_b_g_r
+    # print len(bgrs)
+    bs = bgrs[0]
+    gs = bgrs[1]
+    rs = bgrs[2]
+    assert len(bs) == len(gs) == len(rs)
+    # print len(bs)
+    bgrs_float_array = array('f')
+    for i in range(0, len(bs)):
+        bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
+        bgrs_float_array.append((gs[i] - means_b_g_r[1]) * scale)  # g
+        bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
+    print len(bgrs_float_array)
+    print '------------------'
+    print bgrs_float_array[0]
+    print bgrs_float_array[999]
+    return bgrs_float_array
+bgrs = tools.resize_take_rgbs('newyolo_1.jpg', (416, 416, 3))
+array = combine_bgrs_nhwc(bgrs, (0, 0, 0), 1.0 / 255)
+tools.save_to_file('desktop_1_3_416_416_nhwc_float', array)
+cv2.waitKey(0)
--- a/python/tools/imagetools/numpy2binary.py
+++ b/python/tools/imagetools/numpy2binary.py
+# coding=utf-8
+# This script dumps the contents of a numpy data file into a flat binary file of single-precision floats.
+import cv2
+import numpy as np
+import imagetools as tools
+from array import array
+# Disabled image-loading experiment, kept for reference:
+# image = cv2.imread(path)
+# print image.shape
+#
+# print_rgb(image[0, 0])
+# # image len may be for .just check it
+# image.resize(shape_h_w)
+data = np.fromfile('datas/img.res')  # no dtype given, so numpy's default float type is used -- NOTE(review): confirm the file was written with that type
+print data.size
+print data[0]
+data.reshape(1, 3, 416, 416)  # NOTE(review): the reshaped array is discarded; data stays flat, which the flat indexing below relies on
+out_array = array('f')  # output buffer of C floats
+print'--------------------'
+print data.size
+print data[0]
+print '如果是nhwc --------'
+# nhwc layout: rgb rgb rgb rgb rgb (flat offset of row 2, column 2, channel 2)
+print data[416 * 3 * 2 + 3 * 2 + 2]
+# print data[2]
+print '如果是nchw --------'
+# nchw layout: flat offset of channel 2, row 2, column 2
+print data[416 * 416 * 2 + 416 * 2 + 2]
+# print data[2]
+# the data clearly is nchw
+for i in range(0, data.size):  # copy element by element; array('f') stores each value as a C float
+    out_array.append(data[i])
+print len(out_array)
+print out_array[416 * 416 * 2 + 416 * 2 + 2]
+tools.save_to_file('datas/in_put_1_3_416_416_2', out_array)
--- a/python/tools/mdl2fluid/model_combine.py
+++ b/python/tools/mdl2fluid/model_combine.py
+# coding=utf-8
+import os
+path = "yolo_v2_tofile_source/"  # 文件夹目录
+to_file_path = "yolo_v2_tofile_combined/params"
+files = os.listdir(path)  # 得到文件夹下的所有文件名称
+files.sort(cmp=None, key=str.lower)
+to_file = open(to_file_path, "wb")
+for file in files:  # 遍历文件夹
+    if not os.path.isdir(file):  # 判断是否是文件夹，不是文件夹才打开
+        f = open(path + "/" + file)  # 打开文件
+        name = f.name
+        print 'name:  ' + name
+        from_file = open(name, "rb")
+        to_file.write(from_file.read())
+        from_file.close()
+to_file.close()
--- a/python/tools/mdl2fluid/swicher.py
+++ b/python/tools/mdl2fluid/swicher.py
@@ -66,7 +66,7 @@ class Swichter:
    def read_head(self, head_file):
        from_file = open(head_file, "rb")
-        read = from_file.read(20)
+        read = from_file.read(24)
        # print read
        from_file.close()
        # print read
@@ -84,9 +84,32 @@ class Swichter:
        to_file.close()
        pass
+    def copy_padding_add_head(self, from_file_name, to_file_name, tmp_file_name, padding):
+        print'padding  = %d' % padding
+        from_file = open(from_file_name, "rb")
+        # print len(from_file.read())
+        from_file.seek(padding, 0)
+        read = from_file.read()
+        print len(read)
+        to_file = open(to_file_name, "wb")
+        # tmp_file = open(tmp_file_name, "wb")
+        head = self.read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases')
+        to_file.write(head)
+        to_file.write(read)
+        from_file.close()
+        to_file.close()
+        pass
+# Swichter().nhwc2nchw_one_slice_add_head(
+#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/conv1_0.bin',
+#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw_with_head/conv1_0',
+#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/.tmp',
+#     32,
+#     3, 3, 3)
+# Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases')
-# Swichter().nhwc2nchw_one_slice(
+# Swichter().copy_add_head('datas/model.0.0.weight', 'datas/conv1_0', '')
-#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nhwc/conv5_6_dw_0.bin',
-#     '/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/multiobjects/float32s_nchw/conv5_6_dw_0', 1,
-#     512, 3, 3)
-Swichter().read_head('/Users/xiebaiyuan/PaddleProject/paddle-mobile/python/tools/mdl2fluid/yolo/conv1_biases')
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -233,6 +233,13 @@ void Executor<Dtype, P>::InitMemory() {
            Get_binary_data(program_.model_path + "/" + var_desc->Name());
        char *data = origin_data;
        LoadMemory(*var_desc, tensor, &data);
+        //        DLOG << "-----      " << var_desc->Name();
+        //        DLOG << "-----      " << tensor->dims();
+        //        float *pDouble = tensor->template data<float>();
+        //        for (int i = 0; i < tensor->numel() && i < 30; ++i) {
+        //          std::cout << pDouble[i] << std::endl;
+        //        }
        delete origin_data;
      } else {
        if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {

--- a/src/operators/kernel/central-arm-func/conv_add_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_add_arm_func.h
@@ -129,10 +129,13 @@ void ConvAddCompute(const FusionConvAddParam<CPU> &param) {
    //        param.Paddings(),
    //                               param.Filter(), param.Bias(),
    //                               param.Output(), false);
+    if (param.Paddings()[0] == 0) {
-    math::DepthwiseConv3x3s2p1v2(param.Input(), param.Filter(), param.Output(),
+      math::DepthwiseConv3x3s2p0(param.Input(), param.Filter(), param.Output(),
                                 *param.Bias(), true);
+    } else {
+      math::DepthwiseConv3x3s2p1v2(param.Input(), param.Filter(),
+                                   param.Output(), *param.Bias(), true);
+    }
  } else {
    ConvAddBasic(param);
  }

--- a/src/operators/math/depthwise_conv_3x3.cpp
+++ b/src/operators/math/depthwise_conv_3x3.cpp
@@ -1881,6 +1881,103 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
 #endif
 }
+// Depthwise 3x3 convolution with stride 2 and no padding, plus an optional
+// per-channel bias. Tensors are indexed as dims()[0..3] = batch, channel,
+// height, width; each channel uses its own 9 filter taps. The whole body is
+// guarded by __ARM_NEON, so the call does nothing on other targets.
+//
+// input   source tensor
+// filter  9 taps per channel, row-major
+// output  destination tensor; its dims()[2], dims()[3] set the output size
+// bias    one value per channel; only read when if_bias is true
+// if_bias whether the bias is added to every output element
+void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
+                          Tensor *output, Tensor bias, bool if_bias) {
+#if __ARM_NEON
+  const int batch_size = static_cast<int>(input->dims()[0]);
+  const int input_channel = static_cast<int>(input->dims()[1]);
+  const int input_height = static_cast<int>(input->dims()[2]);
+  const int input_width = static_cast<int>(input->dims()[3]);
+  const int output_height = static_cast<int>(output->dims()[2]);
+  const int output_width = static_cast<int>(output->dims()[3]);
+  const int inhxw = input_height * input_width;
+  const int outhxw = output_height * output_width;
+  const float32x4_t zero = vdupq_n_f32(0.0f);
+
+  for (int b = 0; b < batch_size; b++) {
+#pragma omp parallel for
+    for (int c = 0; c < input_channel; c++) {
+      // Index of this (batch, channel) plane, so batches after the first do
+      // not overwrite the first one.
+      // NOTE(review): assumes contiguous planes and equal input/output
+      // channel counts.
+      const int plane = b * input_channel + c;
+      const float *filter_data = filter->data<float>() + c * 9;
+      const float *input_data = input->data<float>() + plane * inhxw;
+      float *output_data = output->data<float>() + plane * outhxw;
+      // Honour if_bias: add zero when no bias is requested.
+      const float bias_value = if_bias ? bias.data<float>()[c] : 0.0f;
+      const float32x4_t biasv = vdupq_n_f32(bias_value);
+
+      const float w00 = filter_data[0];
+      const float w01 = filter_data[1];
+      const float w02 = filter_data[2];
+      const float w10 = filter_data[3];
+      const float w11 = filter_data[4];
+      const float w12 = filter_data[5];
+      const float w20 = filter_data[6];
+      const float w21 = filter_data[7];
+      const float w22 = filter_data[8];
+
+      for (int i = 0; i < output_height; ++i) {
+        // Without padding, output row i reads input rows 2i, 2i+1, 2i+2.
+        const float *row_top = input_data + (2 * i) * input_width;
+        const float *row_mid = row_top + input_width;
+        const float *row_bottom = row_mid + input_width;
+        float *out_row = output_data + i * output_width;
+
+        // Vector path: three output columns per iteration.
+        // NOTE(review): vld2q_f32 loads 8 floats per row (columns 2m..2m+7)
+        // although only 2m..2m+6 contribute; confirm the buffer tolerates
+        // the extra element at the very end.
+        int m = 0;
+        for (; m < output_width - 2; m += 3) {
+          // De-interleaving load: val[0] holds even columns, val[1] odd ones.
+          const float32x4x2_t top = vld2q_f32(row_top + 2 * m);
+          const float32x4x2_t mid = vld2q_f32(row_mid + 2 * m);
+          const float32x4x2_t bottom = vld2q_f32(row_bottom + 2 * m);
+
+          // vextq_f32(x, zero, 1) shifts the even columns left by one lane,
+          // giving the third tap of each window.
+          float32x4_t out0 = vmulq_n_f32(top.val[0], w00);
+          out0 = vmlaq_n_f32(out0, top.val[1], w01);
+          out0 = vmlaq_n_f32(out0, vextq_f32(top.val[0], zero, 1), w02);
+          out0 = vmlaq_n_f32(out0, mid.val[0], w10);
+          out0 = vmlaq_n_f32(out0, mid.val[1], w11);
+          out0 = vmlaq_n_f32(out0, vextq_f32(mid.val[0], zero, 1), w12);
+          out0 = vmlaq_n_f32(out0, bottom.val[0], w20);
+          out0 = vmlaq_n_f32(out0, bottom.val[1], w21);
+          out0 = vmlaq_n_f32(out0, vextq_f32(bottom.val[0], zero, 1), w22);
+          out0 = vaddq_f32(out0, biasv);
+
+          // Only lanes 0..2 are complete results; lane 3 lacks its last tap.
+          vst1q_lane_f32(out_row + m, out0, 0);
+          vst1q_lane_f32(out_row + m + 1, out0, 1);
+          vst1q_lane_f32(out_row + m + 2, out0, 2);
+        }
+
+        // Scalar tail for the remaining columns. It uses the same no-padding
+        // window as the vector path (rows 2i..2i+2, columns 2j..2j+2).
+        for (int j = m; j < output_width; ++j) {
+          const float *top = row_top + 2 * j;
+          const float *mid = row_mid + 2 * j;
+          const float *bottom = row_bottom + 2 * j;
+          out_row[j] = top[0] * w00 + top[1] * w01 + top[2] * w02 +
+                       mid[0] * w10 + mid[1] * w11 + mid[2] * w12 +
+                       bottom[0] * w20 + bottom[1] * w21 + bottom[2] * w22 +
+                       bias_value;
+        }
+      }
+    }
+  }
+#endif
+}
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
--- a/src/operators/math/depthwise_conv_3x3.h
+++ b/src/operators/math/depthwise_conv_3x3.h
@@ -43,6 +43,9 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
 void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
                                     Tensor *output, const Tensor *new_scale,
                                     const Tensor *new_bias, bool if_relu);
+void DepthwiseConv3x3s2p0(const Tensor *input, const Tensor *filter,
+                          Tensor *output, Tensor bias, bool if_bias);
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -18,6 +18,9 @@ elseif ("yolo" IN_LIST NET)
    # gen test
    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
    target_link_libraries(test-yolo paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test_yolo_combined paddle-mobile)
 elseif ("squeezenet" IN_LIST NET)
    # gen test
    ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h  test_include.h executor_for_test.h)
@@ -95,6 +98,10 @@ else ()
    ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
    target_link_libraries(test-yolo paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test_yolo_combined net/test_yolo_combined.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test_yolo_combined paddle-mobile)
    # gen test
    ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h  test_include.h executor_for_test.h)
    target_link_libraries(test-googlenet paddle-mobile)

--- a/test/net/test_yolo_combined.cpp
+++ b/test/net/test_yolo_combined.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+// Loads the combined yolo model, runs one prediction on the 1x3x416x416 test
+// input and prints the input preview, the output values and the timings.
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  //  ../../../test/models/googlenet
+  //  ../../../test/models/mobilenet
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_yolo_combined) + "/model",
+                         std::string(g_yolo_combined) + "/params", true)) {
+    auto time2 = time();
+    // Measure from before Load to after Load (was time1 vs time1, i.e. 0).
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+    std::vector<int64_t> dims{1, 3, 416, 416};
+    std::vector<float> input;
+    GetInput<float>(g_test_image_desktop_1_3_416_416_nchw_float, &input, dims);
+    std::cout << "input.size():  " << input.size() << std::endl;
+    // Preview at most the first 100 values without reading past the end.
+    for (std::vector<float>::size_type j = 0; j < input.size() && j < 100;
+         ++j) {
+      std::cout << j << " :  " << input[j] << std::endl;
+    }
+    //        // warm up ten times
+    //        for (int i = 0; i < 10; ++i) {
+    //            paddle_mobile.Predict(input, dims);
+    //        }
+    auto time3 = time();
+    const std::vector<float> vector_out = paddle_mobile.Predict(input, dims);
+    // Stop the clock right after Predict so printing is not timed.
+    auto time4 = time();
+    std::cout << "--------------------------------------------" << std::endl;
+    for (float i : vector_out) {
+      std::cout << i << std::endl;
+    }
+    std::cout << "--------------------------------------------" << std::endl;
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+    // A single Predict call was timed, so the value is not averaged.
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
+  } else {
+    std::cerr << "failed to load model from " << g_yolo_combined << std::endl;
+  }
+  return 0;
+}
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -41,12 +41,15 @@ static const char *g_resnet_50 = "../models/resnet_50";
 static const char *g_resnet = "../models/resnet";
 static const char *g_googlenet_combine = "../models/googlenet_combine";
 static const char *g_yolo = "../models/yolo";
+static const char *g_yolo_combined = "../models/yolo_combined";
 static const char *g_fluid_fssd_new = "../models/fluid_fssd_new";
 static const char *g_test_image_1x3x224x224 =
    "../images/test_image_1x3x224x224_float";
 static const char *g_test_image_1x3x224x224_banana =
    "../images/input_3x224x224_banana";
+static const char *g_test_image_desktop_1_3_416_416_nchw_float =
+    "../images/in_put_1_3_416_416_2";
 static const char *g_hand = "../images/hand_image";
 static const char *g_imgfssd_ar = "../images/test_image_ssd_ar";
 static const char *g_imgfssd_ar1 = "../images/003_0001.txt";