Add readme for image preprocess and fix bug (#1597)

* add comments for imagetools * format code

Add readme for image preprocess and fix bug (#1597)
* add comments for imagetools * format code
77041ac4 · Shuai Yuan · GitHub · 98ab80cd · 77041ac4 · 77041ac4
5 changed file
--- a/README.md
+++ b/README.md
@@ -93,8 +93,9 @@ At present，work in support of onnx is also under operation in Baidu. Related t
 [https://github.com/PaddlePaddle/paddle-onnx](https://github.com/PaddlePaddle/paddle-onnx)
 ### 4. Download parts of testing models and testing pictures
-[http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip)
+[http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip)  
+- The input data is generated by the tools in `tools/python/imagetools`.
 ## 模型获得

--- a/tools/python/imagetools/README.md
+++ b/tools/python/imagetools/README.md
+# imagetools
+This directory contains scripts that generate input data files for paddle-mobile. The image data `g_test_image_1x3x224x224_banana` (used by `test/net/test_mobilenet.cpp`) in [http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip) was generated by these scripts.
+## Generate Input
+Edit the parameters in `img2nchw.py` as shown below to suit your needs:
+```python
+if __name__ == "__main__":
+    # set paras
+    input_image_path = 'banana.jpg'
+    reshape_dict = {"n":1, "c":3, "h":48, "w":512}
+    output_path = input_image_path.replace(input_image_path[-4:],
+                                           "_" + "_".join([str(reshape_dict['n']),
+                                                           str(reshape_dict['c']),
+                                                           str(reshape_dict['h']),
+                                                           str(reshape_dict['w']),
+                                                           "nchw",
+                                                           "float"],))
+    channel_type = ChannelType.BGR
+    mean_bgr = (103.94, 116.78, 123.68) # (0, 0, 0)
+    pixel_scale = 0.017
+```
--- a/tools/python/imagetools/imagetools.py
+++ b/tools/python/imagetools/imagetools.py
@@ -3,11 +3,15 @@ import cv2
 from array import array
-def resize_take_rgbs(path, shape_h_w):
+def resize_take_rgbs(path, shape_h_w, SHOW_IMG=False):
-    print '--------------resize_take_rgbs-----------------begin'
+    print("[INFO] ---- resize_take_rgbs ---- start")
    image = cv2.imread(path)
-    # print image.shape
+    print("[INFO] image.shape:{}".format(image.shape))
-    cv2.imshow("before", image)
+    print("[INFO] shape_h_w:{}".format(shape_h_w))
+    if SHOW_IMG:
+        cv2.imshow("before", image)
    print_rgb(image[0, 0])
    # image len may be for .just check it
@@ -15,8 +19,10 @@ def resize_take_rgbs(path, shape_h_w):
    image = cv2.resize(image, (shape_h_w[0], shape_h_w[1]))
-    cv2.imshow("after", image)
+    if SHOW_IMG:
-    print image.shape
+        cv2.imshow("after", image)
+    print("[INFO] resized image.shape:{}".format(image.shape))
    height = shape_h_w[0]
    width = shape_h_w[1]
@@ -25,15 +31,20 @@ def resize_take_rgbs(path, shape_h_w):
    bs_ = []
    for h in range(0, height):
        for w in range(0, width):
+            '''
            bs_.append(image[h, w, 0])
            gs_.append(image[h, w, 1])
            rs_.append(image[h, w, 2])
+            '''
+            bs_.append(image[w, h, 0])
+            gs_.append(image[w, h, 1])
+            rs_.append(image[w, h, 2])
    # print image[2, 2, 0]/255.
    print len(bs_)
    print len(gs_)
    print len(rs_)
-    print '--------------resize_take_rgbs-----------------end'
+    print("[INFO] ---- resize_take_rgbs ---- end")
    return bs_, gs_, rs_
@@ -56,6 +67,5 @@ def print_rgb((b, g, r)):
 def save_to_file(to_file_name, array):
-    to_file = open(to_file_name, "wb")
+    with open(to_file_name, "wb") as file_handle:
-    array.tofile(to_file)
+        array.tofile(file_handle)
-    to_file.close()
--- a/tools/python/imagetools/img2nchw.py
+++ b/tools/python/imagetools/img2nchw.py
@@ -9,22 +9,21 @@ class ChannelType(Enum):
    RGB = 0,
    BGR = 1
+def combine_bgrs_nchw(bgrs, means_b_g_r=(103.94, 116.78, 123.68), scale=0.017, channel_type=ChannelType.BGR):
+    print("[INFO] ---- combine_bgrs_nchw ---- start")
+    print("[INFO] scale:{}".format(scale))
+    print("[INFO] mean_b_g_r:{}".format(means_b_g_r))
+    #print("[INFO] bgrs:{}".format(bgrs))
-def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
-    print '--------------combine_bgrs_nchw-----------------begin'
-    print "scale: %f" % scale
-    print means_b_g_r
-    # print len(bgrs)
    bs = bgrs[0]
    gs = bgrs[1]
    rs = bgrs[2]
    assert len(bs) == len(gs) == len(rs)
-    print len(bs)
+    print("[INFO] element size of blue channel = len(bs) = {}".format(len(bs)))
-    bgrs_float_array = array('f')
+    bgrs_float_array = array('f')
    if channel_type == ChannelType.BGR:
-        print 'bgr'
+        print('[INFO] bgr format')
        for i in range(0, len(bs)):
            bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
        for i in range(0, len(gs)):
@@ -32,8 +31,7 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
        for i in range(0, len(rs)):
            bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
    elif channel_type == ChannelType.RGB:
-        print 'rgb'
+        print('[INFO] rgb format')
        for i in range(0, len(rs)):
            bgrs_float_array.append((rs[i] - means_b_g_r[2]) * scale)  # r
        for i in range(0, len(gs)):
@@ -41,29 +39,50 @@ def combine_bgrs_nchw(bgrs, means_b_g_r, scale, channel_type=ChannelType.BGR):
        for i in range(0, len(bs)):
            bgrs_float_array.append((bs[i] - means_b_g_r[0]) * scale)  # b
-    print len(bgrs_float_array)
+    '''
+    print("lenI(bgrs_float_array)={}".format(len(bgrs_float_array)))
    print '------------------'
    print bgrs_float_array[0]
    print bgrs_float_array[224 * 224 * 2 + 224 * 2 + 2]
    # for i in range(0, 9):
    #     print'bs %d' % i
    #     print bs[i] / 255.
    print bs[224 * 2 + 2] / 255.
-    print '--------------combine_bgrs_nchw-----------------end'
+    '''
+    print("[INFO] ---- combine_bgrs_nchw ---- end")
    return bgrs_float_array
-# bgrs = tools.resize_take_rgbs('banana.jpeg', (224, 224, 3))
+if __name__ == "__main__":
-# array = combine_bgrs_nchw(bgrs, (103.94, 116.78, 123.68), 0.017, array,ChannelType.BGR)
+    # set paras
-# tools.save_to_file('banana_1_3_224_224_nchw_float')
+    #input_image_path = 'banana.jpg'
+    #input_image_path = "ocr_detect_512x512.png"
-# cv2.waitKey(0)
+    input_image_path = "ocr_recog_48x512.png"
+    reshape_dict = {"n":1, "c":3, "h":48, "w":512}
-bgrs = tools.resize_take_rgbs('datas/jpgs/0000_0.9834-148196_82452-0ad4b83ec6bc0f9c5f28101539267054.jpg_p0_0.126571263346.jpg', (224, 224, 3))
+    output_path = input_image_path.replace(input_image_path[-4:],
-array = combine_bgrs_nchw(bgrs, (0, 0, 0), 1. / 255, ChannelType.RGB)
+                                           "_" + "_".join([str(reshape_dict['n']),
-tools.save_to_file('datas/desktop_1_3_224_224_nchw_float', array)
+                                                           str(reshape_dict['c']),
+                                                           str(reshape_dict['h']),
+                                                           str(reshape_dict['w']),
+                                                           "nchw",
+                                                           "float"],))
+    channel_type = ChannelType.BGR
+    mean_bgr = (103.94, 116.78, 123.68)
+    pixel_scale = 0.017
+    #mean_bgr = (0, 0, 0)
+    #pixel_scale = 1. / 255
+    print("[INFO] input_image_path:{}".format(input_image_path))
+    print("[INFO] reshape_dict:{}".format(reshape_dict))
+    print("[INFO] output_path:{}".format(output_path))
+    print("[INFO] mean_bgr:{}".format(mean_bgr))
+    print("[INFO] pixel_scale:{}".format(pixel_scale))
+    bgrs = tools.resize_take_rgbs(input_image_path, (reshape_dict['h'],
+                                                     reshape_dict['w'],
+                                                     reshape_dict['c']))
+    array = combine_bgrs_nchw(bgrs, mean_bgr, pixel_scale, channel_type)
+    tools.save_to_file(output_path, array)
+    print("[INFO] save {} successfully".format(output_path))
+    #cv2.waitKey(0)
--- a/tools/python/imagetools/numpy2binary.py
+++ b/tools/python/imagetools/numpy2binary.py
+#!/usr/bin/env bash
 # coding=utf-8
-# 这个脚本是可以将numpy合并到二进制
+# This script converts numpy-format data to a binary file
 import cv2
 import numpy as np
 import imagetools as tools
 from array import array
-#
-# image = cv2.imread(path)
-# print image.shape
-#
-# print_rgb(image[0, 0])
-# # image len may be for .just check it
-# image.resize(shape_h_w)
+'''
-data = np.fromfile('/Users/xiebaiyuan/PaddleProject/paddle-mobile/tools/python/imagetools/datas/jpgs2/0000_0.9834-148196_82452-0ad4b83ec6bc0f9c5f28101539267054.jpg_p0_0.126571263346.jpg.input.npfile','f')
+image = cv2.imread(path)
-print data.size
+print image.shape
-print data
+print_rgb(image[0, 0])
+# mage len may be for .just check it
-data.reshape(1, 3, 224, 224)
+image.resize(shape_h_w)
-out_array = array('f')
+'''
-print'--------------------'
-print data.size
+if __name__ == "__main__":
-print data[0]
+    # input params
+    reshape_dict = {"n": 1, "c": 3, "h": 224, "w": 224}
-print '如果是nhwc --------'
+    np_file_path = 'banana_1_3_224_224_nchw_float'
-# rgb rgb rgb rgb rgb
+    save_file_name = 'in_put_1_3_224_224_nchw'
-print data[224 * 3 * 2 + 3 * 2 + 2]
-# print data[2]
+    # load input etc.
+    np = np.fromfile(np_file_path, 'f')
-print '如果是nchw --------'
+    #np = cv2.imread(np_file_path)
-# rgb rgb rgb rgb rgb
+    print("np.size:{}".format(np.size))
-print data[224 * 224 * 2 + 224 * 2 + 2]
+    print("np:{}".format(np))
-# print data[2]
+    np.reshape(reshape_dict['n'],
+               reshape_dict['c'],
-# 明明是nchw
+               reshape_dict['h'],
+               reshape_dict['w'])
-for i in range(0, data.size):
+    out_array = array('f')
-    out_array.append(data[i])
+    '''
-print len(out_array)
+    print("--------------------")
+    print("np.size:{}".format(np.size))
-print out_array[224 * 224 * 2 + 224 * 2 + 2]
+    print("np[0]:{}".format(np[0])
-# print out_array
+    print("如果是nhw")
+    # rgb rgb rgb rgb rgb
-tools.save_to_file('datas/in_put_1_3_224_224_nchw', out_array)
+    print np[224 * 3 * 2 + 3 * 2 + 2]
+    # print np[2]
+    print '如果是nchw --------'
+    # rgb rgb rgb rgb rgb
+    print(np[224 * 224 * 2 + 224 * 2 + 2])
+    # print np[2]
+    # 明明是nchw
+    '''
+    for i in range(0, np.size):
+        out_array.append(np[i])
+    print("len(out_array):{}".format(len(out_array)))
+    print("out_array[224 * 224 * 2 + 224 * 2 + 2]:{}".format(out_array[224 * 224 * 2 + 224 * 2 + 2]))
+    # print out_array
+    tools.save_to_file(save_file_name, out_array)