#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file contains some common interfaces for image preprocessing.
Many users are confused about the image layout, so the layout
conventions are introduced below.

- CHW Layout

  - The abbreviations: C=channel, H=Height, W=Width
  - The default layout of an image opened by cv2 or PIL is HWC.
    PaddlePaddle only supports the CHW layout. CHW is simply
    a transpose of HWC, so the input image must be transposed.

- Color format: RGB or BGR

  OpenCV uses the BGR color format. PIL uses the RGB color format. Both
  formats can be used for training. Note that the format should
  be kept consistent between the training and inference periods.
"""

from __future__ import print_function

import six
import numpy as np

# FIXME(minqiyang): this is an ugly fix for the numpy bug reported here
# https://github.com/numpy/numpy/issues/12497
# On Python 3, ``import cv2`` is first attempted in a child interpreter and
# only repeated in this process when the child exits with status 0;
# otherwise ``cv2`` is bound to None so that _check_cv2() can report it.
if six.PY3:
    import subprocess
    import sys
    import os
    interpreter = sys.executable
    # Note(zhouwei): if use Python/C 'PyRun_SimpleString', 'sys.executable'
    # will be the C++ executable on Windows, so fall back to the python.exe
    # located under sys.exec_prefix.
    if sys.platform == 'win32' and 'python.exe' not in interpreter:
        interpreter = sys.exec_prefix + os.sep + 'python.exe'
    import_cv2_proc = subprocess.Popen(
        [interpreter, "-c", "import cv2"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    # communicate() waits for the child to finish and drains both pipes.
    out, err = import_cv2_proc.communicate()
    retcode = import_cv2_proc.poll()
    if retcode != 0:
        cv2 = None
    else:
        import cv2
else:
    # Python 2: a plain guarded import is used.
    try:
        import cv2
    except ImportError:
        cv2 = None
import os
import tarfile
import six.moves.cPickle as pickle

__all__ = []


def _check_cv2():
    """
    Tell whether the module-level ``cv2`` binding is usable.

    When ``cv2`` is None a warning is written to stderr.

    :return: True if ``cv2`` is not None, False otherwise.
    :rtype: bool
    """
    if cv2 is not None:
        return True

    import sys
    warning = '''Warning with paddle image module: opencv-python should be imported,
         or paddle image module could NOT work; please install opencv-python first.'''
    sys.stderr.write(warning)
    return False


def batch_images_from_tar(data_file,
                          dataset_name,
                          img2label,
                          num_per_batch=1024):
    """
    Read images from tar file and batch them into batch file.

    Batches are pickled (protocol 2) as ``{'label': [...], 'data': [...]}``
    into ``<data_file>_batch/<dataset_name>_<pid>/batch_<n>``, where
    ``<pid>`` is the current process id. The absolute path of every file in
    that directory is then appended to
    ``<data_file>_batch/<dataset_name>_<pid>.txt``.

    If the output directory already exists, nothing is read or written and
    the list-file path is returned immediately.

    :param data_file: path of image tar file
    :type data_file: string
    :param dataset_name: 'train','test' or 'valid'
    :type dataset_name: string
    :param img2label: a dic with image file name as key
                    and image's label as value
    :type img2label: dic
    :param num_per_batch: image number per batch file
    :type num_per_batch: int
    :return: path of list file containing paths of batch file
    :rtype: string
    """
    batch_dir = data_file + "_batch"
    out_path = "%s/%s_%s" % (batch_dir, dataset_name, os.getpid())
    meta_file = "%s/%s_%s.txt" % (batch_dir, dataset_name, os.getpid())

    if os.path.exists(out_path):
        return meta_file
    os.makedirs(out_path)

    data = []
    labels = []
    file_id = 0
    # The archive is closed as soon as all members have been read.
    with tarfile.open(data_file) as tf:
        for mem in tf.getmembers():
            # Members without a label are skipped.
            if mem.name not in img2label:
                continue
            data.append(tf.extractfile(mem).read())
            labels.append(img2label[mem.name])
            if len(data) == num_per_batch:
                _dump_batch(out_path, file_id, data, labels)
                file_id += 1
                data = []
                labels = []
    # Flush the last, partially filled batch.
    if data:
        _dump_batch(out_path, file_id, data, labels)

    with open(meta_file, 'a') as meta:
        for batch_name in os.listdir(out_path):
            meta.write(
                os.path.abspath("%s/%s" % (out_path, batch_name)) + "\n")
    return meta_file


def _dump_batch(out_path, file_id, data, labels):
    """Pickle one batch to ``<out_path>/batch_<file_id>`` and close the file."""
    output = {'label': labels, 'data': data}
    with open('%s/batch_%d' % (out_path, file_id), 'wb') as batch_file:
        pickle.dump(output, batch_file, protocol=2)


def load_image_bytes(bytes, is_color=True):
    """
    Decode a color or gray image from an in-memory encoded buffer.

    Example usage:

    .. code-block:: python

        with open('cat.jpg', 'rb') as f:
            im = load_image_bytes(f.read())

    :param bytes: the encoded image content (e.g. the raw bytes of a
                  JPEG or PNG file).
    :type bytes: bytes
    :param is_color: If True, decode and return a color image.
                     Otherwise decode and return a gray image.
    :type is_color: bool
    :return: whatever ``cv2.imdecode`` returns for the buffer.
    """
    assert _check_cv2() is True

    # 1 asks OpenCV for a color decode, 0 for a grayscale decode.
    decode_flag = 0
    if is_color:
        decode_flag = 1
    encoded = np.asarray(bytearray(bytes), dtype=np.uint8)
    return cv2.imdecode(encoded, decode_flag)


def load_image(file, is_color=True):
    """
    Read a color or gray image from a file path.

    Example usage:

    .. code-block:: python

        im = load_image('cat.jpg')

    :param file: the input image path.
    :type file: string
    :param is_color: If True, load and return a color image.
                     Otherwise load and return a gray image.
    :type is_color: bool
    :return: whatever ``cv2.imread`` returns for the path.
    """
    assert _check_cv2() is True

    # The named read flags differ between OpenCV releases
    # (cv2.IMREAD_COLOR / cv2.IMREAD_GRAYSCALE in OpenCV 3,
    # cv2.CV_LOAD_IMAGE_COLOR / cv2.CV_LOAD_IMAGE_GRAYSCALE in older
    # versions), so the numeric values are passed directly:
    # 1 = COLOR, 0 = GRAYSCALE.
    read_flag = int(bool(is_color))
    return cv2.imread(file, read_flag)


def resize_short(im, size):
    """
    Scale an image so that its shorter edge becomes ``size`` pixels.

    The longer edge is scaled by the same ratio, rounded down to an
    integer. Bicubic interpolation is used.

    Example usage:

    .. code-block:: python

        im = load_image('cat.jpg')
        im = resize_short(im, 256)

    :param im: the input image with HWC layout.
    :type im: ndarray
    :param size: the shorter edge size of image after resizing.
    :type size: int
    :return: the resized image.
    :rtype: ndarray
    """
    assert _check_cv2() is True

    height, width = im.shape[:2]
    if height > width:
        # Portrait: width is the short edge.
        target_w = size
        target_h = size * height // width
    else:
        # Landscape or square: height is the short edge.
        target_h = size
        target_w = size * width // height
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(
        im, (target_w, target_h), interpolation=cv2.INTER_CUBIC)


def to_chw(im, order=(2, 0, 1)):
    """
    Permute the axes of an image, by default from HWC to CHW.

    Images opened by cv2 or PIL are laid out as HWC; the default
    order (2, 0, 1) moves the channel axis to the front.

    Example usage:

    .. code-block:: python

        im = load_image('cat.jpg')
        im = resize_short(im, 256)
        im = to_chw(im)

    :param im: the input image with HWC layout.
    :type im: ndarray
    :param order: the axis permutation to apply; it must have one
                  entry per axis of ``im``.
    :type order: tuple|list
    :return: the transposed image.
    :rtype: ndarray
    """
    rank = len(im.shape)
    assert rank == len(order)
    return im.transpose(order)


def center_crop(im, size, is_color=True):
    """
    Crop a ``size`` x ``size`` window from the center of an image.

    Only the first two axes (height and width) are sliced, so both
    HWC color images and HW gray images are accepted regardless of
    ``is_color``.

    Example usage:

    .. code-block:: python

        im = center_crop(im, 224)

    :param im: the input image with HWC layout (or HW for gray images).
    :type im: ndarray
    :param size: the cropping size.
    :type size: int
    :param is_color: whether the image is color or not. Kept for
                     backward compatibility; it does not change the
                     result.
    :type is_color: bool
    :return: the cropped image.
    :rtype: ndarray
    :raises ValueError: if ``size`` exceeds the image height or width.
    """
    h, w = im.shape[:2]
    # A window larger than the image would produce negative slice starts,
    # which numpy interprets as offsets from the end and silently yields a
    # wrong-shaped crop.
    if size > h or size > w:
        raise ValueError(
            "crop size %s is larger than the image size (%s, %s)" %
            (size, h, w))
    h_start = (h - size) // 2
    w_start = (w - size) // 2
    # Slicing two axes leaves any trailing channel axis untouched.
    return im[h_start:h_start + size, w_start:w_start + size]


def random_crop(im, size, is_color=True):
    """
    Crop a ``size`` x ``size`` window at a random position of an image.

    The top-left corner is drawn with ``np.random.randint`` (row first,
    then column). Only the first two axes (height and width) are sliced,
    so both HWC color images and HW gray images are accepted regardless
    of ``is_color``.

    Example usage:

    .. code-block:: python

        im = random_crop(im, 224)

    :param im: the input image with HWC layout (or HW for gray images).
    :type im: ndarray
    :param size: the cropping size.
    :type size: int
    :param is_color: whether the image is color or not. Kept for
                     backward compatibility; it does not change the
                     result.
    :type is_color: bool
    :return: the cropped image.
    :rtype: ndarray
    :raises ValueError: if ``size`` exceeds the image height or width.
    """
    h, w = im.shape[:2]
    # np.random.randint would raise ValueError for an empty range anyway;
    # raise the same exception type up front with a clearer message.
    if size > h or size > w:
        raise ValueError(
            "crop size %s is larger than the image size (%s, %s)" %
            (size, h, w))
    h_start = np.random.randint(0, h - size + 1)
    w_start = np.random.randint(0, w - size + 1)
    # Slicing two axes leaves any trailing channel axis untouched.
    return im[h_start:h_start + size, w_start:w_start + size]


def left_right_flip(im, is_color=True):
    """
    Flip an image along the horizontal direction.
    Return the flipped image.

    The width axis (axis 1) is reversed; all other axes are left as
    they are, so the same slice serves HWC and HW inputs.

    Example usage:

    .. code-block:: python

        im = left_right_flip(im)

    :param im: input image with HWC layout or HW layout for gray image
    :type im: ndarray
    :param is_color: whether input image is color or not. Kept for
                     backward compatibility; it does not change the
                     result.
    :type is_color: bool
    :return: the flipped image, obtained by reversed slicing of ``im``.
    :rtype: ndarray
    """
    # im[:, ::-1] and im[:, ::-1, :] select the same elements for a 3-D
    # array, so no branch on rank or is_color is needed.
    return im[:, ::-1]


def simple_transform(im,
                     resize_size,
                     crop_size,
                     is_train,
                     is_color=True,
                     mean=None):
    """
    Simple data augmentation for training. The operations include
    resizing, cropping and flipping.

    The image is resized with ``resize_short``. In training mode it is
    then cropped at a random position and flipped horizontally when
    ``np.random.randint(2)`` returns 0; otherwise it is center-cropped.
    A 3-D result is transposed with ``to_chw``, the image is cast to
    float32 and ``mean`` (if given) is subtracted.

    Example usage:

    .. code-block:: python

        im = simple_transform(im, 256, 224, True)

    :param im: The input image with HWC layout.
    :type im: ndarray
    :param resize_size: The shorter edge length of the resized image.
    :type resize_size: int
    :param crop_size: The cropping size.
    :type crop_size: int
    :param is_train: Whether it is training or not.
    :type is_train: bool
    :param is_color: whether the image is color or not.
    :type is_color: bool
    :param mean: the mean values, which can be element-wise mean values or
                 mean values per channel.
    :type mean: numpy array | list
    :return: the transformed float32 image.
    :rtype: ndarray
    """
    im = resize_short(im, resize_size)
    if is_train:
        im = random_crop(im, crop_size, is_color=is_color)
        if np.random.randint(2) == 0:
            im = left_right_flip(im, is_color)
    else:
        im = center_crop(im, crop_size, is_color=is_color)
    if len(im.shape) == 3:
        im = to_chw(im)

    im = im.astype('float32')
    if mean is not None:
        mean = np.array(mean, dtype=np.float32)
        if mean.ndim == 1 and is_color:
            # One value per channel: reshape to (C, 1, 1) so that it
            # broadcasts over the CHW image.
            mean = mean[:, np.newaxis, np.newaxis]
        elif mean.ndim != 1:
            # Element-wise mean: it must have as many axes as the image.
            # (len(im) would be the size of axis 0, not the number of axes.)
            assert len(mean.shape) == len(im.shape)
        # A 1-D mean for a gray image is used as it is.
        im -= mean

    return im


def load_and_transform(filename,
                       resize_size,
                       crop_size,
                       is_train,
                       is_color=True,
                       mean=None):
    """
    Read the image stored at `filename` and run it through
    `simple_transform` for data augmentation. See `simple_transform`
    for the operations applied.

    Example usage:

    .. code-block:: python

        im = load_and_transform('cat.jpg', 256, 224, True)

    :param filename: The file name of input image.
    :type filename: string
    :param resize_size: The shorter edge length of the resized image.
    :type resize_size: int
    :param crop_size: The cropping size.
    :type crop_size: int
    :param is_train: Whether it is training or not.
    :type is_train: bool
    :param is_color: whether the image is color or not.
    :type is_color: bool
    :param mean: the mean values, which can be element-wise mean values or
                 mean values per channel.
    :type mean: numpy array | list
    :return: the value returned by `simple_transform`.
    """
    image = load_image(filename, is_color)
    return simple_transform(image, resize_size, crop_size, is_train,
                            is_color, mean)