readers.py

#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

import os
import sys
import cv2
import numpy as np
import six
import glob
from .data_path_utils import _find_classes
from PIL import Image
import paddlex.utils.logging as logging


def resize_short(img, target_size, interpolation=None):
    """resize image

    Args:
        img: image data
        target_size: resize short target size
        interpolation: interpolation mode

    Returns:
        resized image data
    """
    percent = float(target_size) / min(img.shape[0], img.shape[1])
    resized_width = int(round(img.shape[1] * percent))
    resized_height = int(round(img.shape[0] * percent))
    if interpolation:
        resized = cv2.resize(
            img, (resized_width, resized_height), interpolation=interpolation)
    else:
        resized = cv2.resize(img, (resized_width, resized_height))
    return resized


def crop_image(img, target_size, center=True):
    """crop image

    Args:
        img: images data
        target_size: crop target size
        center: crop mode

    Returns:
        img: cropped image data
    """
    height, width = img.shape[:2]
    size = target_size
    if center:
        w_start = (width - size) // 2
        h_start = (height - size) // 2
    else:
        w_start = np.random.randint(0, width - size + 1)
        h_start = np.random.randint(0, height - size + 1)
    w_end = w_start + size
    h_end = h_start + size
    img = img[h_start:h_end, w_start:w_end, :]
    return img


def preprocess_image(img, random_mirror=False):
    """
    centered, scaled by 1/255.
    :param img: np.array: shape: [ns, h, w, 3], color order: rgb.
    :return: np.array: shape: [ns, h, w, 3]
    """
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    # transpose to [ns, 3, h, w]
    img = img.astype('float32').transpose((0, 3, 1, 2)) / 255

    img_mean = np.array(mean).reshape((3, 1, 1))
    img_std = np.array(std).reshape((3, 1, 1))
    img -= img_mean
    img /= img_std

    if random_mirror:
        mirror = int(np.random.uniform(0, 2))
        if mirror == 1:
            img = img[:, :, ::-1, :]

    return img


def read_image(img_path, target_size=256, crop_size=224):
    """
    resize_short to 256, then center crop to 224.
    :param img_path: one image path
    :return: np.array: shape: [1, h, w, 3], color order: rgb.
    """

    if isinstance(img_path, str):
        with open(img_path, 'rb') as f:
            img = Image.open(f)
            img = img.convert('RGB')
            img = np.array(img)
            # img = cv2.imread(img_path)

            img = resize_short(img, target_size, interpolation=None)
            img = crop_image(img, target_size=crop_size, center=True)
            # img = img[:, :, ::-1]
            img = np.expand_dims(img, axis=0)
            return img
    elif isinstance(img_path, np.ndarray):
        assert len(img_path.shape) == 4
        return img_path
    else:
        ValueError("Not recognized data type {}.".format(type(img_path)))


class ReaderConfig(object):
    """
    A generic data loader where the images are arranged in this way:

        root/train/dog/xxy.jpg
        root/train/dog/xxz.jpg
        ...
        root/train/cat/nsdf3.jpg
        root/train/cat/asd932_.jpg
        ...

        root/test/dog/xxx.jpg
        ...
        root/test/cat/123.jpg
        ...

    """
    def __init__(self, dataset_dir, is_test):
        image_paths, labels, self.num_classes = self.get_dataset_info(dataset_dir, is_test)
        random_per = np.random.permutation(range(len(image_paths)))
        self.image_paths = image_paths[random_per]
        self.labels = labels[random_per]
        self.is_test = is_test

    def get_reader(self):
        def reader():
            IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')
            target_size = 256
            crop_size = 224

            for i, img_path in enumerate(self.image_paths):
                if not img_path.lower().endswith(IMG_EXTENSIONS):
                    continue

                img = cv2.imread(img_path)
                if img is None:
                    logging.info(img_path)
                    continue
                img = resize_short(img, target_size, interpolation=None)
                img = crop_image(img, crop_size, center=self.is_test)
                img = img[:, :, ::-1]
                img = np.expand_dims(img, axis=0)

                img = preprocess_image(img, not self.is_test)

                yield img, self.labels[i]

        return reader

    def get_dataset_info(self, dataset_dir, is_test=False):
        IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')

        # read
        if is_test:
            datasubset_dir = os.path.join(dataset_dir, 'test')
        else:
            datasubset_dir = os.path.join(dataset_dir, 'train')

        class_names, class_to_idx = _find_classes(datasubset_dir)
        # num_classes = len(class_names)
        image_paths = []
        labels = []
        for class_name in class_names:
            classes_dir = os.path.join(datasubset_dir, class_name)
            for img_path in glob.glob(os.path.join(classes_dir, '*')):
                if not img_path.lower().endswith(IMG_EXTENSIONS):
                    continue

                image_paths.append(img_path)
                labels.append(class_to_idx[class_name])

        image_paths = np.array(image_paths)
        labels = np.array(labels)
        return image_paths, labels, len(class_names)


def create_reader(list_image_path, list_label=None, is_test=False):
    def reader():
        IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')
        target_size = 256
        crop_size = 224

        for i, img_path in enumerate(list_image_path):
            if not img_path.lower().endswith(IMG_EXTENSIONS):
                continue

            img = cv2.imread(img_path)
            if img is None:
                logging.info(img_path)
                continue

            img = resize_short(img, target_size, interpolation=None)
            img = crop_image(img, crop_size, center=is_test)
            img = img[:, :, ::-1]
            img_show = np.expand_dims(img, axis=0)

            img = preprocess_image(img_show, not is_test)

            label = 0 if list_label is None else list_label[i]

            yield img_show, img, label

    return reader