# reader.py

# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import image_util
from paddle.utils.image_util import *
import random
from PIL import Image
from PIL import ImageDraw
import numpy as np
import xml.etree.ElementTree
import os
import time
import copy


class Settings(object):
    """Configuration shared by the detection data readers in this module.

    Args:
        dataset: dataset name; the readers in this file handle 'coco' and
            'pascalvoc'.
        toy: if non-zero, the readers truncate the image list to this many
            entries.
        data_dir: root directory that image/label paths are joined onto.
        label_file: file (relative to `data_dir`) with one class name per
            line; it is only read when `dataset == "pascalvoc"`.
        resize_h: target image height used when resizing.
        resize_w: target image width used when resizing.
        mean_value: per-channel mean; stored as a float32 array of shape
            (len(mean_value), 1, 1) so it broadcasts over CHW images.
        apply_distort: whether training readers call the distortion step.
        apply_expand: whether training readers call the expansion step.
    """

    def __init__(self, dataset, toy, data_dir, label_file, resize_h, resize_w,
                 mean_value, apply_distort, apply_expand):
        self._dataset = dataset
        self._toy = toy
        self._data_dir = data_dir
        # Always define the label list so the `label_list` property never
        # raises AttributeError; it is only populated for Pascal VOC.
        self._label_list = []
        if dataset == "pascalvoc":
            label_fpath = os.path.join(data_dir, label_file)
            # The context manager closes the label file deterministically.
            with open(label_fpath) as label_fp:
                self._label_list = [line.strip() for line in label_fp]

        self._apply_distort = apply_distort
        self._apply_expand = apply_expand
        self._resize_height = resize_h
        self._resize_width = resize_w
        self._img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype(
            'float32')
        # Augmentation hyper-parameters. They are not read in this class;
        # presumably the image_util helpers consume them -- TODO confirm.
        self._expand_prob = 0.5
        self._expand_max_ratio = 4
        self._hue_prob = 0.5
        self._hue_delta = 18
        self._contrast_prob = 0.5
        self._contrast_delta = 0.5
        self._saturation_prob = 0.5
        self._saturation_delta = 0.5
        self._brightness_prob = 0.5
        self._brightness_delta = 0.125

    @property
    def dataset(self):
        """Dataset name passed to the constructor."""
        return self._dataset

    @property
    def toy(self):
        """Maximum number of images to use (0 means no limit)."""
        return self._toy

    @property
    def apply_expand(self):
        """Whether the expansion augmentation is enabled."""
        return self._apply_expand

    @property
    def apply_distort(self):
        """Whether the distortion augmentation is enabled."""
        return self._apply_distort

    @property
    def data_dir(self):
        """Root directory for image and label paths."""
        return self._data_dir

    @data_dir.setter
    def data_dir(self, data_dir):
        self._data_dir = data_dir

    @property
    def label_list(self):
        """Class names read from the label file (empty unless 'pascalvoc')."""
        return self._label_list

    @property
    def resize_h(self):
        """Target image height."""
        return self._resize_height

    @property
    def resize_w(self):
        """Target image width."""
        return self._resize_width

    @property
    def img_mean(self):
        """Per-channel mean as a float32 array of shape (C, 1, 1)."""
        return self._img_mean


def _reader_creator(settings, file_list, mode, shuffle):
    """Build a generator function that yields preprocessed samples.

    Args:
        settings: configuration object; this function reads `dataset`,
            `toy`, `data_dir`, `label_list`, `resize_w`, `resize_h`,
            `img_mean`, `_apply_distort` and `_apply_expand` from it.
        file_list: for 'coco', the annotation file handed to
            `pycocotools.coco.COCO`; for 'pascalvoc', a text file with one
            entry per line ("<image> <label_xml>" in train/test mode,
            "<image>" in infer mode), each relative to `settings.data_dir`.
        mode: 'train', 'test' or 'infer'.
        shuffle: whether to shuffle the image list on each pass.

    Returns:
        A zero-argument generator function. In 'train'/'test' mode it yields
        `(image, boxes, labels, flags)` where `image` is a flattened float32
        array, `boxes` is columns 1..4 of the label rows
        (xmin, ymin, xmax, ymax divided by image width/height), `labels` is
        column 0 as int32 and `flags` is the last column as int32 (iscrowd
        for COCO, difficult for Pascal VOC). Images without any label row are
        skipped. In 'infer' mode it yields only the image array.

    Raises:
        ValueError: when the reader is started with an unsupported dataset.
    """

    def reader():
        if settings.dataset == 'coco':
            # Imported here so pycocotools is only needed for COCO data.
            from pycocotools.coco import COCO

            coco = COCO(file_list)
            image_ids = coco.getImgIds()
            images = coco.loadImgs(image_ids)
            category_ids = coco.getCatIds()
        elif settings.dataset == 'pascalvoc':
            # Close the list file as soon as it has been read.
            with open(file_list) as flist:
                images = [line.strip() for line in flist]
        else:
            raise ValueError(
                "unsupported dataset: {!r}".format(settings.dataset))

        if settings.toy != 0:
            images = images[:settings.toy]
        print("{} on {} with {} images".format(mode, settings.dataset,
                                               len(images)))

        if shuffle:
            random.shuffle(images)

        # Pillow 10 removed Image.ANTIALIAS; LANCZOS names the same filter.
        resample = getattr(Image, 'LANCZOS', None)
        if resample is None:
            resample = Image.ANTIALIAS

        has_labels = mode == 'train' or mode == 'test'

        for image in images:
            if settings.dataset == 'coco':
                image_path = os.path.join(settings.data_dir,
                                          image['file_name'])
            elif settings.dataset == 'pascalvoc':
                if has_labels:
                    image_path, label_path = image.split()
                    image_path = os.path.join(settings.data_dir, image_path)
                    label_path = os.path.join(settings.data_dir, label_path)
                elif mode == 'infer':
                    image_path = os.path.join(settings.data_dir, image)

            img = Image.open(image_path)
            if img.mode == 'L':
                img = img.convert('RGB')
            img_width, img_height = img.size

            if has_labels:
                bbox_labels = []
                if settings.dataset == 'coco':
                    # layout: category | xmin | ymin | xmax | ymax | iscrowd
                    ann_ids = coco.getAnnIds(imgIds=image['id'])
                    for ann in coco.loadAnns(ann_ids):
                        # COCO boxes are stored as (x, y, width, height).
                        xmin, ymin, w, h = ann['bbox']
                        xmax = xmin + w
                        ymax = ymin + h
                        bbox_labels.append([
                            # start from 1, leave 0 to background
                            float(category_ids.index(ann['category_id'])) + 1,
                            float(xmin) / img_width,
                            float(ymin) / img_height,
                            float(xmax) / img_width,
                            float(ymax) / img_height,
                            float(ann['iscrowd']),
                        ])
                elif settings.dataset == 'pascalvoc':
                    # layout: label | xmin | ymin | xmax | ymax | difficult
                    root = xml.etree.ElementTree.parse(label_path).getroot()
                    for obj in root.findall('object'):
                        # The class id is the name's index in
                        # settings.label_list; no offset is added here
                        # (presumably the label file lists background
                        # first -- TODO confirm).
                        class_id = float(
                            settings.label_list.index(obj.find('name').text))
                        bbox = obj.find('bndbox')
                        difficult = float(obj.find('difficult').text)
                        bbox_labels.append([
                            class_id,
                            float(bbox.find('xmin').text) / img_width,
                            float(bbox.find('ymin').text) / img_height,
                            float(bbox.find('xmax').text) / img_width,
                            float(bbox.find('ymax').text) / img_height,
                            difficult,
                        ])

                sample_labels = bbox_labels
                if mode == 'train':
                    if settings._apply_distort:
                        img = image_util.distort_image(img, settings)
                    if settings._apply_expand:
                        (img, bbox_labels, img_width,
                         img_height) = image_util.expand_image(
                             img, bbox_labels, img_width, img_height,
                             settings)
                    # hard-code here
                    batch_sampler = [
                        image_util.sampler(1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0),
                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0),
                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0),
                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0),
                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0),
                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0),
                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0),
                    ]
                    # random crop
                    sampled_bbox = image_util.generate_batch_samples(
                        batch_sampler, bbox_labels, img_width, img_height)

                    img = np.array(img)
                    if len(sampled_bbox) > 0:
                        idx = int(random.uniform(0, len(sampled_bbox)))
                        img, sample_labels = image_util.crop_image(
                            img, bbox_labels, sampled_bbox[idx], img_width,
                            img_height)

                    img = Image.fromarray(img)

            img = img.resize((settings.resize_w, settings.resize_h), resample)
            img = np.array(img)

            if mode == 'train':
                mirror = int(random.uniform(0, 2))
                if mirror == 1:
                    # Flip horizontally and mirror the normalized x-coords.
                    img = img[:, ::-1, :]
                    for label in sample_labels:
                        label[1], label[3] = 1 - label[3], 1 - label[1]

            # HWC to CHW
            if len(img.shape) == 3:
                img = np.transpose(img, (2, 0, 1))
            # RGB to BGR
            img = img[[2, 1, 0], :, :]
            img = img.astype('float32')
            img -= settings.img_mean
            img = img.flatten()
            # Scale the mean-subtracted pixels (0.007843 is roughly 1/127.5).
            img = img * 0.007843

            if has_labels:
                sample_labels = np.array(sample_labels)
                # Skip images that ended up with no boxes.
                if len(sample_labels) == 0:
                    continue
                yield (img.astype('float32'), sample_labels[:, 1:5],
                       sample_labels[:, 0].astype('int32'),
                       sample_labels[:, -1].astype('int32'))
            elif mode == 'infer':
                # No labels exist in infer mode, so only the image is yielded.
                yield img.astype('float32')

    return reader


def draw_bounding_box_on_image(image,
                               sample_labels,
                               image_name,
                               category_names,
                               color='red',
                               thickness=4,
                               with_text=True,
                               normalized=True):
    """Draw labelled boxes on an image array and save the result.

    Args:
        image: array accepted by `PIL.Image.fromarray`.
        sample_labels: iterable of rows whose element 0 is the class index
            and elements 1..4 are xmin, ymin, xmax, ymax.
        image_name: destination path passed to `Image.save`.
        category_names: sequence indexed by the integer class index.
        color: line colour of the box outline.
        thickness: line width of the box outline.
        with_text: if true, write the category name at the box's top-left
            corner (only done for images whose mode is 'RGB').
        normalized: if true, coordinates are multiplied by the image width
            and height; otherwise they are used unchanged.
    """
    canvas = Image.fromarray(image)
    pen = ImageDraw.Draw(canvas)
    if normalized:
        scale_x, scale_y = canvas.size
    else:
        scale_x, scale_y = 1, 1
    # Text is only drawn on RGB canvases.
    annotate = with_text and canvas.mode == 'RGB'

    for entry in sample_labels:
        caption = category_names[int(entry[0])]
        xmin, ymin, xmax, ymax = entry[1:5]
        left = xmin * scale_x
        right = xmax * scale_x
        top = ymin * scale_y
        bottom = ymax * scale_y
        # Closed polyline around the rectangle, ending where it started.
        outline = [(left, top), (left, bottom), (right, bottom),
                   (right, top), (left, top)]
        pen.line(outline, width=thickness, fill=color)
        if annotate:
            pen.text((left, top), caption, (255, 255, 0))

    canvas.save(image_name)


def train(settings, file_list, shuffle=True):
    """Create a reader in 'train' mode.

    Args:
        settings: configuration object providing `dataset` and `data_dir`.
        file_list: list/annotation file path relative to `settings.data_dir`.
        shuffle: whether the reader shuffles the image list.

    Returns:
        The reader built by `_reader_creator`. For 'coco' a shallow copy of
        `settings` is used whose `data_dir` points at the "train2014" or
        "train2017" sub-directory, chosen by which year appears in the joined
        file path. Returns None for any dataset other than 'coco' or
        'pascalvoc'.

    Raises:
        ValueError: for 'coco' when the joined path contains neither '2014'
            nor '2017', so the image sub-directory cannot be determined.
    """
    file_list = os.path.join(settings.data_dir, file_list)
    if settings.dataset == 'coco':
        if '2014' in file_list:
            sub_dir = "train2014"
        elif '2017' in file_list:
            sub_dir = "train2017"
        else:
            # Previously this fell through and hit an unbound `sub_dir`.
            raise ValueError(
                "cannot infer COCO year (2014/2017) from file list path: "
                "{!r}".format(file_list))
        # Copy so the caller's settings keep their original data_dir.
        train_settings = copy.copy(settings)
        train_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
        return _reader_creator(train_settings, file_list, 'train', shuffle)
    elif settings.dataset == 'pascalvoc':
        return _reader_creator(settings, file_list, 'train', shuffle)


def test(settings, file_list):
    """Create a reader in 'test' mode with shuffling disabled.

    Args:
        settings: configuration object providing `dataset` and `data_dir`.
        file_list: list/annotation file path relative to `settings.data_dir`.

    Returns:
        The reader built by `_reader_creator`. For 'coco' a shallow copy of
        `settings` is used whose `data_dir` points at the "val2014" or
        "val2017" sub-directory, chosen by which year appears in the joined
        file path. Returns None for any dataset other than 'coco' or
        'pascalvoc'.

    Raises:
        ValueError: for 'coco' when the joined path contains neither '2014'
            nor '2017', so the image sub-directory cannot be determined.
    """
    file_list = os.path.join(settings.data_dir, file_list)
    if settings.dataset == 'coco':
        if '2014' in file_list:
            sub_dir = "val2014"
        elif '2017' in file_list:
            sub_dir = "val2017"
        else:
            # Previously this fell through and hit an unbound `sub_dir`.
            raise ValueError(
                "cannot infer COCO year (2014/2017) from file list path: "
                "{!r}".format(file_list))
        # Copy so the caller's settings keep their original data_dir.
        test_settings = copy.copy(settings)
        test_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
        return _reader_creator(test_settings, file_list, 'test', False)
    elif settings.dataset == 'pascalvoc':
        return _reader_creator(settings, file_list, 'test', False)


def infer(settings, file_list):
    """Create a reader in 'infer' mode with shuffling disabled.

    `file_list` is forwarded unchanged; it is not joined with
    `settings.data_dir` here.
    """
    return _reader_creator(settings, file_list, mode='infer', shuffle=False)