reader.py 3.8 KB
Newer Older
J
jerrywgz 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.utils.image_util import *
import random
from PIL import Image
from PIL import ImageDraw
import numpy as np
import xml.etree.ElementTree
import os
import time
import copy
import six

from roidbs import JsonDataset
import data_utils


class Settings(object):
    """Reader configuration built from a parsed-argument namespace.

    Every attribute of ``args`` is copied onto the instance, then the
    dataset-specific annotation files and image directories are filled in
    and ``mean_value`` is converted to a float32 array.

    Args:
        args: an object exposing its options through ``vars()`` (for
            example an ``argparse.Namespace``). It must provide ``dataset``
            and ``mean_value``.

    Raises:
        TypeError: if ``args`` is ``None``.
        NotImplementedError: if ``args.dataset`` contains neither
            ``'coco2014'`` nor ``'coco2017'``.
    """

    def __init__(self, args=None):
        if args is None:
            # vars(None) would raise a TypeError with an unhelpful message;
            # keep the exception type but say what is actually missing.
            raise TypeError(
                'Settings requires an args namespace, got None')

        # Mirror every parsed option onto the settings object.
        for name, value in vars(args).items():
            setattr(self, name, value)

        # The 2014 and 2017 layouts differ only in the year suffix; 2014 is
        # checked first, as before.
        for year in ('2014', '2017'):
            if 'coco' + year in args.dataset:
                self.class_nums = 81
                self.train_file_list = (
                    'annotations/instances_train{}.json'.format(year))
                self.train_data_dir = 'train{}'.format(year)
                self.val_file_list = (
                    'annotations/instances_val{}.json'.format(year))
                self.val_data_dir = 'val{}'.format(year)
                break
        else:
            raise NotImplementedError('Dataset {} not supported'.format(
                self.dataset))

        # Add two leading unit axes so a 1-D mean of length N becomes
        # shape (1, 1, N).
        self.mean_value = np.array(self.mean_value)[
            np.newaxis, np.newaxis, :].astype('float32')


54
def coco(settings, mode, batch_size=None, shuffle=False):
    """Create a batched sample reader over a COCO-style annotation set.

    The train (``mode == 'train'``) or val (``mode == 'test'``) file-list
    and data-dir attributes on ``settings`` are rewritten in place with
    ``settings.data_dir`` prepended. Any other ``mode`` leaves the paths
    untouched and loads the dataset with ``train=False``.

    Args:
        settings: configuration object providing ``data_dir``, ``dataset``
            and the train/val path attributes; it is also handed to
            ``JsonDataset`` and ``data_utils.get_image_blob``.
        mode: ``'train'`` or ``'test'``.
        batch_size: number of samples per yielded batch. With ``None`` no
            full batch is ever reached, so train mode yields nothing and
            other modes yield all samples as one batch.
        shuffle: shuffle the roidb list in place at the start of every
            pass; only honoured in train mode.

    Returns:
        A zero-argument generator function. Each yielded batch is a list of
        ``(im, gt_boxes, gt_classes, is_crowd, im_info, im_id)`` tuples.
    """
    if mode == 'train':
        settings.train_file_list = os.path.join(settings.data_dir,
                                                settings.train_file_list)
        settings.train_data_dir = os.path.join(settings.data_dir,
                                               settings.train_data_dir)
    elif mode == 'test':
        settings.val_file_list = os.path.join(settings.data_dir,
                                              settings.val_file_list)
        settings.val_data_dir = os.path.join(settings.data_dir,
                                             settings.val_data_dir)
    json_dataset = JsonDataset(settings, train=(mode == 'train'))
    roidbs = json_dataset.get_roidb()

    print("{} on {} with {} roidbs".format(mode, settings.dataset, len(roidbs)))

    def reader():
        if mode == "train" and shuffle:
            random.shuffle(roidbs)
        batch_out = []
        for roidb in roidbs:
            im, im_scales = data_utils.get_image_blob(roidb, settings)
            im_id = roidb['id']

            # Image size after rescaling, followed by the scale itself.
            im_height = np.round(roidb['height'] * im_scales)
            im_width = np.round(roidb['width'] * im_scales)
            im_info = np.array(
                [im_height, im_width, im_scales], dtype=np.float32)
            gt_boxes = roidb['gt_boxes'].astype('float32')
            gt_classes = roidb['gt_classes'].astype('int32')
            is_crowd = roidb['is_crowd'].astype('int32')
            # Images without any ground-truth box are skipped in training.
            if mode == 'train' and gt_boxes.shape[0] == 0:
                continue
            batch_out.append(
                (im, gt_boxes, gt_classes, is_crowd, im_info, im_id))
            if len(batch_out) == batch_size:
                yield batch_out
                batch_out = []

        # Outside training every sample must be seen, so flush the final
        # short batch instead of silently dropping it. Training keeps
        # yielding full batches only.
        if mode != 'train' and batch_out:
            yield batch_out

    return reader


96 97
def train(settings, batch_size, shuffle=True):
    """Return the reader produced by ``coco`` in ``'train'`` mode."""
    reader = coco(
        settings, 'train', batch_size=batch_size, shuffle=shuffle)
    return reader
J
jerrywgz 已提交
98 99


100 101
def test(settings, batch_size):
    """Return the reader produced by ``coco`` in ``'test'`` mode, unshuffled."""
    reader = coco(settings, 'test', batch_size=batch_size, shuffle=False)
    return reader