data_feed.py 35.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import os
import inspect

from ppdet.core.workspace import register, serializable
from ppdet.utils.download import get_dataset_path

from ppdet.data.reader import Reader
# XXX these are for triggering the decorator
from ppdet.data.transform.operators import (
    DecodeImage, MixupImage, NormalizeBox, NormalizeImage, RandomDistort,
    RandomFlipImage, RandomInterpImage, ResizeImage, ExpandImage, CropImage,
    Permute)
31

32
from ppdet.data.transform.arrange_sample import (
W
wangguanzhong 已提交
33 34
    ArrangeRCNN, ArrangeEvalRCNN, ArrangeTestRCNN, ArrangeSSD, ArrangeEvalSSD,
    ArrangeTestSSD, ArrangeYOLO, ArrangeEvalYOLO, ArrangeTestYOLO)
35 36 37 38 39 40 41 42 43 44

__all__ = [
    'PadBatch', 'MultiScale', 'RandomShape', 'DataSet', 'CocoDataSet',
    'DataFeed', 'TrainFeed', 'EvalFeed', 'FasterRCNNTrainFeed',
    'MaskRCNNTrainFeed', 'FasterRCNNTestFeed', 'MaskRCNNTestFeed',
    'SSDTrainFeed', 'SSDEvalFeed', 'SSDTestFeed', 'YoloTrainFeed',
    'YoloEvalFeed', 'YoloTestFeed', 'create_reader'
]


45
def _prepare_data_config(feed, args_path):
    """
    Build the data-source configuration dict consumed by the reader.

    When a dataset root is known (``args_path`` if truthy, otherwise
    ``feed.dataset.dataset_dir``), the root is resolved through
    `get_dataset_path` and the dataset's ``annotation`` / ``image_dir`` are
    rewritten in place to be joined onto the resolved directory.

    Args:
        feed (object): data feed whose ``dataset`` and loading flags are read
        args_path (str): optional dataset root overriding the dataset's own

    Returns:
        dict: data config with keys 'ANNO_FILE', 'IMAGE_DIR',
            'USE_DEFAULT_LABEL', 'IS_SHUFFLE', 'SAMPLES', 'WITH_BACKGROUND',
            'MIXUP_EPOCH', 'TYPE' and, if the dataset carries a non-empty
            ``images`` list, 'IMAGES'.
    """
    dataset = feed.dataset

    # if `DATASET_DIR` does not exists, search ~/.paddle/dataset for a directory
    # named `DATASET_DIR` (e.g., coco, pascal), if not present either, download
    root = args_path or dataset.dataset_dir
    if root:
        anno = getattr(dataset, 'annotation', None)
        img_dir = getattr(dataset, 'image_dir', None)
        resolved = get_dataset_path(root, anno, img_dir)
        if anno:
            dataset.annotation = os.path.join(resolved, anno)
        if img_dir:
            dataset.image_dir = os.path.join(resolved, img_dir)

    # -1 is used when the feed does not define a mixup epoch
    mixup_epoch = getattr(feed, 'mixup_epoch', None)
    if mixup_epoch is None:
        mixup_epoch = -1

    data_config = dict([
        ('ANNO_FILE', dataset.annotation),
        ('IMAGE_DIR', dataset.image_dir),
        ('USE_DEFAULT_LABEL', dataset.use_default_label),
        ('IS_SHUFFLE', feed.shuffle),
        ('SAMPLES', feed.samples),
        ('WITH_BACKGROUND', feed.with_background),
        ('MIXUP_EPOCH', mixup_epoch),
        ('TYPE', type(dataset).__source__),
    ])

    # only datasets that carry an explicit, non-empty image list expose it
    images = getattr(dataset, 'images', [])
    if len(images) > 0:
        data_config['IMAGES'] = images

    return data_config


def create_reader(feed, max_iter=0, args_path=None, my_source=None):
    """
    Return iterable data reader.

    Args:
        feed (object): data feed describing dataset, transforms and workers
        max_iter (int): number of iterations.
        args_path (str): optional dataset root, forwarded to
            `_prepare_data_config`
        my_source (callable): callable function to create a source iterator
            which is used to provide source data in 'ppdet.data.reader'

    Returns:
        The object produced by `Reader.create` for ``feed.mode``.
    """

    # if `DATASET_DIR` does not exists, search ~/.paddle/dataset for a directory
    # named `DATASET_DIR` (e.g., coco, pascal), if not present either, download
    data_config = _prepare_data_config(feed, args_path)

    # worker settings; fallbacks apply only if the feed lacks the attribute
    worker_conf = {
        'bufsize': getattr(feed, 'bufsize', 10),
        'worker_num': feed.num_workers,
        'use_process': getattr(feed, 'use_process', False),
        'memsize': getattr(feed, 'memsize', '3G'),
    }
    transform_config = {
        'WORKER_CONF': worker_conf,
        'BATCH_SIZE': feed.batch_size,
        'DROP_LAST': feed.drop_last,
        'USE_PADDED_IM_INFO': feed.use_padded_im_info,
    }

    # batch transforms are translated into reader flags; for each kind only
    # the first matching instance is consulted
    configured = feed.batch_transforms
    padders = [bt for bt in configured if isinstance(bt, PadBatch)]
    shapers = [bt for bt in configured if isinstance(bt, RandomShape)]
    scalers = [bt for bt in configured if isinstance(bt, MultiScale)]

    if any(padders):
        transform_config['IS_PADDING'] = True
        stride = padders[0].pad_to_stride
        if stride != 0:
            transform_config['COARSEST_STRIDE'] = stride
    if any(shapers):
        transform_config['RANDOM_SHAPES'] = shapers[0].sizes
    if any(scalers):
        transform_config['MULTI_SCALES'] = scalers[0].scales

    # prefer `getfullargspec` when this interpreter's `inspect` provides it
    argspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec

    # serialise each sample transform as {'op': class name, <init arg>: value},
    # keeping only instance attributes that are named `__init__` arguments
    ops = []
    for transform in feed.sample_transforms:
        init_args = [
            name for name in argspec(type(transform).__init__).args
            if name != 'self'
        ]
        op_conf = {}
        for key, value in transform.__dict__.items():
            if key in init_args:
                op_conf[key] = value
        op_conf['op'] = transform.__class__.__name__
        ops.append(op_conf)
    transform_config['OPS'] = ops

    return Reader.create(feed.mode, data_config, transform_config, max_iter,
                         my_source)
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254


# XXX batch transforms are only stubs for now, actually handled by `post_map`
@serializable
class PadBatch(object):
    """
    Batch transform marker: pad all samples of a batch to the same dimensions.

    Args:
        pad_to_stride (int): pad to a multiple of this stride, e.g., 32;
            `create_reader` only emits 'COARSEST_STRIDE' when it is non-zero
    """

    def __init__(self, pad_to_stride=0):
        # the instance only records the setting; `create_reader` reads it
        super(PadBatch, self).__init__()
        self.pad_to_stride = pad_to_stride


@serializable
class MultiScale(object):
    """
    Randomly resize image by scale

    Args:
        scales (list): list of int, randomly resize to one of these scales
    """

    def __init__(self, scales=[]):
        super(MultiScale, self).__init__()
        # Store a private copy: the `[]` default in the signature is a single
        # shared object, so keeping a reference to it would let one instance's
        # in-place edits leak into every other default-constructed instance.
        self.scales = list(scales) if scales is not None else []


@serializable
class RandomShape(object):
    """
    Randomly reshape a batch

    Args:
        sizes (list): list of int, random choose a size from these
    """

    def __init__(self, sizes=[]):
        super(RandomShape, self).__init__()
        # Store a private copy: the `[]` default in the signature is a single
        # shared object, so keeping a reference to it would let one instance's
        # in-place edits leak into every other default-constructed instance.
        self.sizes = list(sizes) if sizes is not None else []


@serializable
class DataSet(object):
    """
    Dataset, e.g., coco, pascal voc

    Args:
        annotation (str): annotation file path
        image_dir (str): directory where image files are stored
        dataset_dir (str): dataset root; when set, `_prepare_data_config`
            joins `annotation` and `image_dir` onto the resolved root
        use_default_label (bool): forwarded to the reader as
            'USE_DEFAULT_LABEL'
    """
    # name of the data source type, reported as 'TYPE' in the data config
    __source__ = 'RoiDbSource'

    def __init__(self,
                 annotation,
                 image_dir,
                 dataset_dir=None,
                 use_default_label=None):
        super(DataSet, self).__init__()
        # plain value holder: every argument is stored under its own name
        self.dataset_dir, self.annotation = dataset_dir, annotation
        self.image_dir, self.use_default_label = image_dir, use_default_label


# Defaults used by the COCO presets in this file: the dataset root, and the
# train / val annotation files and image directories.
COCO_DATASET_DIR = 'coco'
COCO_TRAIN_ANNOTATION = 'annotations/instances_train2017.json'
COCO_TRAIN_IMAGE_DIR = 'train2017'
COCO_VAL_ANNOTATION = 'annotations/instances_val2017.json'
COCO_VAL_IMAGE_DIR = 'val2017'


@serializable
class CocoDataSet(DataSet):
    """
    `DataSet` preset whose defaults point at the COCO train split.

    Args:
        dataset_dir (str): dataset root, defaults to COCO_DATASET_DIR
        annotation (str): annotation file, defaults to COCO_TRAIN_ANNOTATION
        image_dir (str): image directory, defaults to COCO_TRAIN_IMAGE_DIR
    """

    def __init__(self,
                 dataset_dir=COCO_DATASET_DIR,
                 annotation=COCO_TRAIN_ANNOTATION,
                 image_dir=COCO_TRAIN_IMAGE_DIR):
        # note the parameter order differs from `DataSet.__init__`, hence the
        # keyword forwarding
        settings = {
            'dataset_dir': dataset_dir,
            'annotation': annotation,
            'image_dir': image_dir,
        }
        super(CocoDataSet, self).__init__(**settings)


# Defaults used by the Pascal VOC presets in this file: the dataset root, the
# train / val / test image-set lists, the image directory, and the default
# value of `use_default_label` for `VocDataSet`.
VOC_DATASET_DIR = 'pascalvoc'
VOC_TRAIN_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/train.txt'
VOC_VAL_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/val.txt'
VOC_TEST_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/test.txt'
VOC_IMAGE_DIR = 'VOCdevkit/VOC_all/JPEGImages'
VOC_USE_DEFAULT_LABEL = None


@serializable
class VocDataSet(DataSet):
    """
    `DataSet` preset whose defaults point at the Pascal VOC train list.

    Args:
        dataset_dir (str): dataset root, defaults to VOC_DATASET_DIR
        annotation (str): image-set list, defaults to VOC_TRAIN_ANNOTATION
        image_dir (str): image directory, defaults to VOC_IMAGE_DIR
        use_default_label (bool): defaults to VOC_USE_DEFAULT_LABEL
    """
    # name of the data source type, reported as 'TYPE' in the data config
    __source__ = 'VOCSource'

    def __init__(self,
                 dataset_dir=VOC_DATASET_DIR,
                 annotation=VOC_TRAIN_ANNOTATION,
                 image_dir=VOC_IMAGE_DIR,
                 use_default_label=VOC_USE_DEFAULT_LABEL):
        # parameter order differs from `DataSet.__init__`, so forward by name
        settings = {
            'dataset_dir': dataset_dir,
            'annotation': annotation,
            'image_dir': image_dir,
            'use_default_label': use_default_label,
        }
        super(VocDataSet, self).__init__(**settings)


@serializable
class SimpleDataSet(DataSet):
    """
    Dataset that additionally carries an explicit list of images.

    The `images` list starts empty and is filled through `add_images`;
    `_prepare_data_config` forwards it as 'IMAGES' when it is non-empty.

    Args:
        dataset_dir (str): dataset root, may be None
        annotation (str): annotation file path, may be None
        image_dir (str): directory where image files are stored, may be None
        use_default_label (bool): forwarded to the reader as
            'USE_DEFAULT_LABEL'
    """
    # name of the data source type, reported as 'TYPE' in the data config
    __source__ = 'SimpleSource'

    def __init__(self,
                 dataset_dir=None,
                 annotation=None,
                 image_dir=None,
                 use_default_label=None):
        # `use_default_label` used to be accepted but silently dropped; it is
        # now forwarded so that a caller-supplied value takes effect.
        super(SimpleDataSet, self).__init__(
            dataset_dir=dataset_dir,
            annotation=annotation,
            image_dir=image_dir,
            use_default_label=use_default_label)
        self.images = []

    def add_images(self, images):
        """Append every item of `images` to this dataset's image list."""
        self.images.extend(images)
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281


@serializable
class DataFeed(object):
    """
    DataFeed encompasses all data loading related settings

    Args:
        dataset (object): a `DataSet` instance, or a dict of keyword
            arguments from which a `DataSet` is constructed
        fields (list): list of data fields needed
        image_shape (list): list of image dims (C, MAX_DIM, MIN_DIM)
        sample_transforms (list): list of sample transformations to use
        batch_transforms (list): list of batch transformations to use
        batch_size (int): number of images per device
        shuffle (bool): if samples should be shuffled
        samples (int): forwarded to the reader as 'SAMPLES'
        drop_last (bool): drop last batch if size is uneven
        with_background (bool): forwarded to the reader as 'WITH_BACKGROUND'
        num_workers (int): number of workers processes (or threads)
        bufsize (int): size of queue used to buffer results from workers
        use_process (bool): use process or thread as workers
        memsize (str): size of shared memory used in result queue
                        when 'use_process' is True
        use_padded_im_info (bool): forwarded to the reader as
            'USE_PADDED_IM_INFO'
    """
    __category__ = 'data'

    def __init__(self,
                 dataset,
                 fields,
                 image_shape,
                 sample_transforms=None,
                 batch_transforms=None,
                 batch_size=1,
                 shuffle=False,
                 samples=-1,
                 drop_last=False,
                 with_background=True,
                 num_workers=2,
                 bufsize=10,
                 use_process=False,
                 memsize=None,
                 use_padded_im_info=False):
        super(DataFeed, self).__init__()

        # what to load and how to transform it
        self.fields = fields
        self.image_shape = image_shape
        self.sample_transforms = sample_transforms
        self.batch_transforms = batch_transforms

        # batching / sampling behaviour
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.samples = samples
        self.drop_last = drop_last
        self.with_background = with_background

        # worker pool settings
        self.num_workers = num_workers
        self.bufsize = bufsize
        self.use_process = use_process
        self.memsize = memsize

        # a plain dict is taken as keyword arguments for `DataSet`
        if isinstance(dataset, dict):
            self.dataset = DataSet(**dataset)
        else:
            self.dataset = dataset
        self.use_padded_im_info = use_padded_im_info


# for custom (i.e., Non-preset) datasets
@register
class TrainFeed(DataFeed):
    # generic training feed; shares the argument documentation of `DataFeed`
    __doc__ = DataFeed.__doc__

    def __init__(self,
                 dataset,
                 fields,
                 image_shape,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=True,
                 samples=-1,
                 drop_last=False,
                 with_background=True,
                 num_workers=2,
                 bufsize=10,
                 use_process=True,
                 memsize=None):
        # every argument is handed to `DataFeed` unchanged; only the defaults
        # (`shuffle=True`, `use_process=True`) differ from the base class
        super(TrainFeed, self).__init__(
            dataset=dataset,
            fields=fields,
            image_shape=image_shape,
            sample_transforms=sample_transforms,
            batch_transforms=batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            with_background=with_background,
            num_workers=num_workers,
            bufsize=bufsize,
            use_process=use_process,
            memsize=memsize)
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419


@register
class EvalFeed(DataFeed):
    # generic evaluation feed; shares the argument documentation of `DataFeed`
    __doc__ = DataFeed.__doc__

    def __init__(self,
                 dataset,
                 fields,
                 image_shape,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 samples=-1,
                 drop_last=False,
                 with_background=True,
                 num_workers=2):
        # every argument is handed to `DataFeed` unchanged; worker queue
        # options are not exposed here and keep the base-class defaults
        super(EvalFeed, self).__init__(
            dataset=dataset,
            fields=fields,
            image_shape=image_shape,
            sample_transforms=sample_transforms,
            batch_transforms=batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            with_background=with_background,
            num_workers=num_workers)


@register
class TestFeed(DataFeed):
    # generic test feed; shares the argument documentation of `DataFeed`
    __doc__ = DataFeed.__doc__

    def __init__(self,
                 dataset,
                 fields,
                 image_shape,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 with_background=True,
                 num_workers=2):
        # every argument is handed to `DataFeed` unchanged; `samples` is not
        # exposed here and keeps the base-class default
        super(TestFeed, self).__init__(
            dataset=dataset,
            fields=fields,
            image_shape=image_shape,
            sample_transforms=sample_transforms,
            batch_transforms=batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last,
            with_background=with_background,
            num_workers=num_workers)


420
# yapf: disable
421 422 423 424 425 426 427 428 429 430
@register
class FasterRCNNTrainFeed(DataFeed):
    # preset training feed for Faster R-CNN; argument docs come from `DataFeed`
    __doc__ = DataFeed.__doc__

    def __init__(self,
                 dataset=CocoDataSet().__dict__,
                 fields=[
                     'image', 'im_info', 'im_id', 'gt_box', 'gt_label',
                     'is_crowd'
                 ],
                 image_shape=[3, 800, 1333],
                 sample_transforms=[
                     DecodeImage(to_rgb=True),
                     RandomFlipImage(prob=0.5),
                     NormalizeImage(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225],
                                    is_scale=True,
                                    is_channel_first=False),
                     ResizeImage(target_size=800, max_size=1333, interp=1),
                     Permute(to_bgr=False)
                 ],
                 batch_transforms=[PadBatch()],
                 batch_size=1,
                 shuffle=True,
                 samples=-1,
                 drop_last=False,
                 bufsize=10,
                 num_workers=2,
                 use_process=False,
                 memsize=None):
        # XXX this should be handled by the data loader, since `fields` is
        # given, just collect them
        # Build a new list instead of appending in place: `sample_transforms`
        # may be the default list from the signature (or a caller-owned list),
        # and an in-place append would add one more arrange op to it on every
        # instantiation.
        sample_transforms = list(sample_transforms) + [ArrangeRCNN()]
        super(FasterRCNNTrainFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            bufsize=bufsize,
            num_workers=num_workers,
            use_process=use_process,
            memsize=memsize)
        # XXX these modes should be unified
        self.mode = 'TRAIN'


@register
class FasterRCNNEvalFeed(DataFeed):
    # preset evaluation feed for Faster R-CNN; argument docs come from
    # `DataFeed`
    __doc__ = DataFeed.__doc__

    # NOTE(review): the default `dataset` passes COCO_VAL_ANNOTATION and
    # COCO_VAL_IMAGE_DIR positionally, so they bind to `dataset_dir` and
    # `annotation` of `CocoDataSet.__init__` rather than to `annotation` and
    # `image_dir` -- confirm whether keyword arguments were intended.
    def __init__(self,
                 dataset=CocoDataSet(COCO_VAL_ANNOTATION,
                                     COCO_VAL_IMAGE_DIR).__dict__,
                 fields=['image', 'im_info', 'im_id', 'im_shape', 'gt_box',
                         'gt_label', 'is_difficult'],
                 image_shape=[3, 800, 1333],
                 sample_transforms=[
                     DecodeImage(to_rgb=True),
                     NormalizeImage(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225],
                                    is_scale=True,
                                    is_channel_first=False),
                     ResizeImage(target_size=800, max_size=1333, interp=1),
                     Permute(to_bgr=False)
                 ],
                 batch_transforms=[PadBatch()],
                 batch_size=1,
                 shuffle=False,
                 samples=-1,
                 drop_last=False,
                 num_workers=2,
                 use_padded_im_info=True):
        # Build a new list instead of appending in place: `sample_transforms`
        # may be the default list from the signature (or a caller-owned list),
        # and an in-place append would add one more arrange op to it on every
        # instantiation.
        sample_transforms = list(sample_transforms) + [ArrangeEvalRCNN()]
        super(FasterRCNNEvalFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            num_workers=num_workers,
            use_padded_im_info=use_padded_im_info)
        self.mode = 'VAL'
512 513 514


@register
class FasterRCNNTestFeed(DataFeed):
    # preset test feed for Faster R-CNN; argument docs come from `DataFeed`
    __doc__ = DataFeed.__doc__

    # NOTE(review): the default `dataset` passes its two values positionally,
    # so they bind to `dataset_dir` and `annotation` of
    # `SimpleDataSet.__init__`. The resulting `__dict__` also contains an
    # `images` key that `SimpleDataSet.__init__` does not accept, so
    # `SimpleDataSet(**dataset)` looks like it would fail on the untouched
    # default -- confirm against how the config system supplies `dataset`.
    def __init__(self,
                 dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
                                       COCO_VAL_IMAGE_DIR).__dict__,
                 fields=['image', 'im_info', 'im_id', 'im_shape'],
                 image_shape=[3, 800, 1333],
                 sample_transforms=[
                     DecodeImage(to_rgb=True),
                     NormalizeImage(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225],
                                    is_scale=True,
                                    is_channel_first=False),
                     Permute(to_bgr=False)
                 ],
                 batch_transforms=[PadBatch()],
                 batch_size=1,
                 shuffle=False,
                 samples=-1,
                 drop_last=False,
                 num_workers=2,
                 use_padded_im_info=True):
        # Build a new list instead of appending in place: `sample_transforms`
        # may be the default list from the signature (or a caller-owned list),
        # and an in-place append would add one more arrange op to it on every
        # instantiation.
        sample_transforms = list(sample_transforms) + [ArrangeTestRCNN()]
        # convert here so the base class receives a `SimpleDataSet` rather
        # than building a plain `DataSet` from the dict
        if isinstance(dataset, dict):
            dataset = SimpleDataSet(**dataset)
        super(FasterRCNNTestFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            num_workers=num_workers,
            use_padded_im_info=use_padded_im_info)
        self.mode = 'TEST'
554 555


Y
Yang Zhang 已提交
556 557 558
# XXX currently use two presets, in the future, these should be combined into a
# single `RCNNTrainFeed`. Mask (and keypoint) should be processed
# automatically if `gt_mask` (or `gt_keypoints`) is in the required fields
559
@register
class MaskRCNNTrainFeed(DataFeed):
    # preset training feed for Mask R-CNN; argument docs come from `DataFeed`
    __doc__ = DataFeed.__doc__

    def __init__(self,
                 dataset=CocoDataSet().__dict__,
                 fields=[
                     'image', 'im_info', 'im_id', 'gt_box', 'gt_label',
                     'is_crowd', 'gt_mask'
                 ],
                 image_shape=[3, 800, 1333],
                 sample_transforms=[
                     DecodeImage(to_rgb=True),
                     RandomFlipImage(prob=0.5, is_mask_flip=True),
                     NormalizeImage(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225],
                                    is_scale=True,
                                    is_channel_first=False),
                     ResizeImage(target_size=800,
                                 max_size=1333,
                                 interp=1,
                                 use_cv2=True),
                     Permute(to_bgr=False, channel_first=True)
                 ],
                 batch_transforms=[PadBatch()],
                 batch_size=1,
                 shuffle=True,
                 samples=-1,
                 drop_last=False,
                 num_workers=2,
                 use_process=False,
                 use_padded_im_info=False):
        # Build a new list instead of appending in place: `sample_transforms`
        # may be the default list from the signature (or a caller-owned list),
        # and an in-place append would add one more arrange op to it on every
        # instantiation.
        sample_transforms = list(sample_transforms) + [
            ArrangeRCNN(is_mask=True)
        ]
        # `use_padded_im_info` used to be accepted but never forwarded; its
        # default matches the base-class default, so forwarding it only
        # changes behaviour for callers that set it explicitly.
        super(MaskRCNNTrainFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            num_workers=num_workers,
            use_process=use_process,
            use_padded_im_info=use_padded_im_info)
        self.mode = 'TRAIN'
605 606 607 608 609 610 611 612 613 614


@register
class MaskRCNNEvalFeed(DataFeed):
    # preset evaluation feed for Mask R-CNN; argument docs come from `DataFeed`
    __doc__ = DataFeed.__doc__

    # NOTE(review): the default `dataset` passes COCO_VAL_ANNOTATION and
    # COCO_VAL_IMAGE_DIR positionally, so they bind to `dataset_dir` and
    # `annotation` of `CocoDataSet.__init__` rather than to `annotation` and
    # `image_dir` -- confirm whether keyword arguments were intended.
    def __init__(self,
                 dataset=CocoDataSet(COCO_VAL_ANNOTATION,
                                     COCO_VAL_IMAGE_DIR).__dict__,
                 fields=['image', 'im_info', 'im_id', 'im_shape'],
                 image_shape=[3, 800, 1333],
                 sample_transforms=[
                     DecodeImage(to_rgb=True),
                     NormalizeImage(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225],
                                    is_scale=True,
                                    is_channel_first=False),
                     ResizeImage(target_size=800,
                                 max_size=1333,
                                 interp=1,
                                 use_cv2=True),
                     Permute(to_bgr=False, channel_first=True)
                 ],
                 batch_transforms=[PadBatch()],
                 batch_size=1,
                 shuffle=False,
                 samples=-1,
                 drop_last=False,
                 num_workers=2,
                 use_process=False,
                 use_padded_im_info=True):
        # Build a new list instead of appending in place: `sample_transforms`
        # may be the default list from the signature (or a caller-owned list),
        # and an in-place append would add one more arrange op to it on every
        # instantiation.
        sample_transforms = list(sample_transforms) + [ArrangeTestRCNN()]
        super(MaskRCNNEvalFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            num_workers=num_workers,
            use_process=use_process,
            use_padded_im_info=use_padded_im_info)
        self.mode = 'VAL'


@register
class MaskRCNNTestFeed(DataFeed):
    # preset test feed for Mask R-CNN; argument docs come from `DataFeed`
    __doc__ = DataFeed.__doc__

    # NOTE(review): the default `dataset` passes its two values positionally,
    # so they bind to `dataset_dir` and `annotation` of
    # `SimpleDataSet.__init__` -- confirm whether keyword arguments were
    # intended.
    def __init__(self,
                 dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
                                       COCO_VAL_IMAGE_DIR).__dict__,
                 fields=['image', 'im_info', 'im_id', 'im_shape'],
                 image_shape=[3, 800, 1333],
                 sample_transforms=[
                     DecodeImage(to_rgb=True),
                     NormalizeImage(
                         mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225],
                         is_scale=True,
                         is_channel_first=False),
                     Permute(to_bgr=False, channel_first=True)
                 ],
                 batch_transforms=[PadBatch()],
                 batch_size=1,
                 shuffle=False,
                 samples=-1,
                 drop_last=False,
                 num_workers=2,
                 use_process=False,
                 use_padded_im_info=True):
        # Build a new list instead of appending in place: `sample_transforms`
        # may be the default list from the signature (or a caller-owned list),
        # and an in-place append would add one more arrange op to it on every
        # instantiation.
        sample_transforms = list(sample_transforms) + [ArrangeTestRCNN()]
        # convert here so the base class receives a `SimpleDataSet` rather
        # than building a plain `DataSet` from the dict
        if isinstance(dataset, dict):
            dataset = SimpleDataSet(**dataset)
        super(MaskRCNNTestFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            num_workers=num_workers,
            use_process=use_process,
            use_padded_im_info=use_padded_im_info)
        self.mode = 'TEST'


@register
class SSDTrainFeed(DataFeed):
    # preset training feed for SSD; argument docs come from `DataFeed`
    __doc__ = DataFeed.__doc__

    def __init__(self,
                 dataset=VocDataSet().__dict__,
                 fields=['image', 'gt_box', 'gt_label'],
                 image_shape=[3, 300, 300],
                 sample_transforms=[
                     DecodeImage(to_rgb=True, with_mixup=False),
                     NormalizeBox(),
                     RandomDistort(brightness_lower=0.875,
                                   brightness_upper=1.125,
                                   is_order=True),
                     ExpandImage(max_ratio=4, prob=0.5),
                     CropImage(batch_sampler=[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]],
                               satisfy_all=False, avoid_no_bbox=False),
                     ResizeImage(target_size=300, use_cv2=False, interp=1),
                     RandomFlipImage(is_normalized=True),
                     Permute(),
                     NormalizeImage(mean=[127.5, 127.5, 127.5],
                                    std=[127.502231, 127.502231, 127.502231],
                                    is_scale=False)
                 ],
                 batch_transforms=[],
                 batch_size=32,
                 shuffle=True,
                 samples=-1,
                 drop_last=True,
                 num_workers=8,
                 bufsize=10,
                 use_process=True,
                 memsize=None):
        # Build a new list instead of appending in place: `sample_transforms`
        # may be the default list from the signature (or a caller-owned list),
        # and an in-place append would add one more arrange op to it on every
        # instantiation.
        sample_transforms = list(sample_transforms) + [ArrangeSSD()]
        # `memsize` is forwarded as given; it used to be hard-coded to None
        # here, which silently discarded a caller-supplied value.
        super(SSDTrainFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            num_workers=num_workers,
            bufsize=bufsize,
            use_process=use_process,
            memsize=memsize)
        self.mode = 'TRAIN'


@register
class SSDEvalFeed(DataFeed):
    # preset evaluation feed for SSD; argument docs come from `DataFeed`
    __doc__ = DataFeed.__doc__

    # NOTE(review): the default `dataset` passes VOC_VAL_ANNOTATION
    # positionally, so it binds to `dataset_dir` of `VocDataSet.__init__`
    # rather than to `annotation` -- confirm whether a keyword argument was
    # intended.
    def __init__(
            self,
            dataset=VocDataSet(VOC_VAL_ANNOTATION).__dict__,
            fields=['image', 'im_shape', 'im_id', 'gt_box',
                         'gt_label', 'is_difficult'],
            image_shape=[3, 300, 300],
            sample_transforms=[
                DecodeImage(to_rgb=True, with_mixup=False),
                NormalizeBox(),
                ResizeImage(target_size=300, use_cv2=False, interp=1),
                Permute(),
                NormalizeImage(
                    mean=[127.5, 127.5, 127.5],
                    std=[127.502231, 127.502231, 127.502231],
                    is_scale=False)
            ],
            batch_transforms=[],
            batch_size=64,
            shuffle=False,
            samples=-1,
            drop_last=True,
            num_workers=8,
            bufsize=10,
            use_process=False,
            memsize=None):
        # Build a new list instead of appending in place: `sample_transforms`
        # may be the default list from the signature (or a caller-owned list),
        # and an in-place append would add one more arrange op to it on every
        # instantiation.
        sample_transforms = list(sample_transforms) + [ArrangeEvalSSD(fields)]
        super(SSDEvalFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            num_workers=num_workers,
            bufsize=bufsize,
            use_process=use_process,
            memsize=memsize)
        self.mode = 'VAL'


@register
class SSDTestFeed(DataFeed):
    # preset test feed for SSD; argument docs come from `DataFeed`
    __doc__ = DataFeed.__doc__

    # NOTE(review): the default `dataset` passes VOC_TEST_ANNOTATION
    # positionally, so it binds to `dataset_dir` of `SimpleDataSet.__init__`
    # rather than to `annotation` -- confirm whether a keyword argument was
    # intended.
    def __init__(self,
                 dataset=SimpleDataSet(VOC_TEST_ANNOTATION).__dict__,
                 fields=['image', 'im_id', 'im_shape'],
                 image_shape=[3, 300, 300],
                 sample_transforms=[
                     DecodeImage(to_rgb=True),
                     ResizeImage(target_size=300, use_cv2=False, interp=1),
                     Permute(),
                     NormalizeImage(
                         mean=[127.5, 127.5, 127.5],
                         std=[127.502231, 127.502231, 127.502231],
                         is_scale=False)
                 ],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 samples=-1,
                 drop_last=False,
                 num_workers=8,
                 bufsize=10,
                 use_process=False,
                 memsize=None):
        # Build a new list instead of appending in place: `sample_transforms`
        # may be the default list from the signature (or a caller-owned list),
        # and an in-place append would add one more arrange op to it on every
        # instantiation.
        sample_transforms = list(sample_transforms) + [ArrangeTestSSD()]
        # convert here so the base class receives a `SimpleDataSet` rather
        # than building a plain `DataSet` from the dict
        if isinstance(dataset, dict):
            dataset = SimpleDataSet(**dataset)
        super(SSDTestFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            num_workers=num_workers,
            bufsize=bufsize,
            use_process=use_process,
            memsize=memsize)
        self.mode = 'TEST'


@register
class YoloTrainFeed(DataFeed):
    # Training-mode feed for YOLO; the class documentation is shared with
    # the base `DataFeed`.
    __doc__ = DataFeed.__doc__

    def __init__(self,
                 dataset=CocoDataSet().__dict__,
                 fields=['image', 'gt_box', 'gt_label', 'gt_score'],
                 image_shape=[3, 608, 608],
                 sample_transforms=[
                     DecodeImage(to_rgb=True, with_mixup=True),
                     MixupImage(alpha=1.5, beta=1.5),
                     NormalizeBox(),
                     RandomDistort(),
                     ExpandImage(max_ratio=4., prob=.5,
                                 mean=[123.675, 116.28, 103.53]),
                     CropImage([[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                                [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]),
                     RandomInterpImage(target_size=608),
                     RandomFlipImage(is_normalized=True),
                     NormalizeImage(
                         mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225],
                         is_scale=True,
                         is_channel_first=False),
                     Permute(to_bgr=False),
                 ],
                 batch_transforms=[
                     RandomShape(sizes=[
                         320, 352, 384, 416, 448, 480, 512, 544, 576, 608
                     ])
                 ],
                 batch_size=8,
                 shuffle=True,
                 samples=-1,
                 drop_last=True,
                 with_background=False,
                 num_workers=8,
                 bufsize=128,
                 use_process=True,
                 memsize=None,
                 num_max_boxes=50,
                 mixup_epoch=250):
        # Copy before appending: the default list is built once when the
        # function is defined, so appending to it directly would add one more
        # ArrangeYOLO op for every feed created (and would also modify a list
        # owned by the caller).
        sample_transforms = list(sample_transforms)
        sample_transforms.append(ArrangeYOLO())
        super(YoloTrainFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            with_background=with_background,
            num_workers=num_workers,
            bufsize=bufsize,
            use_process=use_process,
            memsize=memsize)
        # YOLO-specific settings kept on the feed; they are not forwarded to
        # the base-class constructor.
        self.num_max_boxes = num_max_boxes
        self.mixup_epoch = mixup_epoch
        self.mode = 'TRAIN'


@register
class YoloEvalFeed(DataFeed):
    # Evaluation-mode feed for YOLO; the class documentation is shared with
    # the base `DataFeed`.
    __doc__ = DataFeed.__doc__

    def __init__(self,
                 dataset=CocoDataSet(COCO_VAL_ANNOTATION,
                                     COCO_VAL_IMAGE_DIR).__dict__,
                 fields=['image', 'im_size', 'im_id', 'gt_box',
                         'gt_label', 'is_difficult'],
                 image_shape=[3, 608, 608],
                 sample_transforms=[
                     DecodeImage(to_rgb=True),
                     ResizeImage(target_size=608, interp=2),
                     NormalizeImage(
                         mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225],
                         is_scale=True,
                         is_channel_first=False),
                     Permute(to_bgr=False),
                 ],
                 batch_transforms=[],
                 batch_size=8,
                 shuffle=False,
                 samples=-1,
                 drop_last=False,
                 with_background=False,
                 num_workers=8,
                 num_max_boxes=50,
                 use_process=False,
                 memsize=None):
        # Copy before modifying: the default list is built once when the
        # function is defined. Appending to it, or replacing its ResizeImage
        # entry below, would otherwise leak into every later feed that uses
        # the default (and into a list owned by the caller).
        sample_transforms = list(sample_transforms)
        sample_transforms.append(ArrangeEvalYOLO())
        super(YoloEvalFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            with_background=with_background,
            num_workers=num_workers,
            use_process=use_process,
            memsize=memsize)
        self.num_max_boxes = num_max_boxes
        self.mode = 'VAL'
        # bufsize is not a constructor parameter of this feed; it is fixed
        # here after base-class initialisation.
        self.bufsize = 128

        # support image shape config, resize image with image_shape
        # The replacement is done in place on the list that was passed to the
        # base class.
        # NOTE(review): this only reaches the feed if DataFeed.__init__ keeps
        # a reference to that same list object -- confirm against DataFeed.
        for i, trans in enumerate(sample_transforms):
            if isinstance(trans, ResizeImage):
                sample_transforms[i] = ResizeImage(
                    target_size=self.image_shape[-1],
                    interp=trans.interp)

972 973 974 975 976 977 978 979

@register
class YoloTestFeed(DataFeed):
    # Test (inference) mode feed for YOLO; the class documentation is shared
    # with the base `DataFeed`.
    __doc__ = DataFeed.__doc__

    def __init__(self,
                 dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
                                       COCO_VAL_IMAGE_DIR).__dict__,
                 fields=['image', 'im_size', 'im_id'],
                 image_shape=[3, 608, 608],
                 sample_transforms=[
                     DecodeImage(to_rgb=True),
                     ResizeImage(target_size=608, interp=2),
                     NormalizeImage(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225],
                                    is_scale=True,
                                    is_channel_first=False),
                     Permute(to_bgr=False),
                 ],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 samples=-1,
                 drop_last=False,
                 with_background=False,
                 num_workers=8,
                 num_max_boxes=50,
                 use_process=False,
                 memsize=None):
        # Copy before modifying: the default list is built once when the
        # function is defined. Appending to it, or replacing its ResizeImage
        # entry below, would otherwise leak into every later feed that uses
        # the default (and into a list owned by the caller).
        sample_transforms = list(sample_transforms)
        sample_transforms.append(ArrangeTestYOLO())
        # A dict-form dataset description is turned into a SimpleDataSet
        # before being handed to the base class.
        if isinstance(dataset, dict):
            dataset = SimpleDataSet(**dataset)
        super(YoloTestFeed, self).__init__(
            dataset,
            fields,
            image_shape,
            sample_transforms,
            batch_transforms,
            batch_size=batch_size,
            shuffle=shuffle,
            samples=samples,
            drop_last=drop_last,
            with_background=with_background,
            num_workers=num_workers,
            use_process=use_process,
            memsize=memsize)
        self.mode = 'TEST'
        # bufsize is not a constructor parameter of this feed; it is fixed
        # here after base-class initialisation.
        self.bufsize = 128

        # support image shape config, resize image with image_shape
        # The replacement is done in place on the list that was passed to the
        # base class.
        # NOTE(review): this only reaches the feed if DataFeed.__init__ keeps
        # a reference to that same list object -- confirm against DataFeed.
        for i, trans in enumerate(sample_transforms):
            if isinstance(trans, ResizeImage):
                sample_transforms[i] = ResizeImage(
                    target_size=self.image_shape[-1],
                    interp=trans.interp)
1027
# yapf: enable