Commit c9eec53e authored by 0YuanZhang0

Merge branch 'master' of https://github.com/PaddlePaddle/hapi into sequence_tagging

......@@ -43,13 +43,13 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
### Single-GPU prediction
Run the following command to run prediction:
```bash
python -u main.py --arch resnet50 -d --evaly-only /path/to/imagenet
python -u main.py --arch resnet50 -d --eval-only /path/to/imagenet
```
### Multi-GPU prediction
Run the following command to run prediction on multiple GPUs:
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --evaly-only /path/to/imagenet
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --eval-only /path/to/imagenet
```
......@@ -71,12 +71,17 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
* **weight-decay**: weight decay (regularization) coefficient for the model. Default: 1e-4
* **momentum**: momentum of the SGD optimizer. Default: 0.9
Note: when resuming training with ```--resume```, if your checkpoint path is ```./output/118.pdparams```, pass the path without the extension, i.e. ```--resume ./output/118```.
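For reference, a minimal sketch of why the extension can be dropped, mirroring the `_strip_postfix` helper touched later in this commit (this is an illustration, not the repo's exact code):

```python
# Checkpoint loading splits off a known extension first, so "./output/118"
# and "./output/118.pdparams" resolve to the same checkpoint prefix.
import os

path, ext = os.path.splitext("./output/118.pdparams")
assert ext in ['', '.pdparams', '.pdopt', '.pdmodel']
print(path)  # ./output/118
```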
## Models
| Model | top1 acc | top5 acc |
| --- | --- | --- |
| [ResNet18](https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams) | 71.72 | 90.60 |
| [ResNet34](https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams) | 75.02 | 92.31 |
| [ResNet50](https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams) | 76.27 | 93.03 |
| [ResNet101](https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams) | 78.33 | 94.04 |
| [ResNet152](https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams) | 78.78 | 94.40 |
| [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.92 | 90.65 |
| [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.16 | 89.89 |
| [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.30 | 90.74 |
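All of the weights above can be loaded through the corresponding factory functions in this example's `models` package; a hedged sketch (the import path is assumed from this repo's layout, as used by `main.py`):

```python
# `models` is this example's local package; resnet50 is the factory function
# shown later in this commit. pretrained=True downloads resnet50.pdparams
# from the URL listed in the table above.
import models

model = models.resnet50(pretrained=True)
```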
......
......@@ -76,6 +76,9 @@ def main():
device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if FLAGS.dynamic else None
model_list = [x for x in models.__dict__["__all__"]]
assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
model_list, FLAGS.arch)
model = models.__dict__[FLAGS.arch](pretrained=FLAGS.eval_only and
not FLAGS.resume)
......@@ -94,7 +97,13 @@ def main():
len(train_dataset) * 1. / FLAGS.batch_size / ParallelEnv().nranks),
parameter_list=model.parameters())
model.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 5)), inputs, labels)
model.prepare(
optim,
CrossEntropy(),
Accuracy(topk=(1, 5)),
inputs,
labels,
FLAGS.device)
if FLAGS.eval_only:
model.evaluate(
......@@ -152,7 +161,7 @@ if __name__ == '__main__':
type=str,
help="checkpoint path to resume")
parser.add_argument(
"--eval-only", action='store_true', help="enable dygraph mode")
"--eval-only", action='store_true', help="only evaluate the model")
parser.add_argument(
"--lr-scheduler",
default='piecewise',
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static graph mode
python -m paddle.distributed.launch main.py \
--arch mobilenet_v1 \
--epoch 120 \
--batch-size 64 \
--learning-rate 0.1 \
--lr-scheduler piecewise \
--milestones 30 60 90 \
--weight-decay 3e-5 \
-d \
data/ILSVRC2012/
\ No newline at end of file
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static graph mode
python -m paddle.distributed.launch main.py \
--arch mobilenet_v2 \
--epoch 240 \
--batch-size 64 \
--learning-rate 0.1 \
--lr-scheduler cosine \
--weight-decay 4e-5 \
-d \
data/ILSVRC2012/
\ No newline at end of file
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static graph mode
python -m paddle.distributed.launch main.py \
--arch resnet101 \
--epoch 90 \
--batch-size 64 \
--learning-rate 0.1 \
-d \
data/ILSVRC2012/
\ No newline at end of file
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static graph mode
python -m paddle.distributed.launch main.py \
--arch resnet152 \
--epoch 90 \
--batch-size 64 \
--learning-rate 0.1 \
-d \
data/ILSVRC2012/
\ No newline at end of file
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static graph mode
python -m paddle.distributed.launch main.py \
--arch resnet18 \
--epoch 120 \
--batch-size 64 \
--learning-rate 0.1 \
--lr-scheduler cosine \
-d \
data/ILSVRC2012/
\ No newline at end of file
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static graph mode
python -m paddle.distributed.launch main.py \
--arch resnet34 \
--epoch 120 \
--batch-size 64 \
--learning-rate 0.1 \
--lr-scheduler cosine \
-d \
data/ILSVRC2012/
\ No newline at end of file
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static graph mode
python -m paddle.distributed.launch main.py \
--arch resnet50 \
--epoch 90 \
--batch-size 64 \
--learning-rate 0.1 \
-d \
data/ILSVRC2012/
\ No newline at end of file
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static graph mode
python -m paddle.distributed.launch main.py \
--arch vgg16 \
--epoch 90 \
--batch-size 64 \
--learning-rate 0.01 \
--lr-scheduler cosine \
-d \
data/ILSVRC2012/
\ No newline at end of file
......@@ -100,19 +100,12 @@ class KineticsDataset(Dataset):
def __getitem__(self, idx):
pickle_path = os.path.join(self.pickle_dir, self.pickle_paths[idx])
try:
if six.PY2:
data = pickle.load(open(pickle_path, 'rb'))
else:
data = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
vid, label, frames = data
if len(frames) < 1:
logger.error("{} contains no frame".format(pickle_path))
sys.exit(-1)
except Exception as e:
logger.error("Load {} failed: {}".format(pickle_path, e))
sys.exit(-1)
if six.PY2:
data = pickle.load(open(pickle_path, 'rb'))
else:
data = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
vid, label, frames = data
if self.label_list is not None:
label = self.label_list.index(label)
......
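The `six.PY2` branch above handles the str/bytes pickling differences between Python 2 and 3. A minimal standalone equivalent, using a context manager so the file is always closed (assuming each pickle holds a `(vid, label, frames)` tuple as in this dataset):

```python
import pickle
import six

def load_kinetics_pickle(pickle_path):
    with open(pickle_path, 'rb') as f:  # context manager closes the file
        if six.PY2:
            data = pickle.load(f)
        else:
            # Python 3 needs encoding='bytes' to read Python 2 pickles
            data = pickle.load(f, encoding='bytes')
    vid, label, frames = data
    return vid, label, frames
```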
......@@ -18,7 +18,6 @@ from __future__ import print_function
import os
import cv2
import numpy as np
from pycocotools.coco import COCO
from paddle.io import Dataset
......@@ -91,6 +90,7 @@ class COCODataset(Dataset):
self._load_roidb_and_cname2cid()
def _load_roidb_and_cname2cid(self):
from pycocotools.coco import COCO
assert self._anno_path.endswith('.json'), \
    'invalid coco annotation file: ' + self._anno_path
coco = COCO(self._anno_path)
......
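The point of this change is that `pycocotools` is now imported lazily inside `_load_roidb_and_cname2cid`, so the dataset module can be imported without pycocotools installed. A minimal illustration of the same pattern:

```python
class COCODatasetSketch(object):
    def __init__(self, anno_path):
        self._anno_path = anno_path  # pycocotools not required yet

    def load(self):
        from pycocotools.coco import COCO  # imported only when actually used
        return COCO(self._anno_path)
```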
......@@ -799,11 +799,11 @@ class Model(fluid.dygraph.Layer):
format(key, list(state.shape), list(param.shape)))
return param, state
def _strip_postfix(path):
path, ext = os.path.splitext(path)
assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
"Unknown postfix {} from weights".format(ext)
return path
def _strip_postfix(path):
path, ext = os.path.splitext(path)
assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
"Unknown postfix {} from weights".format(ext)
return path
path = _strip_postfix(path)
param_state = _load_state_from_path(path + ".pdparams")
......@@ -936,35 +936,35 @@ class Model(fluid.dygraph.Layer):
Args:
train_data (Dataset|DataLoader): An iterable data loader used for
    training. An instance of paddle.io.Dataset or
    paddle.io.DataLoader is recommended.
    paddle.io.DataLoader is recommended. Default: None.
eval_data (Dataset|DataLoader): An iterable data loader used for
    evaluation at the end of each epoch. If None, no evaluation is performed.
    An instance of paddle.io.Dataset or paddle.io.DataLoader
    is recommended.
    is recommended. Default: None.
batch_size (int): The batch size of train_data and eval_data.
    When train_data and eval_data are both instances of DataLoader, this
    parameter will be ignored.
epochs (int): The number of epochs to train the model.
    parameter will be ignored. Default: 1.
epochs (int): The number of epochs to train the model. Default: 1.
eval_freq (int): The frequency, in number of epochs, at which an evaluation
    is performed.
    is performed. Default: 1.
log_freq (int): The frequency, in number of steps, at which training logs
    are printed.
    are printed. Default: 10.
save_dir (str|None): The directory to save checkpoints during training.
    If None, checkpoints will not be saved.
save_freq (int): The frequency, in number of epochs, to save checkpoints.
    If None, checkpoints will not be saved. Default: None.
save_freq (int): The frequency, in number of epochs, to save checkpoints. Default: 1.
verbose (int): The verbosity mode; should be 0, 1, or 2.
    0 = silent, 1 = progress bar, 2 = one line per epoch.
    0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
drop_last (bool): Whether to drop the last incomplete batch of train_data
    when the dataset size is not divisible by the batch size. When train_data
    is an instance of DataLoader, this parameter will be ignored.
    is an instance of DataLoader, this parameter will be ignored. Default: False.
shuffle (bool): Whether to shuffle train_data. When train_data is an instance
    of DataLoader, this parameter will be ignored.
    of DataLoader, this parameter will be ignored. Default: True.
num_workers (int): The number of subprocesses used to load data; 0 means data is
    loaded in the main process without subprocesses. When train_data and eval_data are
    both instances of DataLoader, this parameter will be ignored.
    both instances of DataLoader, this parameter will be ignored. Default: 0.
callbacks (Callback|None): A list of `Callback` instances to apply
during training. If None, `ProgBarLogger` and `ModelCheckpoint`
are automatically inserted.
are automatically inserted. Default: None.
"""
assert train_data is not None, \
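Given the defaults documented above, a hedged usage sketch (train_ds and val_ds stand in for user-provided paddle.io.Dataset instances; the model is assumed to have been prepared already):

```python
# Every keyword below is documented in the fit() docstring above.
model.fit(train_ds,              # paddle.io.Dataset (assumed user-provided)
          val_ds,                # evaluated every eval_freq epochs
          batch_size=64,         # ignored if both are DataLoader instances
          epochs=90,
          eval_freq=1,
          log_freq=10,           # print training logs every 10 steps
          save_dir='output',     # checkpoint written every save_freq epochs
          shuffle=True,
          num_workers=4)
```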
......@@ -1066,18 +1066,20 @@ class Model(fluid.dygraph.Layer):
evaluation. An instance of paddle.io.Dataset or
paddle.io.DataLoader is recommended.
batch_size (int): The batch size of train_data and eval_data.
    When train_data and eval_data are both instances of DataLoader, this
    parameter will be ignored.
    When eval_data is an instance of DataLoader, this argument will be ignored.
    Default: 1.
log_freq (int): The frequency, in number of steps, at which eval logs
    are printed.
    are printed. Default: 10.
verbose (int): The verbosity mode; should be 0, 1, or 2.
    0 = silent, 1 = progress bar, 2 = one line per epoch.
    0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
num_workers (int): The number of subprocesses used to load data; 0 means data is
    loaded in the main process without subprocesses. When train_data and eval_data are
    both instances of DataLoader, this parameter will be ignored.
    both instances of DataLoader, this parameter will be ignored. Default: 0.
callbacks (Callback|None): A list of `Callback` instances to apply
during training. If None, `ProgBarLogger` and `ModelCheckpoint`
are automatically inserted.
are automatically inserted. Default: None.
Returns:
dict: Result of metric.
"""
if fluid.in_dygraph_mode():
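A hedged usage sketch of the documented interface; the metric keys in the result dict depend on the metrics passed to prepare(), so the acc_top1/acc_top5 names shown here are assumptions for Accuracy(topk=(1, 5)):

```python
# evaluate() returns a dict of metric results per the docstring above;
# eval_ds stands in for a user-provided paddle.io.Dataset.
result = model.evaluate(eval_ds, batch_size=64, log_freq=10, num_workers=4)
print(result)  # assumed shape: {'acc_top1': 0.76, 'acc_top5': 0.93, ...}
```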
......@@ -1142,16 +1144,18 @@ class Model(fluid.dygraph.Layer):
is recommended.
batch_size (int): The batch size of train_data and eval_data.
    When train_data and eval_data are both instances of DataLoader, this
    parameter will be ignored.
    argument will be ignored. Default: 1.
num_workers (int): The number of subprocesses used to load data; 0 means data is
    loaded in the main process without subprocesses. When train_data and eval_data are
    both instances of DataLoader, this parameter will be ignored.
    both instances of DataLoader, this argument will be ignored. Default: 0.
stack_outputs (bool): whether to stack output fields like a batch. If an output
    field of a single sample has shape [X, Y] and test_data contains N samples,
    the predicted output field will have shape [N, X, Y] if stack_outputs is True,
    and will be a length-N list of arrays of shape [X, Y] if stack_outputs
    is False. stack_outputs=False is meant for LoDTensor outputs, and setting
    it to True is recommended if the outputs contain no LoDTensor. Default False
    it to True is recommended if the outputs contain no LoDTensor. Default: False.
Returns:
list: outputs of the model.
"""
if fluid.in_dygraph_mode():
......
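The stack_outputs semantics above can be summarized with a hedged sketch (test_ds stands in for a user-provided dataset; the parameter name follows the docstring body):

```python
# Per-sample output of shape [X, Y], N samples in test_ds:
stacked = model.predict(test_ds, batch_size=64, stack_outputs=True)
# -> each output field is one array of shape [N, X, Y]

listed = model.predict(test_ds, batch_size=64, stack_outputs=False)
# -> each output field is a length-N list of [X, Y] arrays (LoDTensor-safe)
```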
......@@ -263,7 +263,7 @@ class MobileNetV1(Model):
def _mobilenet(arch, pretrained=False, **kwargs):
model = MobileNetV1(num_classes=1000, with_pool=True, **kwargs)
model = MobileNetV1(**kwargs)
if pretrained:
assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
arch)
......@@ -276,12 +276,13 @@ def _mobilenet(arch, pretrained=False, **kwargs):
return model
def mobilenet_v1(pretrained=False, scale=1.0):
def mobilenet_v1(pretrained=False, scale=1.0, **kwargs):
"""MobileNetV1
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
scale: (float): scale of channels in each layer. Default: 1.0.
"""
model = _mobilenet('mobilenetv1_' + str(scale), pretrained, scale=scale)
model = _mobilenet(
'mobilenetv1_' + str(scale), pretrained, scale=scale, **kwargs)
return model
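With **kwargs now forwarded, constructor options that were previously hard-coded (num_classes, with_pool, per the removed line above) can be set through the factory; a hedged example:

```python
# num_classes is a MobileNetV1 constructor argument whose hard-coded
# default (1000) was removed in this commit.
model = mobilenet_v1(pretrained=False, scale=1.0, num_classes=100)
```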
......@@ -237,7 +237,7 @@ class MobileNetV2(Model):
def _mobilenet(arch, pretrained=False, **kwargs):
model = MobileNetV2(num_classes=1000, with_pool=True, **kwargs)
model = MobileNetV2(**kwargs)
if pretrained:
assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
arch)
......@@ -250,12 +250,13 @@ def _mobilenet(arch, pretrained=False, **kwargs):
return model
def mobilenet_v2(pretrained=False, scale=1.0):
def mobilenet_v2(pretrained=False, scale=1.0, **kwargs):
"""MobileNetV2
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
scale: (float): scale of channels in each layer. Default: 1.0.
"""
model = _mobilenet('mobilenetv2_' + str(scale), pretrained, scale=scale)
model = _mobilenet(
'mobilenetv2_' + str(scale), pretrained, scale=scale, **kwargs)
return model
......@@ -30,8 +30,18 @@ __all__ = [
]
model_urls = {
'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams',
'0ba53eea9bc970962d0ef96f7b94057e'),
'resnet34': ('https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams',
'46bc9f7c3dd2e55b7866285bee91eff3'),
'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams',
'0884c9087266496c41c60d14a96f8530')
'0884c9087266496c41c60d14a96f8530'),
'resnet101':
('https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams',
'fb07a451df331e4b0bb861ed97c3a9b9'),
'resnet152':
('https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams',
'f9c700f26d3644bb76ad2226ed5f5713'),
}
......@@ -252,8 +262,8 @@ class ResNet(Model):
return x
def _resnet(arch, Block, depth, pretrained):
model = ResNet(Block, depth, num_classes=1000, with_pool=True)
def _resnet(arch, Block, depth, pretrained, **kwargs):
model = ResNet(Block, depth, **kwargs)
if pretrained:
assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
arch)
......@@ -265,46 +275,46 @@ def _resnet(arch, Block, depth, pretrained):
return model
def resnet18(pretrained=False):
def resnet18(pretrained=False, **kwargs):
"""ResNet 18-layer model
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
return _resnet('resnet18', BasicBlock, 18, pretrained)
return _resnet('resnet18', BasicBlock, 18, pretrained, **kwargs)
def resnet34(pretrained=False):
def resnet34(pretrained=False, **kwargs):
"""ResNet 34-layer model
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
return _resnet('resnet34', BasicBlock, 34, pretrained)
return _resnet('resnet34', BasicBlock, 34, pretrained, **kwargs)
def resnet50(pretrained=False):
def resnet50(pretrained=False, **kwargs):
"""ResNet 50-layer model
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
return _resnet('resnet50', BottleneckBlock, 50, pretrained)
return _resnet('resnet50', BottleneckBlock, 50, pretrained, **kwargs)
def resnet101(pretrained=False):
def resnet101(pretrained=False, **kwargs):
"""ResNet 101-layer model
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
return _resnet('resnet101', BottleneckBlock, 101, pretrained)
return _resnet('resnet101', BottleneckBlock, 101, pretrained, **kwargs)
def resnet152(pretrained=False):
def resnet152(pretrained=False, **kwargs):
"""ResNet 152-layer model
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
return _resnet('resnet152', BottleneckBlock, 152, pretrained)
return _resnet('resnet152', BottleneckBlock, 152, pretrained, **kwargs)
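The same **kwargs forwarding applies to every resnet factory, so the previously hard-coded ResNet constructor defaults (num_classes=1000, with_pool=True) can now be overridden; a hedged example:

```python
# A 10-class ResNet-50 head, enabled by the **kwargs pass-through above.
model = resnet50(pretrained=False, num_classes=10)
```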
......@@ -137,7 +137,7 @@ def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
return model
def vgg11(pretrained=False, batch_norm=False):
def vgg11(pretrained=False, batch_norm=False, **kwargs):
"""VGG 11-layer model
Args:
......@@ -147,10 +147,10 @@ def vgg11(pretrained=False, batch_norm=False):
model_name = 'vgg11'
if batch_norm:
model_name += ('_bn')
return _vgg(model_name, 'A', batch_norm, pretrained)
return _vgg(model_name, 'A', batch_norm, pretrained, **kwargs)
def vgg13(pretrained=False, batch_norm=False):
def vgg13(pretrained=False, batch_norm=False, **kwargs):
"""VGG 13-layer model
Args:
......@@ -160,10 +160,10 @@ def vgg13(pretrained=False, batch_norm=False):
model_name = 'vgg13'
if batch_norm:
model_name += ('_bn')
return _vgg(model_name, 'B', batch_norm, pretrained)
return _vgg(model_name, 'B', batch_norm, pretrained, **kwargs)
def vgg16(pretrained=False, batch_norm=False):
def vgg16(pretrained=False, batch_norm=False, **kwargs):
"""VGG 16-layer model
Args:
......@@ -173,10 +173,10 @@ def vgg16(pretrained=False, batch_norm=False):
model_name = 'vgg16'
if batch_norm:
model_name += ('_bn')
return _vgg(model_name, 'D', batch_norm, pretrained)
return _vgg(model_name, 'D', batch_norm, pretrained, **kwargs)
def vgg19(pretrained=False, batch_norm=False):
def vgg19(pretrained=False, batch_norm=False, **kwargs):
"""VGG 19-layer model
Args:
......@@ -186,4 +186,4 @@ def vgg19(pretrained=False, batch_norm=False):
model_name = 'vgg19'
if batch_norm:
model_name += ('_bn')
return _vgg(model_name, 'E', batch_norm, pretrained)
return _vgg(model_name, 'E', batch_norm, pretrained, **kwargs)
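Note the `_bn` suffix appended to model_name when batch_norm=True: pretrained weights for the plain and batch-norm variants are looked up under different keys. A hedged example:

```python
# Weight lookup for this call resolves under the 'vgg16_bn' key.
model = vgg16(pretrained=False, batch_norm=True)
```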
......@@ -71,7 +71,7 @@ class Compose(object):
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
return data
......@@ -92,6 +92,7 @@ class BatchCompose(object):
these transforms perform on batch data.
"""
def __init__(self, transforms=[]):
self.transforms = transforms
......@@ -102,7 +103,7 @@ class BatchCompose(object):
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform batch transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
# sample list to batch data
......@@ -112,7 +113,7 @@ class BatchCompose(object):
class Resize(object):
"""Resize the input PIL Image to the given size.
"""Resize the input Image to the given size.
Args:
size (int|list|tuple): Desired output size. If size is a sequence like
......@@ -130,13 +131,6 @@ class Resize(object):
self.interpolation = interpolation
def __call__(self, img, lbl):
"""
Args:
img (PIL Image): Image to be scaled.
Returns:
PIL Image: Rescaled image.
"""
return F.resize(img, self.size, self.interpolation), lbl
......@@ -328,18 +322,22 @@ class Permute(object):
Input image should be HWC mode and an instance of numpy.ndarray.
Args:
mode: Output mode of input. Use "CHW" mode by default.
mode: Output mode of input. Default: "CHW".
to_rgb: convert 'bgr' image to 'rgb'. Default: True.
"""
def __init__(self, mode="CHW"):
def __init__(self, mode="CHW", to_rgb=True):
assert mode in [
"CHW"
], "Only support 'CHW' mode, but received mode: {}".format(mode)
self.mode = mode
self.to_rgb = to_rgb
def __call__(self, img, lbl):
if self.to_rgb:
img = img[..., ::-1]
if self.mode == "CHW":
return img.transpose((2, 0, 1))[::-1, ...], lbl
return img.transpose((2, 0, 1)), lbl
return img, lbl
......
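A hedged illustration of the new Permute behavior (assuming the Permute class as shown above is importable): the BGR-to-RGB flip is now an explicit to_rgb option applied before the HWC-to-CHW transpose, instead of being fused into the transpose.

```python
import numpy as np

img = np.zeros((224, 224, 3), dtype='float32')       # HWC, assumed BGR-ordered
out, _ = Permute(mode="CHW", to_rgb=True)(img, None)  # flip channels, then transpose
print(out.shape)  # (3, 224, 224)
```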