diff --git a/examples/image_classification/README.MD b/examples/image_classification/README.MD
index 1345949c76b0b55c461574bce271c6417a112cbb..5b50370dd4b2ad76e62f0e99877849f5fe2fed8f 100644
--- a/examples/image_classification/README.MD
+++ b/examples/image_classification/README.MD
@@ -43,13 +43,13 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
 ### Single-card prediction
 Run the following command to predict:
 ```bash
-python -u main.py --arch resnet50 -d --evaly-only /path/to/imagenet
+python -u main.py --arch resnet50 -d --eval-only /path/to/imagenet
 ```
 
 ### Multi-card prediction
 Run the following command to predict on multiple cards:
 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --evaly-only /path/to/imagenet
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --eval-only /path/to/imagenet
 ```
 
@@ -71,12 +71,17 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
 * **weight-decay**: weight decay (L2 regularization) coefficient. Default: 1e-4
 * **momentum**: momentum of the SGD optimizer. Default: 0.9
 
+Note: when resuming training with `--resume`, if your checkpoint path is `./output/118.pdparams`, pass the path without the suffix, i.e. `--resume ./output/118`.
 
 ## Models
 
 | Model | top1 acc | top5 acc |
 | --- | --- | --- |
+| [ResNet18](https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams) | 71.72 | 90.60 |
+| [ResNet34](https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams) | 75.02 | 92.31 |
 | [ResNet50](https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams) | 76.27 | 93.03 |
+| [ResNet101](https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams) | 78.33 | 94.04 |
+| [ResNet152](https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams) | 78.78 | 94.40 |
 | [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.92 | 90.65 |
 | [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.16 | 89.89 |
 | [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.30 | 90.74 |
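The README note above says `--resume` takes the checkpoint path without its extension; the `_strip_postfix` helper later in this diff (hapi/model.py) is what strips a known suffix before the `.pdparams`/`.pdopt` files are located. A minimal standalone sketch of that behavior (the function body is copied from the diff; the example paths are hypothetical):

```python
import os

def _strip_postfix(path):
    # Accept either a bare prefix or one of the known Paddle suffixes,
    # and return the prefix that .pdparams/.pdopt get appended to.
    path, ext = os.path.splitext(path)
    assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
        "Unknown postfix {} from weights".format(ext)
    return path

print(_strip_postfix("./output/118.pdparams"))  # ./output/118
print(_strip_postfix("./output/118"))           # ./output/118
```

Judging from this helper, either form resolves to the same prefix, which is why the suffix is unnecessary on the command line.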
diff --git a/examples/image_classification/main.py b/examples/image_classification/main.py
index 546991528631909d5f75caec4df96c63053e7fdb..76360df91cd64a66e2e288c90a37ac667cdc3eea 100644
--- a/examples/image_classification/main.py
+++ b/examples/image_classification/main.py
@@ -76,6 +76,9 @@ def main():
     device = set_device(FLAGS.device)
     fluid.enable_dygraph(device) if FLAGS.dynamic else None
 
+    model_list = list(models.__dict__["__all__"])
+    assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
+        model_list, FLAGS.arch)
     model = models.__dict__[FLAGS.arch](pretrained=FLAGS.eval_only and
                                         not FLAGS.resume)
 
@@ -94,7 +97,13 @@ def main():
             len(train_dataset) * 1. / FLAGS.batch_size / ParallelEnv().nranks),
         parameter_list=model.parameters())
-    model.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 5)), inputs, labels)
+    model.prepare(
+        optim,
+        CrossEntropy(),
+        Accuracy(topk=(1, 5)),
+        inputs,
+        labels,
+        FLAGS.device)
 
     if FLAGS.eval_only:
         model.evaluate(
@@ -152,7 +161,7 @@ if __name__ == '__main__':
         type=str,
         help="checkpoint path to resume")
     parser.add_argument(
-        "--eval-only", action='store_true', help="enable dygraph mode")
+        "--eval-only", action='store_true', help="only evaluate the model")
     parser.add_argument(
         "--lr-scheduler",
        default='piecewise',
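For context on the scheduler boundary computation above: the piecewise learning-rate schedule divides the dataset size by the per-card batch size and the number of ranks to get steps per epoch. A back-of-the-envelope check with hypothetical values (the 1,281,167 sample count is the usual ILSVRC2012 train-set size, not a number taken from this diff):

```python
# Steps per epoch as computed in main.py (sketch; all values are assumptions).
num_samples = 1281167   # typical ILSVRC2012 train set size
batch_size = 64         # per-card batch size used by the scripts below
nranks = 4              # one rank per GPU in CUDA_VISIBLE_DEVICES=0,1,2,3
steps_per_epoch = int(num_samples * 1. / batch_size / nranks)
print(steps_per_epoch)  # 5004
```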
diff --git a/examples/image_classification/scripts/mobilenet_v1_x1.0.sh b/examples/image_classification/scripts/mobilenet_v1_x1.0.sh
new file mode 100644
index 0000000000000000000000000000000000000000..16734e64c0fe3e6e93eacadd89ce366b48969dbd
--- /dev/null
+++ b/examples/image_classification/scripts/mobilenet_v1_x1.0.sh
@@ -0,0 +1,13 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch mobilenet_v1 \
+    --epoch 120 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    --lr-scheduler piecewise \
+    --milestones 30 60 90 \
+    --weight-decay 3e-5 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/mobilenet_v2_x1.0.sh b/examples/image_classification/scripts/mobilenet_v2_x1.0.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2616d7ef8668b0d85fa56cf28c0bf95e86212fd1
--- /dev/null
+++ b/examples/image_classification/scripts/mobilenet_v2_x1.0.sh
@@ -0,0 +1,12 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch mobilenet_v2 \
+    --epoch 240 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    --lr-scheduler cosine \
+    --weight-decay 4e-5 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet101.sh b/examples/image_classification/scripts/resnet101.sh
new file mode 100644
index 0000000000000000000000000000000000000000..34844cafb61f7373b9e9f9c997dc44bb5a3308ca
--- /dev/null
+++ b/examples/image_classification/scripts/resnet101.sh
@@ -0,0 +1,10 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet101 \
+    --epoch 90 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet152.sh b/examples/image_classification/scripts/resnet152.sh
new file mode 100644
index 0000000000000000000000000000000000000000..26541637b1a2d45a6e65db513f3a806b9aa92594
--- /dev/null
+++ b/examples/image_classification/scripts/resnet152.sh
@@ -0,0 +1,10 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet152 \
+    --epoch 90 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet18.sh b/examples/image_classification/scripts/resnet18.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f1f20e55bed5106b58b3b90b5909d5c93c09e4cd
--- /dev/null
+++ b/examples/image_classification/scripts/resnet18.sh
@@ -0,0 +1,11 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet18 \
+    --epoch 120 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    --lr-scheduler cosine \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet34.sh b/examples/image_classification/scripts/resnet34.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a4a36614dfad023d98a3e5ae3e26ddc96449e2f2
--- /dev/null
+++ b/examples/image_classification/scripts/resnet34.sh
@@ -0,0 +1,11 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet34 \
+    --epoch 120 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    --lr-scheduler cosine \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet50.sh b/examples/image_classification/scripts/resnet50.sh
new file mode 100644
index 0000000000000000000000000000000000000000..50a0e7398bd68aa046345095774b5403022331d2
--- /dev/null
+++ b/examples/image_classification/scripts/resnet50.sh
@@ -0,0 +1,10 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet50 \
+    --epoch 90 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/vgg16.sh b/examples/image_classification/scripts/vgg16.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7372ce315efef42524550fec1dd549146f4e1a54
--- /dev/null
+++ b/examples/image_classification/scripts/vgg16.sh
@@ -0,0 +1,11 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch vgg16 \
+    --epoch 90 \
+    --batch-size 64 \
+    --learning-rate 0.01 \
+    --lr-scheduler cosine \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/tsm/kinetics_dataset.py b/examples/tsm/kinetics_dataset.py
index c8570018cfbcf808917f28806ab841da874782d3..123d89814a8c631569cd0503750cafac631cca22 100644
--- a/examples/tsm/kinetics_dataset.py
+++ b/examples/tsm/kinetics_dataset.py
@@ -100,19 +100,12 @@ class KineticsDataset(Dataset):
     def __getitem__(self, idx):
         pickle_path = os.path.join(self.pickle_dir, self.pickle_paths[idx])
 
-        try:
-            if six.PY2:
-                data = pickle.load(open(pickle_path, 'rb'))
-            else:
-                data = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
-
-            vid, label, frames = data
-            if len(frames) < 1:
-                logger.error("{} contains no frame".format(pickle_path))
-                sys.exit(-1)
-        except Exception as e:
-            logger.error("Load {} failed: {}".format(pickle_path, e))
-            sys.exit(-1)
+        if six.PY2:
+            data = pickle.load(open(pickle_path, 'rb'))
+        else:
+            data = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
+
+        vid, label, frames = data
 
         if self.label_list is not None:
             label = self.label_list.index(label)
diff --git a/hapi/datasets/coco.py b/hapi/datasets/coco.py
index f1ab97281a6e0e20834c33f1e6663903f25349a0..50d31cff06692e30fb153983023d4c8ed7476f2c 100644
--- a/hapi/datasets/coco.py
+++ b/hapi/datasets/coco.py
@@ -18,7 +18,6 @@ from __future__ import print_function
 import os
 import cv2
 import numpy as np
-from pycocotools.coco import COCO
 
 from paddle.io import Dataset
 
@@ -91,6 +90,7 @@ class COCODataset(Dataset):
         self._load_roidb_and_cname2cid()
 
     def _load_roidb_and_cname2cid(self):
+        from pycocotools.coco import COCO
         assert self._anno_path.endswith('.json'), \
             'invalid coco annotation file: ' + anno_path
         coco = COCO(self._anno_path)
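The coco.py change above moves the pycocotools import from module scope into the only method that needs it, so importing hapi.datasets no longer hard-requires pycocotools. The same pattern in isolation (a hedged sketch; `LazyCOCO` and its fields are illustrative, not part of this diff):

```python
class LazyCOCO(object):
    """Sketch of deferring an optional dependency until first use."""

    def __init__(self, anno_path):
        self.anno_path = anno_path
        self.coco = self._load()

    def _load(self):
        # Deferred import: an ImportError, if any, surfaces only when an
        # instance is actually constructed, not when the module is imported.
        from pycocotools.coco import COCO
        return COCO(self.anno_path)
```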
diff --git a/hapi/model.py b/hapi/model.py
index f4e6744df5107d345c873f6fa45269f704615708..4d27355603f111dfc637d68e7efa9695369b504b 100644
--- a/hapi/model.py
+++ b/hapi/model.py
@@ -799,11 +799,11 @@ class Model(fluid.dygraph.Layer):
                     format(key, list(state.shape), list(param.shape)))
             return param, state
 
-    def _strip_postfix(path):
-        path, ext = os.path.splitext(path)
-        assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
-            "Unknown postfix {} from weights".format(ext)
-        return path
+        def _strip_postfix(path):
+            path, ext = os.path.splitext(path)
+            assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
+                "Unknown postfix {} from weights".format(ext)
+            return path
 
         path = _strip_postfix(path)
         param_state = _load_state_from_path(path + ".pdparams")
@@ -936,35 +936,35 @@ class Model(fluid.dygraph.Layer):
         Args:
             train_data (Dataset|DataLoader): An iterable data loader is used for
                 train. An instance of paddle.io.Dataset or
-                paddle.io.Dataloader is recomended.
+                paddle.io.DataLoader is recommended. Default: None.
             eval_data (Dataset|DataLoader): An iterable data loader is used for
                 evaluation at the end of epoch. If None, will not do evaluation.
                 An instance of paddle.io.Dataset or paddle.io.DataLoader
-                is recomended.
+                is recommended. Default: None.
             batch_size (int): Integer number. The batch size of train_data and eval_data.
                 When train_data and eval_data are both the instance of DataLoader, this
-                parameter will be ignored.
-            epochs (int): Integer number. The number of epochs to train the model.
+                parameter will be ignored. Default: 1.
+            epochs (int): Integer number. The number of epochs to train the model. Default: 1.
             eval_freq (int): The frequency, in number of epochs, an evaluation
-                is performed.
+                is performed. Default: 1.
             log_freq (int): The frequency, in number of steps, the training logs
-                are printed.
+                are printed. Default: 10.
             save_dir (str|None): The directory to save checkpoint during training.
-                If None, will not save checkpoint.
-            save_freq (int): The frequency, in number of epochs, to save checkpoint.
+                If None, will not save checkpoint. Default: None.
+            save_freq (int): The frequency, in number of epochs, to save checkpoint. Default: 1.
             verbose (int): The verbosity mode, should be 0, 1, or 2.
-                0 = silent, 1 = progress bar, 2 = one line per epoch.
+                0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
             drop_last (bool): whether to drop the last incomplete batch of train_data
                 when dataset size is not divisible by the batch size. When train_data
-                is an instance of Dataloader, this parameter will be ignored.
+                is an instance of DataLoader, this parameter will be ignored. Default: False.
             shuffle (bool): whether to shuffle train_data. When train_data is an instance
-                of Dataloader, this parameter will be ignored.
+                of DataLoader, this parameter will be ignored. Default: True.
             num_workers (int): the number of subprocesses to load data, 0 for no subprocess
                 used and loading data in main process. When train_data and eval_data are
-                both the instance of Dataloader, this parameter will be ignored.
+                both the instance of DataLoader, this parameter will be ignored. Default: 0.
             callbacks (Callback|None): A list of `Callback` instances to apply
                 during training. If None, `ProgBarLogger` and `ModelCheckpoint`
-                are automatically inserted.
+                are automatically inserted. Default: None.
         """
 
         assert train_data is not None, \
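Putting the documented defaults together, a minimal usage sketch of the fit/evaluate flow (this assumes `model` has already gone through `model.prepare(...)` as in main.py, and that `train_dataset`/`val_dataset` are `paddle.io.Dataset` instances; the names and numbers are illustrative, not taken from this diff):

```python
model.fit(
    train_dataset,        # train_data
    val_dataset,          # eval_data, evaluated every eval_freq epochs
    batch_size=64,
    epochs=90,
    save_dir='output',    # checkpoints written every save_freq epochs
    num_workers=4)

# evaluate() returns a dict of metric results, per the Returns entry
# added in the hunk below.
metrics = model.evaluate(val_dataset, batch_size=64)
```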
@@ -1066,18 +1066,20 @@ class Model(fluid.dygraph.Layer):
                 evaluation. An instance of paddle.io.Dataset or
                 paddle.io.DataLoader is recommended.
             batch_size (int): Integer number. The batch size of train_data and eval_data.
-                When train_data and eval_data are both the instance of Dataloader, this
-                parameter will be ignored.
+                When eval_data is an instance of DataLoader, this argument will be ignored.
+                Default: 1.
             log_freq (int): The frequency, in number of steps, the eval logs
-                are printed.
+                are printed. Default: 10.
             verbose (int): The verbosity mode, should be 0, 1, or 2.
-                0 = silent, 1 = progress bar, 2 = one line per epoch.
+                0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
             num_workers (int): The number of subprocesses to load data, 0 for no subprocess
                 used and loading data in main process. When train_data and eval_data are
-                both the instance of Dataloader, this parameter will be ignored.
+                both the instance of DataLoader, this parameter will be ignored. Default: 0.
             callbacks (Callback|None): A list of `Callback` instances to apply
                 during training. If None, `ProgBarLogger` and `ModelCheckpoint`
-                are automatically inserted.
+                are automatically inserted. Default: None.
+        Returns:
+            dict: Result of metrics.
         """
 
         if fluid.in_dygraph_mode():
@@ -1142,16 +1144,18 @@ class Model(fluid.dygraph.Layer):
             is recommended.
             batch_size (int): Integer number. The batch size of train_data and eval_data.
                 When train_data and eval_data are both the instance of DataLoader, this
-                parameter will be ignored.
+                argument will be ignored. Default: 1.
             num_workers (int): the number of subprocesses to load data, 0 for no subprocess
                 used and loading data in main process. When train_data and eval_data are
-                both the instance of Dataloader, this parameter will be ignored.
+                both the instance of DataLoader, this argument will be ignored. Default: 0.
             stack_output (bool): whether to stack output fields like a batch; if an output
                 field of a sample is in shape [X, Y] and test_data contains N samples, the
                 predict output field will be in shape [N, X, Y] if stack_output is True, and
                 will be a length N list in shape [[X, Y], [X, Y], ....[X, Y]] if stack_outputs
                 is False. stack_outputs as False is used for LoDTensor output situation,
-                it is recommended set as True if outputs contains no LoDTensor. Default False
+                it is recommended to set as True if outputs contain no LoDTensor. Default: False.
+        Returns:
+            list: Output of the model.
         """
 
         if fluid.in_dygraph_mode():
diff --git a/hapi/vision/models/mobilenetv1.py b/hapi/vision/models/mobilenetv1.py
index ff27cb9c5d7745361858c3f6ec13e5865fafa605..31c0acbee2fdc107b0d776605c296c2c9296bcfd 100644
--- a/hapi/vision/models/mobilenetv1.py
+++ b/hapi/vision/models/mobilenetv1.py
@@ -263,7 +263,7 @@ class MobileNetV1(Model):
 
 
 def _mobilenet(arch, pretrained=False, **kwargs):
-    model = MobileNetV1(num_classes=1000, with_pool=True, **kwargs)
+    model = MobileNetV1(**kwargs)
     if pretrained:
         assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
             arch)
@@ -276,12 +276,13 @@ def _mobilenet(arch, pretrained=False, **kwargs):
     return model
 
 
-def mobilenet_v1(pretrained=False, scale=1.0):
+def mobilenet_v1(pretrained=False, scale=1.0, **kwargs):
     """MobileNetV1
 
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
         scale (float): scale of channels in each layer. Default: 1.0.
""" - model = _mobilenet('mobilenetv1_' + str(scale), pretrained, scale=scale) + model = _mobilenet( + 'mobilenetv1_' + str(scale), pretrained, scale=scale, **kwargs) return model diff --git a/hapi/vision/models/mobilenetv2.py b/hapi/vision/models/mobilenetv2.py index 02db68e569cea06dac876dd3b7bc044cd15542f7..d624625bcda1b763a0b3e511b6146776245e2fd5 100644 --- a/hapi/vision/models/mobilenetv2.py +++ b/hapi/vision/models/mobilenetv2.py @@ -237,7 +237,7 @@ class MobileNetV2(Model): def _mobilenet(arch, pretrained=False, **kwargs): - model = MobileNetV2(num_classes=1000, with_pool=True, **kwargs) + model = MobileNetV2(**kwargs) if pretrained: assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( arch) @@ -250,12 +250,13 @@ def _mobilenet(arch, pretrained=False, **kwargs): return model -def mobilenet_v2(pretrained=False, scale=1.0): +def mobilenet_v2(pretrained=False, scale=1.0, **kwargs): """MobileNetV2 Args: pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False. scale: (float): scale of channels in each layer. Default: 1.0. """ - model = _mobilenet('mobilenetv2_' + str(scale), pretrained, scale=scale) + model = _mobilenet( + 'mobilenetv2_' + str(scale), pretrained, scale=scale, **kwargs) return model diff --git a/hapi/vision/models/resnet.py b/hapi/vision/models/resnet.py index 804cc3534ad4c3cda4f800b41d8567922450e037..ac0944ee651224b106db71d0c87e9e5c29fd14d9 100644 --- a/hapi/vision/models/resnet.py +++ b/hapi/vision/models/resnet.py @@ -30,8 +30,18 @@ __all__ = [ ] model_urls = { + 'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams', + '0ba53eea9bc970962d0ef96f7b94057e'), + 'resnet34': ('https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams', + '46bc9f7c3dd2e55b7866285bee91eff3'), 'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams', - '0884c9087266496c41c60d14a96f8530') + '0884c9087266496c41c60d14a96f8530'), + 'resnet101': + ('https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams', + 'fb07a451df331e4b0bb861ed97c3a9b9'), + 'resnet152': + ('https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams', + 'f9c700f26d3644bb76ad2226ed5f5713'), } @@ -252,8 +262,8 @@ class ResNet(Model): return x -def _resnet(arch, Block, depth, pretrained): - model = ResNet(Block, depth, num_classes=1000, with_pool=True) +def _resnet(arch, Block, depth, pretrained, **kwargs): + model = ResNet(Block, depth, **kwargs) if pretrained: assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( arch) @@ -265,46 +275,46 @@ def _resnet(arch, Block, depth, pretrained): return model -def resnet18(pretrained=False): +def resnet18(pretrained=False, **kwargs): """ResNet 18-layer model Args: pretrained (bool): If True, returns a model pre-trained on ImageNet """ - return _resnet('resnet18', BasicBlock, 18, pretrained) + return _resnet('resnet18', BasicBlock, 18, pretrained, **kwargs) -def resnet34(pretrained=False): +def resnet34(pretrained=False, **kwargs): """ResNet 34-layer model Args: pretrained (bool): If True, returns a model pre-trained on ImageNet """ - return _resnet('resnet34', BasicBlock, 34, pretrained) + return _resnet('resnet34', BasicBlock, 34, pretrained, **kwargs) -def resnet50(pretrained=False): +def resnet50(pretrained=False, **kwargs): """ResNet 50-layer model Args: pretrained (bool): If True, returns a model pre-trained on ImageNet """ - return _resnet('resnet50', BottleneckBlock, 50, pretrained) + return 
 
 
-def resnet101(pretrained=False):
+def resnet101(pretrained=False, **kwargs):
     """ResNet 101-layer model
 
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    return _resnet('resnet101', BottleneckBlock, 101, pretrained)
+    return _resnet('resnet101', BottleneckBlock, 101, pretrained, **kwargs)
 
 
-def resnet152(pretrained=False):
+def resnet152(pretrained=False, **kwargs):
     """ResNet 152-layer model
 
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    return _resnet('resnet152', BottleneckBlock, 152, pretrained)
+    return _resnet('resnet152', BottleneckBlock, 152, pretrained, **kwargs)
diff --git a/hapi/vision/models/vgg.py b/hapi/vision/models/vgg.py
index 5ef09bd665e4308739651d868203a4a56b14de38..41cf34eddf7d4d379f9ea3a6bc5490f9763919dc 100644
--- a/hapi/vision/models/vgg.py
+++ b/hapi/vision/models/vgg.py
@@ -137,7 +137,7 @@ def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
     return model
 
 
-def vgg11(pretrained=False, batch_norm=False):
+def vgg11(pretrained=False, batch_norm=False, **kwargs):
     """VGG 11-layer model
 
     Args:
@@ -147,10 +147,10 @@
     model_name = 'vgg11'
     if batch_norm:
         model_name += ('_bn')
-    return _vgg(model_name, 'A', batch_norm, pretrained)
+    return _vgg(model_name, 'A', batch_norm, pretrained, **kwargs)
 
 
-def vgg13(pretrained=False, batch_norm=False):
+def vgg13(pretrained=False, batch_norm=False, **kwargs):
     """VGG 13-layer model
 
     Args:
@@ -160,10 +160,10 @@
     model_name = 'vgg13'
     if batch_norm:
         model_name += ('_bn')
-    return _vgg(model_name, 'B', batch_norm, pretrained)
+    return _vgg(model_name, 'B', batch_norm, pretrained, **kwargs)
 
 
-def vgg16(pretrained=False, batch_norm=False):
+def vgg16(pretrained=False, batch_norm=False, **kwargs):
     """VGG 16-layer model
 
     Args:
@@ -173,10 +173,10 @@
     model_name = 'vgg16'
     if batch_norm:
         model_name += ('_bn')
-    return _vgg(model_name, 'D', batch_norm, pretrained)
+    return _vgg(model_name, 'D', batch_norm, pretrained, **kwargs)
 
 
-def vgg19(pretrained=False, batch_norm=False):
+def vgg19(pretrained=False, batch_norm=False, **kwargs):
     """VGG 19-layer model
 
     Args:
@@ -186,4 +186,4 @@
     model_name = 'vgg19'
     if batch_norm:
         model_name += ('_bn')
-    return _vgg(model_name, 'E', batch_norm, pretrained)
+    return _vgg(model_name, 'E', batch_norm, pretrained, **kwargs)
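Since `**kwargs` is now threaded through every wrapper above down to the underlying constructors, arguments that `_resnet`/`_mobilenet` previously hard-coded (`num_classes=1000`, `with_pool=True`) can be overridden at construction time. A hedged sketch (the import path is assumed from the file layout, and whether the VGG constructor accepts `num_classes` is inferred by analogy rather than shown in this diff):

```python
from hapi.vision.models import resnet50, vgg16

# Pretrained weights are for 1000-class ImageNet heads, so a custom head
# has to start from randomly initialized weights (pretrained=False).
model_a = resnet50(pretrained=False, num_classes=100)
model_b = vgg16(pretrained=False, batch_norm=True, num_classes=10)
```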
""" + def __init__(self, transforms=[]): self.transforms = transforms @@ -102,7 +103,7 @@ class BatchCompose(object): except Exception as e: stack_info = traceback.format_exc() print("fail to perform batch transform [{}] with error: " - "{} and stack:\n{}".format(f, e, str(stack_info))) + "{} and stack:\n{}".format(f, e, str(stack_info))) raise e # sample list to batch data @@ -112,7 +113,7 @@ class BatchCompose(object): class Resize(object): - """Resize the input PIL Image to the given size. + """Resize the input Image to the given size. Args: size (int|list|tuple): Desired output size. If size is a sequence like @@ -130,13 +131,6 @@ class Resize(object): self.interpolation = interpolation def __call__(self, img, lbl): - """ - Args: - img (PIL Image): Image to be scaled. - - Returns: - PIL Image: Rescaled image. - """ return F.resize(img, self.size, self.interpolation), lbl @@ -328,18 +322,22 @@ class Permute(object): Input image should be HWC mode and an instance of numpy.ndarray. Args: - mode: Output mode of input. Use "CHW" mode by default. + mode: Output mode of input. Default: "CHW". + to_rgb: convert 'bgr' image to 'rgb'. Default: True. """ - def __init__(self, mode="CHW"): + def __init__(self, mode="CHW", to_rgb=True): assert mode in [ "CHW" ], "Only support 'CHW' mode, but received mode: {}".format(mode) self.mode = mode + self.to_rgb = to_rgb def __call__(self, img, lbl): + if self.to_rgb: + img = img[..., ::-1] if self.mode == "CHW": - return img.transpose((2, 0, 1))[::-1, ...], lbl + return img.transpose((2, 0, 1)), lbl return img, lbl