diff --git a/examples/image_classification/README.MD b/examples/image_classification/README.MD
index 1345949c76b0b55c461574bce271c6417a112cbb..5b50370dd4b2ad76e62f0e99877849f5fe2fed8f 100644
--- a/examples/image_classification/README.MD
+++ b/examples/image_classification/README.MD
@@ -43,13 +43,13 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
 ### Single-card prediction
 Run the following command to predict:
 ```bash
-python -u main.py --arch resnet50 -d --evaly-only /path/to/imagenet
+python -u main.py --arch resnet50 -d --eval-only /path/to/imagenet
 ```
 
 ### Multi-card prediction
 Run the following command to predict on multiple cards:
 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --evaly-only /path/to/imagenet
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --eval-only /path/to/imagenet
 ```
 
@@ -71,12 +71,17 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
 * **weight-decay**: weight decay (L2 regularization) coefficient. Default: 1e-4
 * **momentum**: momentum of the SGD optimizer. Default: 0.9
 
+Note: when resuming training with `--resume`, if your checkpoint path is `./output/118.pdparams`, pass the path without the suffix, i.e. `--resume ./output/118`.
 
 ## Models
 
 | Model | top1 acc | top5 acc |
 | --- | --- | --- |
+| [ResNet18](https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams) | 71.72 | 90.60 |
+| [ResNet34](https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams) | 75.02 | 92.31 |
 | [ResNet50](https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams) | 76.27 | 93.03 |
+| [ResNet101](https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams) | 78.33 | 94.04 |
+| [ResNet152](https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams) | 78.78 | 94.40 |
 | [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.92 | 90.65 |
 | [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.16 | 89.89 |
 | [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.30 | 90.74 |
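The README note above says `--resume` takes the checkpoint path without its extension; the `_strip_postfix` helper later in this diff (hapi/model.py) is what strips a known suffix before the `.pdparams`/`.pdopt` files are located. A minimal standalone sketch of that behavior (the function body is copied from the diff; the example paths are hypothetical):

```python
import os

def _strip_postfix(path):
    # Accept either a bare prefix or one of the known Paddle suffixes,
    # and return the prefix that .pdparams/.pdopt get appended to.
    path, ext = os.path.splitext(path)
    assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
        "Unknown postfix {} from weights".format(ext)
    return path

print(_strip_postfix("./output/118.pdparams"))  # ./output/118
print(_strip_postfix("./output/118"))           # ./output/118
```

Judging from this helper, either form resolves to the same prefix, which is why the suffix is unnecessary on the command line.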
diff --git a/examples/image_classification/main.py b/examples/image_classification/main.py
index 546991528631909d5f75caec4df96c63053e7fdb..76360df91cd64a66e2e288c90a37ac667cdc3eea 100644
--- a/examples/image_classification/main.py
+++ b/examples/image_classification/main.py
@@ -76,6 +76,9 @@ def main():
     device = set_device(FLAGS.device)
     fluid.enable_dygraph(device) if FLAGS.dynamic else None
 
+    model_list = list(models.__dict__["__all__"])
+    assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
+        model_list, FLAGS.arch)
     model = models.__dict__[FLAGS.arch](pretrained=FLAGS.eval_only and
                                         not FLAGS.resume)
 
@@ -94,7 +97,13 @@ def main():
             len(train_dataset) * 1. / FLAGS.batch_size / ParallelEnv().nranks),
         parameter_list=model.parameters())
-    model.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 5)), inputs, labels)
+    model.prepare(
+        optim,
+        CrossEntropy(),
+        Accuracy(topk=(1, 5)),
+        inputs,
+        labels,
+        FLAGS.device)
 
     if FLAGS.eval_only:
         model.evaluate(
@@ -152,7 +161,7 @@ if __name__ == '__main__':
         type=str,
         help="checkpoint path to resume")
     parser.add_argument(
-        "--eval-only", action='store_true', help="enable dygraph mode")
+        "--eval-only", action='store_true', help="only evaluate the model")
     parser.add_argument(
         "--lr-scheduler",
        default='piecewise',
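For context on the scheduler boundary computation above: the piecewise learning-rate schedule divides the dataset size by the per-card batch size and the number of ranks to get steps per epoch. A back-of-the-envelope check with hypothetical values (the 1,281,167 sample count is the usual ILSVRC2012 train-set size, not a number taken from this diff):

```python
# Steps per epoch as computed in main.py (sketch; all values are assumptions).
num_samples = 1281167   # typical ILSVRC2012 train set size
batch_size = 64         # per-card batch size used by the scripts below
nranks = 4              # one rank per GPU in CUDA_VISIBLE_DEVICES=0,1,2,3
steps_per_epoch = int(num_samples * 1. / batch_size / nranks)
print(steps_per_epoch)  # 5004
```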
diff --git a/examples/image_classification/scripts/mobilenet_v1_x1.0.sh b/examples/image_classification/scripts/mobilenet_v1_x1.0.sh
new file mode 100644
index 0000000000000000000000000000000000000000..16734e64c0fe3e6e93eacadd89ce366b48969dbd
--- /dev/null
+++ b/examples/image_classification/scripts/mobilenet_v1_x1.0.sh
@@ -0,0 +1,13 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch mobilenet_v1 \
+    --epoch 120 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    --lr-scheduler piecewise \
+    --milestones 30 60 90 \
+    --weight-decay 3e-5 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/mobilenet_v2_x1.0.sh b/examples/image_classification/scripts/mobilenet_v2_x1.0.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2616d7ef8668b0d85fa56cf28c0bf95e86212fd1
--- /dev/null
+++ b/examples/image_classification/scripts/mobilenet_v2_x1.0.sh
@@ -0,0 +1,12 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch mobilenet_v2 \
+    --epoch 240 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    --lr-scheduler cosine \
+    --weight-decay 4e-5 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet101.sh b/examples/image_classification/scripts/resnet101.sh
new file mode 100644
index 0000000000000000000000000000000000000000..34844cafb61f7373b9e9f9c997dc44bb5a3308ca
--- /dev/null
+++ b/examples/image_classification/scripts/resnet101.sh
@@ -0,0 +1,10 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet101 \
+    --epoch 90 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet152.sh b/examples/image_classification/scripts/resnet152.sh
new file mode 100644
index 0000000000000000000000000000000000000000..26541637b1a2d45a6e65db513f3a806b9aa92594
--- /dev/null
+++ b/examples/image_classification/scripts/resnet152.sh
@@ -0,0 +1,10 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet152 \
+    --epoch 90 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet18.sh b/examples/image_classification/scripts/resnet18.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f1f20e55bed5106b58b3b90b5909d5c93c09e4cd
--- /dev/null
+++ b/examples/image_classification/scripts/resnet18.sh
@@ -0,0 +1,11 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet18 \
+    --epoch 120 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    --lr-scheduler cosine \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet34.sh b/examples/image_classification/scripts/resnet34.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a4a36614dfad023d98a3e5ae3e26ddc96449e2f2
--- /dev/null
+++ b/examples/image_classification/scripts/resnet34.sh
@@ -0,0 +1,11 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet34 \
+    --epoch 120 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    --lr-scheduler cosine \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/resnet50.sh b/examples/image_classification/scripts/resnet50.sh
new file mode 100644
index 0000000000000000000000000000000000000000..50a0e7398bd68aa046345095774b5403022331d2
--- /dev/null
+++ b/examples/image_classification/scripts/resnet50.sh
@@ -0,0 +1,10 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch resnet50 \
+    --epoch 90 \
+    --batch-size 64 \
+    --learning-rate 0.1 \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/image_classification/scripts/vgg16.sh b/examples/image_classification/scripts/vgg16.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7372ce315efef42524550fec1dd549146f4e1a54
--- /dev/null
+++ b/examples/image_classification/scripts/vgg16.sh
@@ -0,0 +1,11 @@
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+
+# By default the ImageNet data is stored under data/ILSVRC2012/; remove -d to run in static graph mode
+python -m paddle.distributed.launch main.py \
+    --arch vgg16 \
+    --epoch 90 \
+    --batch-size 64 \
+    --learning-rate 0.01 \
+    --lr-scheduler cosine \
+    -d \
+    data/ILSVRC2012/
\ No newline at end of file
diff --git a/examples/tsm/kinetics_dataset.py b/examples/tsm/kinetics_dataset.py
index c8570018cfbcf808917f28806ab841da874782d3..123d89814a8c631569cd0503750cafac631cca22 100644
--- a/examples/tsm/kinetics_dataset.py
+++ b/examples/tsm/kinetics_dataset.py
@@ -100,19 +100,12 @@ class KineticsDataset(Dataset):
     def __getitem__(self, idx):
         pickle_path = os.path.join(self.pickle_dir, self.pickle_paths[idx])
 
-        try:
-            if six.PY2:
-                data = pickle.load(open(pickle_path, 'rb'))
-            else:
-                data = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
-
-            vid, label, frames = data
-            if len(frames) < 1:
-                logger.error("{} contains no frame".format(pickle_path))
-                sys.exit(-1)
-        except Exception as e:
-            logger.error("Load {} failed: {}".format(pickle_path, e))
-            sys.exit(-1)
+        if six.PY2:
+            data = pickle.load(open(pickle_path, 'rb'))
+        else:
+            data = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
+
+        vid, label, frames = data
 
         if self.label_list is not None:
             label = self.label_list.index(label)
diff --git a/hapi/datasets/coco.py b/hapi/datasets/coco.py
index f1ab97281a6e0e20834c33f1e6663903f25349a0..50d31cff06692e30fb153983023d4c8ed7476f2c 100644
--- a/hapi/datasets/coco.py
+++ b/hapi/datasets/coco.py
@@ -18,7 +18,6 @@ from __future__ import print_function
 import os
 import cv2
 import numpy as np
-from pycocotools.coco import COCO
 
 from paddle.io import Dataset
 
@@ -91,6 +90,7 @@ class COCODataset(Dataset):
         self._load_roidb_and_cname2cid()
 
     def _load_roidb_and_cname2cid(self):
+        from pycocotools.coco import COCO
         assert self._anno_path.endswith('.json'), \
             'invalid coco annotation file: ' + anno_path
         coco = COCO(self._anno_path)
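The coco.py change above moves the pycocotools import from module scope into the only method that needs it, so importing hapi.datasets no longer hard-requires pycocotools. The same pattern in isolation (a hedged sketch; `LazyCOCO` and its fields are illustrative, not part of this diff):

```python
class LazyCOCO(object):
    """Sketch of deferring an optional dependency until first use."""

    def __init__(self, anno_path):
        self.anno_path = anno_path
        self.coco = self._load()

    def _load(self):
        # Deferred import: an ImportError, if any, surfaces only when an
        # instance is actually constructed, not when the module is imported.
        from pycocotools.coco import COCO
        return COCO(self.anno_path)
```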
diff --git a/hapi/model.py b/hapi/model.py
index f4e6744df5107d345c873f6fa45269f704615708..4d27355603f111dfc637d68e7efa9695369b504b 100644
--- a/hapi/model.py
+++ b/hapi/model.py
@@ -799,11 +799,11 @@ class Model(fluid.dygraph.Layer):
                     format(key, list(state.shape), list(param.shape)))
             return param, state
 
-    def _strip_postfix(path):
-        path, ext = os.path.splitext(path)
-        assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
-            "Unknown postfix {} from weights".format(ext)
-        return path
+        def _strip_postfix(path):
+            path, ext = os.path.splitext(path)
+            assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
+                "Unknown postfix {} from weights".format(ext)
+            return path
 
         path = _strip_postfix(path)
         param_state = _load_state_from_path(path + ".pdparams")
@@ -936,35 +936,35 @@ class Model(fluid.dygraph.Layer):
         Args:
             train_data (Dataset|DataLoader): An iterable data loader is used for
                 train. An instance of paddle.io.Dataset or
-                paddle.io.Dataloader is recomended.
+                paddle.io.DataLoader is recommended. Default: None.
             eval_data (Dataset|DataLoader): An iterable data loader is used for
                 evaluation at the end of epoch. If None, will not do evaluation.
                 An instance of paddle.io.Dataset or paddle.io.DataLoader
-                is recomended.
+                is recommended. Default: None.
             batch_size (int): Integer number. The batch size of train_data and eval_data.
                 When train_data and eval_data are both the instance of DataLoader, this
-                parameter will be ignored.
-            epochs (int): Integer number. The number of epochs to train the model.
+                parameter will be ignored. Default: 1.
+            epochs (int): Integer number. The number of epochs to train the model. Default: 1.
             eval_freq (int): The frequency, in number of epochs, an evaluation
-                is performed.
+                is performed. Default: 1.
             log_freq (int): The frequency, in number of steps, the training logs
-                are printed.
+                are printed. Default: 10.
             save_dir (str|None): The directory to save checkpoint during training.
-                If None, will not save checkpoint.
-            save_freq (int): The frequency, in number of epochs, to save checkpoint.
+                If None, will not save checkpoint. Default: None.
+            save_freq (int): The frequency, in number of epochs, to save checkpoint. Default: 1.
             verbose (int): The verbosity mode, should be 0, 1, or 2.
-                0 = silent, 1 = progress bar, 2 = one line per epoch.
+                0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
             drop_last (bool): whether to drop the last incomplete batch of train_data
                 when dataset size is not divisible by the batch size. When train_data
-                is an instance of Dataloader, this parameter will be ignored.
+                is an instance of DataLoader, this parameter will be ignored. Default: False.
             shuffle (bool): whether to shuffle train_data. When train_data is an instance
-                of Dataloader, this parameter will be ignored.
+                of DataLoader, this parameter will be ignored. Default: True.
             num_workers (int): the number of subprocesses to load data, 0 for no subprocess
                 used and loading data in main process. When train_data and eval_data are
-                both the instance of Dataloader, this parameter will be ignored.
+                both the instance of DataLoader, this parameter will be ignored. Default: 0.
             callbacks (Callback|None): A list of `Callback` instances to apply
                 during training. If None, `ProgBarLogger` and `ModelCheckpoint`
-                are automatically inserted.
+                are automatically inserted. Default: None.
         """
 
         assert train_data is not None, \
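Putting the documented defaults together, a minimal usage sketch of the fit/evaluate flow (this assumes `model` has already gone through `model.prepare(...)` as in main.py, and that `train_dataset`/`val_dataset` are `paddle.io.Dataset` instances; the names and numbers are illustrative, not taken from this diff):

```python
model.fit(
    train_dataset,        # train_data
    val_dataset,          # eval_data, evaluated every eval_freq epochs
    batch_size=64,
    epochs=90,
    save_dir='output',    # checkpoints written every save_freq epochs
    num_workers=4)

# evaluate() returns a dict of metric results, per the Returns entry
# added in the hunk below.
metrics = model.evaluate(val_dataset, batch_size=64)
```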
@@ -1066,18 +1066,20 @@ class Model(fluid.dygraph.Layer):
                 evaluation. An instance of paddle.io.Dataset or
                 paddle.io.DataLoader is recommended.
             batch_size (int): Integer number. The batch size of train_data and eval_data.
-                When train_data and eval_data are both the instance of Dataloader, this
-                parameter will be ignored.
+                When eval_data is an instance of DataLoader, this argument will be ignored.
+                Default: 1.
             log_freq (int): The frequency, in number of steps, the eval logs
-                are printed.
+                are printed. Default: 10.
             verbose (int): The verbosity mode, should be 0, 1, or 2.
-                0 = silent, 1 = progress bar, 2 = one line per epoch.
+                0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
             num_workers (int): The number of subprocesses to load data, 0 for no subprocess
                 used and loading data in main process. When train_data and eval_data are
-                both the instance of Dataloader, this parameter will be ignored.
+                both the instance of DataLoader, this parameter will be ignored. Default: 0.
             callbacks (Callback|None): A list of `Callback` instances to apply
                 during training. If None, `ProgBarLogger` and `ModelCheckpoint`
-                are automatically inserted.
+                are automatically inserted. Default: None.
+        Returns:
+            dict: Result of metrics.
         """
 
         if fluid.in_dygraph_mode():
@@ -1142,16 +1144,18 @@ class Model(fluid.dygraph.Layer):
             is recommended.
             batch_size (int): Integer number. The batch size of train_data and eval_data.
                 When train_data and eval_data are both the instance of DataLoader, this
-                parameter will be ignored.
+                argument will be ignored. Default: 1.
             num_workers (int): the number of subprocesses to load data, 0 for no subprocess
                 used and loading data in main process. When train_data and eval_data are
-                both the instance of Dataloader, this parameter will be ignored.
+                both the instance of DataLoader, this argument will be ignored. Default: 0.
             stack_output (bool): whether to stack output fields like a batch; if an output
                 field of a sample is in shape [X, Y] and test_data contains N samples, the
                 predict output field will be in shape [N, X, Y] if stack_output is True, and
                 will be a length N list in shape [[X, Y], [X, Y], ....[X, Y]] if stack_outputs
                 is False. stack_outputs as False is used for LoDTensor output situation,
-                it is recommended set as True if outputs contains no LoDTensor. Default False
+                it is recommended to set as True if outputs contain no LoDTensor. Default: False.
+        Returns:
+            list: Output of the model.
         """
 
         if fluid.in_dygraph_mode():
diff --git a/hapi/vision/models/mobilenetv1.py b/hapi/vision/models/mobilenetv1.py
index ff27cb9c5d7745361858c3f6ec13e5865fafa605..31c0acbee2fdc107b0d776605c296c2c9296bcfd 100644
--- a/hapi/vision/models/mobilenetv1.py
+++ b/hapi/vision/models/mobilenetv1.py
@@ -263,7 +263,7 @@ class MobileNetV1(Model):
 
 
 def _mobilenet(arch, pretrained=False, **kwargs):
-    model = MobileNetV1(num_classes=1000, with_pool=True, **kwargs)
+    model = MobileNetV1(**kwargs)
     if pretrained:
         assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format(
             arch)
@@ -276,12 +276,13 @@ def _mobilenet(arch, pretrained=False, **kwargs):
     return model
 
 
-def mobilenet_v1(pretrained=False, scale=1.0):
+def mobilenet_v1(pretrained=False, scale=1.0, **kwargs):
     """MobileNetV1
 
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
         scale (float): scale of channels in each layer. Default: 1.0.
""" - model = _mobilenet('mobilenetv1_' + str(scale), pretrained, scale=scale) + model = _mobilenet( + 'mobilenetv1_' + str(scale), pretrained, scale=scale, **kwargs) return model diff --git a/hapi/vision/models/mobilenetv2.py b/hapi/vision/models/mobilenetv2.py index 02db68e569cea06dac876dd3b7bc044cd15542f7..d624625bcda1b763a0b3e511b6146776245e2fd5 100644 --- a/hapi/vision/models/mobilenetv2.py +++ b/hapi/vision/models/mobilenetv2.py @@ -237,7 +237,7 @@ class MobileNetV2(Model): def _mobilenet(arch, pretrained=False, **kwargs): - model = MobileNetV2(num_classes=1000, with_pool=True, **kwargs) + model = MobileNetV2(**kwargs) if pretrained: assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( arch) @@ -250,12 +250,13 @@ def _mobilenet(arch, pretrained=False, **kwargs): return model -def mobilenet_v2(pretrained=False, scale=1.0): +def mobilenet_v2(pretrained=False, scale=1.0, **kwargs): """MobileNetV2 Args: pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False. scale: (float): scale of channels in each layer. Default: 1.0. """ - model = _mobilenet('mobilenetv2_' + str(scale), pretrained, scale=scale) + model = _mobilenet( + 'mobilenetv2_' + str(scale), pretrained, scale=scale, **kwargs) return model diff --git a/hapi/vision/models/resnet.py b/hapi/vision/models/resnet.py index 804cc3534ad4c3cda4f800b41d8567922450e037..ac0944ee651224b106db71d0c87e9e5c29fd14d9 100644 --- a/hapi/vision/models/resnet.py +++ b/hapi/vision/models/resnet.py @@ -30,8 +30,18 @@ __all__ = [ ] model_urls = { + 'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams', + '0ba53eea9bc970962d0ef96f7b94057e'), + 'resnet34': ('https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams', + '46bc9f7c3dd2e55b7866285bee91eff3'), 'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams', - '0884c9087266496c41c60d14a96f8530') + '0884c9087266496c41c60d14a96f8530'), + 'resnet101': + ('https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams', + 'fb07a451df331e4b0bb861ed97c3a9b9'), + 'resnet152': + ('https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams', + 'f9c700f26d3644bb76ad2226ed5f5713'), } @@ -252,8 +262,8 @@ class ResNet(Model): return x -def _resnet(arch, Block, depth, pretrained): - model = ResNet(Block, depth, num_classes=1000, with_pool=True) +def _resnet(arch, Block, depth, pretrained, **kwargs): + model = ResNet(Block, depth, **kwargs) if pretrained: assert arch in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( arch) @@ -265,46 +275,46 @@ def _resnet(arch, Block, depth, pretrained): return model -def resnet18(pretrained=False): +def resnet18(pretrained=False, **kwargs): """ResNet 18-layer model Args: pretrained (bool): If True, returns a model pre-trained on ImageNet """ - return _resnet('resnet18', BasicBlock, 18, pretrained) + return _resnet('resnet18', BasicBlock, 18, pretrained, **kwargs) -def resnet34(pretrained=False): +def resnet34(pretrained=False, **kwargs): """ResNet 34-layer model Args: pretrained (bool): If True, returns a model pre-trained on ImageNet """ - return _resnet('resnet34', BasicBlock, 34, pretrained) + return _resnet('resnet34', BasicBlock, 34, pretrained, **kwargs) -def resnet50(pretrained=False): +def resnet50(pretrained=False, **kwargs): """ResNet 50-layer model Args: pretrained (bool): If True, returns a model pre-trained on ImageNet """ - return _resnet('resnet50', BottleneckBlock, 50, pretrained) + return 
 
 
-def resnet101(pretrained=False):
+def resnet101(pretrained=False, **kwargs):
     """ResNet 101-layer model
 
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    return _resnet('resnet101', BottleneckBlock, 101, pretrained)
+    return _resnet('resnet101', BottleneckBlock, 101, pretrained, **kwargs)
 
 
-def resnet152(pretrained=False):
+def resnet152(pretrained=False, **kwargs):
     """ResNet 152-layer model
 
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    return _resnet('resnet152', BottleneckBlock, 152, pretrained)
+    return _resnet('resnet152', BottleneckBlock, 152, pretrained, **kwargs)
diff --git a/hapi/vision/models/vgg.py b/hapi/vision/models/vgg.py
index 5ef09bd665e4308739651d868203a4a56b14de38..41cf34eddf7d4d379f9ea3a6bc5490f9763919dc 100644
--- a/hapi/vision/models/vgg.py
+++ b/hapi/vision/models/vgg.py
@@ -137,7 +137,7 @@ def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
     return model
 
 
-def vgg11(pretrained=False, batch_norm=False):
+def vgg11(pretrained=False, batch_norm=False, **kwargs):
     """VGG 11-layer model
 
     Args:
@@ -147,10 +147,10 @@
     model_name = 'vgg11'
     if batch_norm:
         model_name += ('_bn')
-    return _vgg(model_name, 'A', batch_norm, pretrained)
+    return _vgg(model_name, 'A', batch_norm, pretrained, **kwargs)
 
 
-def vgg13(pretrained=False, batch_norm=False):
+def vgg13(pretrained=False, batch_norm=False, **kwargs):
     """VGG 13-layer model
 
     Args:
@@ -160,10 +160,10 @@
     model_name = 'vgg13'
     if batch_norm:
         model_name += ('_bn')
-    return _vgg(model_name, 'B', batch_norm, pretrained)
+    return _vgg(model_name, 'B', batch_norm, pretrained, **kwargs)
 
 
-def vgg16(pretrained=False, batch_norm=False):
+def vgg16(pretrained=False, batch_norm=False, **kwargs):
     """VGG 16-layer model
 
     Args:
@@ -173,10 +173,10 @@
     model_name = 'vgg16'
     if batch_norm:
         model_name += ('_bn')
-    return _vgg(model_name, 'D', batch_norm, pretrained)
+    return _vgg(model_name, 'D', batch_norm, pretrained, **kwargs)
 
 
-def vgg19(pretrained=False, batch_norm=False):
+def vgg19(pretrained=False, batch_norm=False, **kwargs):
     """VGG 19-layer model
 
     Args:
@@ -186,4 +186,4 @@
     model_name = 'vgg19'
     if batch_norm:
         model_name += ('_bn')
-    return _vgg(model_name, 'E', batch_norm, pretrained)
+    return _vgg(model_name, 'E', batch_norm, pretrained, **kwargs)
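Since `**kwargs` is now threaded through every wrapper above down to the underlying constructors, arguments that `_resnet`/`_mobilenet` previously hard-coded (`num_classes=1000`, `with_pool=True`) can be overridden at construction time. A hedged sketch (the import path is assumed from the file layout, and whether the VGG constructor accepts `num_classes` is inferred by analogy rather than shown in this diff):

```python
from hapi.vision.models import resnet50, vgg16

# Pretrained weights are for 1000-class ImageNet heads, so a custom head
# has to start from randomly initialized weights (pretrained=False).
model_a = resnet50(pretrained=False, num_classes=100)
model_b = vgg16(pretrained=False, batch_norm=True, num_classes=10)
```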
""" + def __init__(self, transforms=[]): self.transforms = transforms @@ -102,7 +103,7 @@ class BatchCompose(object): except Exception as e: stack_info = traceback.format_exc() print("fail to perform batch transform [{}] with error: " - "{} and stack:\n{}".format(f, e, str(stack_info))) + "{} and stack:\n{}".format(f, e, str(stack_info))) raise e # sample list to batch data @@ -112,7 +113,7 @@ class BatchCompose(object): class Resize(object): - """Resize the input PIL Image to the given size. + """Resize the input Image to the given size. Args: size (int|list|tuple): Desired output size. If size is a sequence like @@ -130,13 +131,6 @@ class Resize(object): self.interpolation = interpolation def __call__(self, img, lbl): - """ - Args: - img (PIL Image): Image to be scaled. - - Returns: - PIL Image: Rescaled image. - """ return F.resize(img, self.size, self.interpolation), lbl @@ -328,18 +322,22 @@ class Permute(object): Input image should be HWC mode and an instance of numpy.ndarray. Args: - mode: Output mode of input. Use "CHW" mode by default. + mode: Output mode of input. Default: "CHW". + to_rgb: convert 'bgr' image to 'rgb'. Default: True. """ - def __init__(self, mode="CHW"): + def __init__(self, mode="CHW", to_rgb=True): assert mode in [ "CHW" ], "Only support 'CHW' mode, but received mode: {}".format(mode) self.mode = mode + self.to_rgb = to_rgb def __call__(self, img, lbl): + if self.to_rgb: + img = img[..., ::-1] if self.mode == "CHW": - return img.transpose((2, 0, 1))[::-1, ...], lbl + return img.transpose((2, 0, 1)), lbl return img, lbl