Unverified · commit 31a55526 · authored by: W Walter · committed by: GitHub

Merge branch 'develop_reg' into develop_reg

Global:
rec_inference_model_dir: "./models/cartoon_rec_ResNet50_iCartoon_v1.0_infer/"
batch_size: 1
use_gpu: True
enable_mkldnn: True
cpu_num_threads: 100
enable_benchmark: True
use_fp16: False
ir_optim: True
use_tensorrt: False
gpu_mem: 8000
enable_profile: False
RecPreProcess:
transform_ops:
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./dataset/cartoon_demo_data_v1.0/index/"
image_root: "./dataset/cartoon_demo_data_v1.0/"
data_file: "./dataset/cartoon_demo_data_v1.0/data_file.txt"
delimiter: "\t"
dist_type: "IP"
pq_size: 100
embedding_size: 2048
Global:
rec_inference_model_dir: "./inshop/rec/"
rec_inference_model_dir: "./models/product_ResNet50_vd_Inshop_v1.0_infer"
batch_size: 1
use_gpu: True
enable_mkldnn: True
@@ -26,9 +26,9 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./inshop/inshop_index/"
image_root: "./inshop/dataset/"
data_file: "./inshop/inshop_gallery_demo.txt"
index_path: "./dataset/product_demo_data_v1.0/index"
image_root: "./dataset/product_demo_data_v1.0"
data_file: "./dataset/product_demo_data_v1.0/data_file.txt"
delimiter: " "
dist_type: "IP"
pq_size: 100
......
Global:
rec_inference_model_dir: "./logo/model/"
rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/"
batch_size: 1
use_gpu: True
enable_mkldnn: True
@@ -26,9 +26,9 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./logo/logo_index/"
image_root: "./logo/dataset/"
data_file: "./logo/logo_gallery_demo.txt"
index_path: "./dataset/logo_demo_data_v1.0/index/"
image_root: "./dataset/logo_demo_data_v1.0/"
data_file: "./dataset/logo_demo_data_v1.0/data_file.txt"
delimiter: "\t"
dist_type: "IP"
pq_size: 100
......
Global:
rec_inference_model_dir: "./vehicle/model/"
rec_inference_model_dir: "./models/vehicle_cls_ResNet50_CompCars_v1.0_infer/"
batch_size: 1
use_gpu: True
enable_mkldnn: True
@@ -26,9 +26,9 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./vehilce/vehicle_index/"
image_root: "./vehicle/dataset/"
data_file: "./vehilce/demo_gallery.txt"
index_path: "./dataset/vehicle_demo_data_v1.0/index/"
image_root: "./dataset/vehicle_demo_data_v1.0/"
data_file: "./dataset/vehicle_demo_data_v1.0/data_file.txt"
delimiter: " "
dist_type: "IP"
pq_size: 100
......
Global:
infer_imgs: "./dataset/iCartoonFace/val2/0000000.jpg"
det_inference_model_dir: "./output/det"
rec_inference_model_dir: "./output/"
infer_imgs: "./dataset/cartoon_demo_data_v1.0/query/"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/"
rec_inference_model_dir: "./models/cartoon_rec_ResNet50_iCartoon_v1.0_infer/"
batch_size: 1
image_shape: [3, 640, 640]
threshold: 0.5
@@ -9,7 +9,6 @@ Global:
labe_list:
- foreground
# inference engine config
use_gpu: True
enable_mkldnn: True
cpu_num_threads: 100
@@ -34,7 +33,6 @@ DetPreProcess:
DetPostProcess: {}
RecPreProcess:
transform_ops:
- ResizeImage:
@@ -50,18 +48,8 @@ RecPreProcess:
RecPostProcess: null
# indexing engine config
IndexProcess:
build:
enable: False
index_path: "./icartoon_index/"
image_root: "./dataset/iCartoonFace"
data_file: "./dataset/iCartoonFace/gallery_pesudo.txt"
spacer: "\t"
dist_type: "IP"
pq_size: 100
embedding_size: 2048
infer:
index_path: "./icartoon_index/"
search_budget: 100
return_k: 10
index_path: "./dataset/cartoon_demo_data_v1.0/index/"
search_budget: 100
return_k: 5
dist_type: "IP"
Global:
infer_imgs: "../docs/images/whl/demo.jpg"
inference_model_dir: "./MobileNetV1_infer/"
inference_model_dir: "../inference/"
batch_size: 1
use_gpu: True
enable_mkldnn: True
@@ -27,4 +27,4 @@ PreProcess:
PostProcess:
name: Topk
topk: 5
class_id_map_file: "ppcls/utils/imagenet1k_label_list.txt"
\ No newline at end of file
class_id_map_file: "../ppcls/utils/imagenet1k_label_list.txt"
\ No newline at end of file
Global:
infer_imgs: "./inshop/demo/01_3_back.jpg"
det_inference_model_dir: "./inshop/det/"
rec_inference_model_dir: "./inshop/rec/"
infer_imgs: "./dataset/product_demo_data_v1.0/query"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer"
rec_inference_model_dir: "./models/product_ResNet50_vd_Inshop_v1.0_infer"
batch_size: 1
image_shape: [3, 640, 640]
threshold: 0.0
max_det_results: 3
max_det_results: 1
labe_list:
- foreground
@@ -48,7 +48,7 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./inshop/inshop_index"
index_path: "./dataset/product_demo_data_v1.0/index"
search_budget: 100
return_k: 10
return_k: 5
dist_type: "IP"
Global:
infer_imgs: "./logo/demo/logo_APK.jpg"
det_inference_model_dir: "./logo/det/"
rec_inference_model_dir: "./logo/rec/"
infer_imgs: "./dataset/logo_demo_data_v1.0/query/logo_AKG.jpg"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/"
rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/"
batch_size: 1
image_shape: [3, 640, 640]
threshold: 0.5
@@ -48,7 +48,7 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./logo_index/"
index_path: "./dataset/logo_demo_data_v1.0/index/"
search_budget: 100
return_k: 10
return_k: 5
dist_type: "IP"
Global:
infer_imgs: "./vehicle/demo/2e3521935c280c.jpg"
det_inference_model_dir: "./det/"
rec_inference_model_dir: "./vehicle/rec/"
infer_imgs: "./dataset/vehicle_demo_data_v1.0/query/"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/"
rec_inference_model_dir: "./models/vehicle_cls_ResNet50_CompCars_v1.0_infer/"
batch_size: 1
image_shape: [3, 640, 640]
threshold: 0.5
@@ -50,7 +50,7 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./vehicle_index/"
index_path: "./dataset/vehicle_demo_data_v1.0/index/"
search_budget: 100
return_k: 10
return_k: 5
dist_type: "IP"
# DLA series
## Overview
DLA stands for Deep Layer Aggregation. Visual recognition requires rich representations that span levels from low to high, scales from small to large, and resolutions from fine to coarse. Even with the depth of features in a convolutional network, a layer in isolation is not enough: compounding and aggregating these representations improves inference of what and where. Although skip connections have been incorporated to combine layers, these connections have themselves been "shallow", fusing only by simple one-step operations. The authors augment standard architectures with deeper aggregation to better fuse information across layers. Deep layer aggregation structures merge the feature hierarchy iteratively and hierarchically, yielding networks with better accuracy and fewer parameters. Experiments across architectures and tasks show that deep layer aggregation improves recognition and resolution compared with existing branching and merging schemes. [Paper](https://arxiv.org/abs/1707.06484)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:-----------------:|:----------:|:---------:|:---------:|:---------:|
| DLA34 | 15.8 | 3.1 | 76.03 | 92.98 |
| DLA46_c | 1.3 | 0.5 | 63.21 | 85.30 |
| DLA46x_c | 1.1 | 0.5 | 64.36 | 86.01 |
| DLA60 | 22.0 | 4.2 | 76.10 | 92.92 |
| DLA60x | 17.4 | 3.5 | 77.53 | 93.78 |
| DLA60x_c | 1.3 | 0.6 | 66.45 | 87.54 |
| DLA102 | 33.3 | 7.2 | 78.93 | 94.52 |
| DLA102x | 26.4 | 5.9 | 78.10 | 94.00 |
| DLA102x2 | 41.4 | 9.3 | 78.85 | 94.45 |
| DLA169 | 53.5 | 11.6 | 78.09 | 94.09 |
# HarDNet series
## Overview
HarDNet (Harmonic DenseNet) is a neural network proposed by National Tsing Hua University in 2019 that achieves high efficiency in terms of both low MACs and low memory traffic. It reduces inference time by 35%, 36%, 30%, 32%, and 45% compared with FC-DenseNet-103, DenseNet-264, ResNet-50, ResNet-152, and SSD-VGG, respectively. The authors used tools including the Nvidia profiler and ARM Scale-Sim to measure memory traffic, verifying that inference latency is indeed proportional to memory-traffic consumption and that the proposed network keeps it low. [Paper](https://arxiv.org/abs/1909.00948)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| HarDNet68 | 17.6 | 4.3 | 75.46 | 92.65 |
| HarDNet85 | 36.7 | 9.1 | 77.44 | 93.55 |
| HarDNet39_ds | 3.5 | 0.4 | 71.33 | 89.98 |
| HarDNet68_ds | 4.2 | 0.8 | 73.62 | 91.52 |
\ No newline at end of file
# RedNet series
## Overview
RedNet replaces the convolutions at all bottleneck positions in the ResNet backbone with involution, while retaining all convolutions used for channel mapping and fusion. These carefully redesigned components combine into a new, efficient backbone network called RedNet. [Paper](https://arxiv.org/abs/2103.06255)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| RedNet26 | 9.2 | 1.7 | 75.95 | 93.19 |
| RedNet38 | 12.4 | 2.2 | 77.47 | 93.56 |
| RedNet50 | 15.5 | 2.7 | 78.33 | 94.17 |
| RedNet101 | 25.7 | 4.7 | 78.94 | 94.36 |
| RedNet152 | 34.0 | 6.8 | 79.17 | 94.40 |
\ No newline at end of file
# TNT series
## Overview
TNT (Transformer-iN-Transformer) series models were proposed by Huawei-Noah in 2021 for modeling both patch-level and pixel-level representations. In each TNT block, an outer transformer block processes patch embeddings, and an inner transformer block extracts local features from pixel embeddings. Pixel-level features are projected into the patch-embedding space by a linear transformation layer and then added to the patches. Stacking TNT blocks builds the TNT model for image recognition. Experiments on the ImageNet benchmark and downstream tasks demonstrate the superiority and efficiency of the TNT architecture; for example, TNT achieves 81.3% top-1 accuracy on ImageNet, which is 1.5% higher than DeiT at similar computational cost. [Paper](https://arxiv.org/abs/2103.00112)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| TNT_small | 23.8 | 5.2 | 81.12 | 95.56 |
\ No newline at end of file
# Logo Recognition
Logo recognition is widely used in everyday applications, for example checking whether an Adidas or Nike logo appears in a photo, or whether a Starbucks or Coca-Cola logo appears on a cup. When there are many logo categories, a two-stage detection-plus-recognition pipeline is usually adopted: the detection module locates candidate logo regions, which are cropped out and fed into the recognition module. The recognition module typically works by retrieval, ranking gallery images by their similarity to the query image to obtain the predicted category. This document covers the feature-extraction part for logo images, including:
- the dataset and its preprocessing
- the backbone configuration
- the loss-function settings
All hyperparameters and the full configuration: [ResNet50_ReID.yaml](../../../ppcls/configs/Logo/ResNet50_ReID.yaml)
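A minimal sketch of the retrieval step just described (illustrative only, not the PaddleClas implementation): rank the gallery by inner-product similarity of L2-normalized features and return the labels of the top matches.

```python
import numpy as np

def retrieve(query_feat, gallery_feats, gallery_labels, return_k=5):
    # L2-normalize so that inner product ("IP" distance type) equals cosine similarity
    q = query_feat / np.linalg.norm(query_feat)
    g = gallery_feats / np.linalg.norm(gallery_feats, axis=1, keepdims=True)
    sims = g @ q
    topk = np.argsort(-sims)[:return_k]  # indices of the most similar gallery images
    return [gallery_labels[i] for i in topk], sims[topk]
```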
## Dataset and Preprocessing
### The LogoDet-3K Dataset
<img src="../../images/logo/logodet3k.jpg" style="zoom:50%;" />
LogoDet-3K is a fully annotated logo dataset with 3,000 logo categories, about 200,000 high-quality manually annotated logo objects, and 158,652 images. See the [original paper](https://arxiv.org/abs/2008.05359) for details.
## Data Preprocessing
In the original dataset the images carry annotated detection boxes, while the recognition stage only sees the logo regions cropped out by the detector. The training set is therefore built by cropping logo regions with the original annotation boxes, removing background influence at the recognition stage. The data is split into 155,427 training images covering all 3,000 logo classes (these also serve as the gallery at test time) and 3,225 test images used as the query set. The cropped training set can be [downloaded here](https://arxiv.org/abs/2008.05359); a cropping sketch is given below.
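A hedged illustration of the cropping step (the box format is assumed to be pixel coordinates `[x1, y1, x2, y2]`; the function name is made up):

```python
from PIL import Image

def crop_logo(image_path, box, out_path):
    # cut the annotated logo region out of the full image
    x1, y1, x2, y2 = box
    Image.open(image_path).crop((x1, y1, x2, y2)).save(out_path)
```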
The training augmentation pipeline is:
- `Resize` the image to 224
- random horizontal flip
- [AugMix](https://arxiv.org/abs/1912.02781v1)
- Normalize: scale pixel values to the 0-1 range
- [RandomErasing](https://arxiv.org/pdf/1708.04896v2.pdf)

It is set in the configuration file as follows; see the `transform_ops` section for details:
```yaml
DataLoader:
Train:
dataset:
      # name of the Dataset class to use
name: "LogoDataset"
      # parameters specific to this dataset
image_root: "dataset/LogoDet-3K-crop/train/"
cls_label_path: "dataset/LogoDet-3K-crop/LogoDet-3K+train.txt"
      # image augmentation ops: ResizeImage, RandFlipImage, etc.
transform_ops:
- ResizeImage:
size: 224
- RandFlipImage:
flip_code: 1
- AugMix:
prob: 0.5
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.5
sampler:
name: DistributedRandomIdentitySampler
batch_size: 128
num_instances: 2
drop_last: False
shuffle: True
loader:
num_workers: 6
use_shared_memory: False
```
## Backbone Configuration
`ResNet50` is used as the backbone, with the following modifications:
- ImageNet-pretrained weights are loaded
- the last stage uses stride=1, keeping the final feature map at 14x14
- an embedding convolution layer with feature dimension 512 is appended at the end

Code: [ResNet50_last_stage_stride1](../../../ppcls/arch/backbone/variant_models/resnet_variant.py)
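A rough sketch of the appended embedding layer (illustrative names and shapes; the real variant lives in `resnet_variant.py`, with the neck implemented as `VehicleNeck`):

```python
import paddle.nn as nn

class EmbeddingNeck(nn.Layer):
    """1x1 conv projecting the backbone's 2048-d pooled features to a 512-d embedding."""

    def __init__(self, in_channels=2048, out_channels=512):
        super().__init__()
        self.conv = nn.Conv2D(in_channels, out_channels, kernel_size=1)
        self.flatten = nn.Flatten()

    def forward(self, x):
        # x: [N, 2048, 1, 1] after global average pooling
        return self.flatten(self.conv(x))  # [N, 512]
```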
The Backbone section of the configuration file is:
```yaml
Arch:
  # train with the RecModel architecture; plain ImageNet models and RecModel are currently supported
name: "RecModel"
  # settings for exporting the inference model
infer_output_key: "features"
infer_add_softmax: False
  # backbone to use
Backbone:
name: "ResNet50_last_stage_stride1"
pretrained: True
  # use this layer as the backbone's feature output; name is the layer's full_name
BackboneStopLayer:
name: "adaptive_avg_pool2d_0"
  # extra layers added on top of the backbone; this model adds a 1x1 convolution (embedding) layer
Neck:
name: "VehicleNeck"
in_channels: 2048
out_channels: 512
  # add a CircleMargin head
Head:
name: "CircleMargin"
margin: 0.35
scale: 64
embedding_size: 512
```
## Loss Configuration
Logo recognition is trained jointly with [Pairwise Cosface + CircleMargin](https://arxiv.org/abs/2002.10857), weighted 1:1.
Code: [PairwiseCosface](../../../ppcls/loss/pairwisecosface.py), [CircleMargin](../../../ppcls/arch/gears/circlemargin.py)
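For reference, the unified Circle Loss objective from the linked paper takes the form below, where $s_p^i$ and $s_n^j$ are within-class and between-class similarity scores, $\gamma$ is the scale factor, and $m$ is the relaxation margin (matching `margin: 0.35` and `gamma: 64` in the configuration):

$$
\mathcal{L}_{circle}=\log\Big[1+\sum_{j}\exp\big(\gamma\,\alpha_n^j(s_n^j-\Delta_n)\big)\sum_{i}\exp\big(-\gamma\,\alpha_p^i(s_p^i-\Delta_p)\big)\Big]
$$

with $\alpha_p^i=[1+m-s_p^i]_+$, $\alpha_n^j=[s_n^j+m]_+$, $\Delta_p=1-m$, and $\Delta_n=m$.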
It is configured as follows:
```yaml
Loss:
Train:
- CELoss:
weight: 1.0
- PairwiseCosface:
margin: 0.35
gamma: 64
weight: 1.0
Eval:
- CELoss:
weight: 1.0
```
## Other Settings
### Optimizer
```yaml
Optimizer:
  # optimizer name
name: Momentum
  # optimizer parameters
momentum: 0.9
lr:
    # learning-rate schedule name
name: Cosine
    # schedule parameters
learning_rate: 0.01
regularizer:
name: 'L2'
coeff: 0.0001
```
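The `Cosine` schedule decays the learning rate from its initial value toward zero over the course of training. A minimal sketch, assuming no warmup:

```python
import math

def cosine_lr(step, total_steps, base_lr=0.01):
    # decays from base_lr at step 0 to ~0 at total_steps
    return 0.5 * base_lr * (1 + math.cos(math.pi * step / total_steps))
```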
### Eval Metric Configuration
```yaml
Metric:
Eval:
    # evaluate with both Recall@k and mAP
- Recallk:
topk: [1, 5]
- mAP: {}
```
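Recall@k counts a query as a hit if any of its top-k retrieved gallery images shares the query's label. A rough sketch (illustrative, not the PaddleClas implementation):

```python
import numpy as np

def recall_at_k(ranked_labels, query_labels, topk=(1, 5)):
    # ranked_labels: [num_query, num_gallery] gallery labels sorted by similarity
    return {k: float((ranked_labels[:, :k] == query_labels[:, None]).any(axis=1).mean())
            for k in topk}
```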
### Other Hyperparameters
```yaml
Global:
  # null trains from scratch; set a saved training-state path to resume
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
class_num: 3000
  # model-saving granularity: save once per epoch
save_interval: 1
eval_during_train: True
eval_interval: 1
  # number of training epochs
epochs: 120
  # logging frequency
print_batch_step: 10
  # whether to use the VisualDL library
use_visualdl: False
# used for static mode and model export
image_shape: [3, 224, 224]
save_inference_dir: "./inference"
  # evaluate in retrieval mode
eval_mode: "retrieval"
```
@@ -128,8 +128,8 @@ Among the ResNet series models, compared with the others, the ResNet_vd model's prediction speed
**A**
* For single-image augmentation, refer to the [single-image augmentation operators](../../../ppcls/data/imaug/operators.py); following data operators such as `ResizeImage` or `CropImage`, create a new class and implement the augmentation in its `__call__` method.
* For batch-level augmentation, refer to the [batch augmentation operators](../../../ppcls/data/imaug/batch_operators.py); following data operators such as `MixupOperator` or `CutmixOperator`, create a new class and implement the augmentation in its `__call__` method.
* For single-image augmentation, refer to the [single-image augmentation operators](../../../ppcls/data/preprocess/ops); following data operators such as `ResizeImage` or `CropImage`, create a new class and implement the augmentation in its `__call__` method.
* For batch-level augmentation, refer to the [batch augmentation operators](../../../ppcls/data/preprocess/batch_ops); following data operators such as `MixupOperator` or `CutmixOperator`, create a new class and implement the augmentation in its `__call__` method.
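A hedged sketch of such a single-image operator (the class name and luma weights are illustrative; it assumes HWC `numpy` images, like the other ops):

```python
import random
import numpy as np

class RandomGrayscale(object):
    """Convert an HWC image to 3-channel grayscale with probability `prob`."""

    def __init__(self, prob=0.1):
        self.prob = prob

    def __call__(self, img):
        if random.random() < self.prob:
            # ITU-R BT.601 luma weights; keep 3 channels so later ops still apply
            gray = img[..., 0] * 0.299 + img[..., 1] * 0.587 + img[..., 2] * 0.114
            img = np.stack([gray, gray, gray], axis=-1).astype(img.dtype)
        return img
```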
## Q3.5: How can I further speed up model training?
......
# DLA Series
## Overview
DLA stands for Deep Layer Aggregation. Visual recognition requires rich representations spanning levels from low to high, scales from small to large, and resolutions from fine to coarse. Even with deep features in a convolutional network, a layer in isolation is not enough: compounding and aggregating these representations improves inference of what and where. Although residual connections have been incorporated to combine layers, these connections are themselves "shallow" and fuse only by simple one-step operations. The authors augment standard architectures with deeper aggregation to better fuse information across layers. Deep Layer Aggregation structures merge the feature hierarchy iteratively and hierarchically, making networks more accurate with fewer parameters. Experiments across architectures and tasks show that, compared with existing branching and merging schemes, Deep Layer Aggregation improves recognition and resolution. [Paper](https://arxiv.org/abs/1707.06484)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:-----------------:|:----------:|:---------:|:---------:|:---------:|
| DLA34 | 15.8 | 3.1 | 76.03 | 92.98 |
| DLA46_c | 1.3 | 0.5 | 63.21 | 85.30 |
| DLA46x_c | 1.1 | 0.5 | 64.36 | 86.01 |
| DLA60 | 22.0 | 4.2 | 76.10 | 92.92 |
| DLA60x | 17.4 | 3.5 | 77.53 | 93.78 |
| DLA60x_c | 1.3 | 0.6 | 66.45 | 87.54 |
| DLA102 | 33.3 | 7.2 | 78.93 | 94.52 |
| DLA102x | 26.4 | 5.9 | 78.10 | 94.00 |
| DLA102x2 | 41.4 | 9.3 | 78.85 | 94.45 |
| DLA169 | 53.5 | 11.6 | 78.09 | 94.09 |
\ No newline at end of file
# HarDNet Series
## Overview
HarDNet (Harmonic DenseNet) is a neural network proposed by National Tsing Hua University in 2019 that achieves high efficiency under low MACs and low memory traffic. Compared with FC-DenseNet-103, DenseNet-264, ResNet-50, ResNet-152, and SSD-VGG, it reduces inference time by 35%, 36%, 30%, 32%, and 45%, respectively. The authors used tools including the Nvidia profiler and ARM Scale-Sim to measure memory traffic, verifying that inference latency is indeed proportional to memory-traffic consumption and that the proposed network keeps it low. [Paper](https://arxiv.org/abs/1909.00948)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| HarDNet68 | 17.6 | 4.3 | 75.46 | 92.65 |
| HarDNet85 | 36.7 | 9.1 | 77.44 | 93.55 |
| HarDNet39_ds | 3.5 | 0.4 | 71.33 | 89.98 |
| HarDNet68_ds | 4.2 | 0.8 | 73.62 | 91.52 |
# LeViT
## Overview
LeViT is a hybrid neural network for fast-inference image classification. Its design accounts for the model's performance across different hardware platforms, so it better reflects real-world deployment scenarios. Through extensive experiments, the authors found a better way to combine convolutional networks with the Transformer architecture, and proposed an attention-based method for integrating positional information into the Transformer. [Paper](https://arxiv.org/abs/2104.01136)
## Accuracy, FLOPs and Parameters
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPS<br>(M) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
| LeViT-128S | 0.7598 | 0.9269 | 0.766 | 0.929 | 305 | 7.8 |
| LeViT-128 | 0.7810 | 0.9371 | 0.786 | 0.940 | 406 | 9.2 |
| LeViT-192 | 0.7934 | 0.9446 | 0.800 | 0.947 | 658 | 11 |
| LeViT-256 | 0.8085 | 0.9497 | 0.816 | 0.954 | 1120 | 19 |
| LeViT-384 | 0.8191 | 0.9551 | 0.826 | 0.960 | 2353 | 39 |
**Note**: The accuracy gap with respect to the reference stems from different data preprocessing and from not using the distilled head as the output.
# RedNet Series
## Overview
RedNet replaces the convolutions at all bottleneck positions in the ResNet backbone with Involution, while keeping all convolutions used for channel mapping and fusion. These carefully redesigned components together form a new, efficient backbone network called RedNet. [Paper](https://arxiv.org/abs/2103.06255)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| RedNet26 | 9.2 | 1.7 | 75.95 | 93.19 |
| RedNet38 | 12.4 | 2.2 | 77.47 | 93.56 |
| RedNet50 | 15.5 | 2.7 | 78.33 | 94.17 |
| RedNet101 | 25.7 | 4.7 | 78.94 | 94.36 |
| RedNet152 | 34.0 | 6.8 | 79.17 | 94.40 |
\ No newline at end of file
# TNT Series
## Overview
The TNT (Transformer-iN-Transformer) series was proposed by Huawei-Noah in 2021 to model both patch-level and pixel-level representations. In each TNT block, an outer transformer block processes patch embeddings while an inner transformer block extracts local features from pixel embeddings. Pixel-level features are projected into the patch-embedding space through a linear transformation layer and then added to the patches. Stacking TNT blocks builds the TNT model for image recognition. Experiments on the ImageNet benchmark and downstream tasks demonstrate the superiority and effectiveness of the TNT architecture; for example, at comparable computational cost TNT reaches 81.3% top-1 accuracy on ImageNet, 1.5% higher than DeiT. [Paper](https://arxiv.org/abs/2103.00112)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| TNT_small | 23.8 | 5.2 | 81.21 | 95.63 |
# Twins
## Overview
The Twins family comprises Twins-PCPVT and Twins-SVT. It focuses on a careful redesign of the spatial attention mechanism, yielding a simple yet more effective scheme. Since the architecture involves only matrix multiplications, which current deep-learning frameworks optimize heavily, it is both efficient and easy to implement. Moreover, it achieves excellent performance on a variety of downstream vision tasks such as image classification, object detection, and semantic segmentation. [Paper](https://arxiv.org/abs/2104.13840)
## Accuracy, FLOPs and Parameters
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPS<br>(G) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
| pcpvt_small | 0.8082 | 0.9552 | 0.812 | - | 3.7 | 24.1 |
| pcpvt_base | 0.8242 | 0.9619 | 0.827 | - | 6.4 | 43.8 |
| pcpvt_large | 0.8273 | 0.9650 | 0.831 | - | 9.5 | 60.9 |
| alt_gvt_small | 0.8140 | 0.9546 | 0.817 | - | 2.8 | 24 |
| alt_gvt_base | 0.8294 | 0.9621 | 0.832 | - | 8.3 | 56 |
| alt_gvt_large | 0.8331 | 0.9642 | 0.837 | - | 14.8 | 99.2 |
**Note**: The accuracy gap with respect to the reference stems from different data preprocessing.
# Image Recognition Quick Start
Image recognition consists of three parts: mainbody detection to obtain bounding boxes, recognition to extract features, and retrieval based on those features.
## 1. Environment Setup
* Please refer to [Quick Installation](./installation.md) to set up the PaddleClas environment first.
Note:
**This section must be run inside the `deploy` folder; from the root of the PaddleClas repository, enter it with:**
```shell
cd deploy
```
## 2. Downloading the Inference Models and Data
The detection model, the recognition inference models for the four directions (logo, cartoon character, vehicle, product), and the test data can be downloaded as follows.
| Model | Recommended scenario | Test data | Inference model |
| ------------ | ------------- | ------- | -------- |
| General mainbody detection model | general scenarios | - | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) |
| Logo recognition model | logos | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/logo_demo_data_v1.0.tar) | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) |
| Cartoon character recognition model | cartoon characters | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/cartoon_demo_data_v1.0.tar) | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) |
| Vehicle fine-grained classification model | vehicles | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/vehicle_demo_data_v1.0.tar) | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) |
| Product recognition model | products | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar) | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_Inshop_v1.0_infer.tar) |
**Note**: On Windows, if wget is not installed, you can copy the links into a browser to download the files, then extract them into the corresponding directories.
* Download and extract the data and models
```shell
mkdir dataset
cd dataset
# download and extract the demo data
wget {url/of/data} && tar -xf {name/of/data/package}
cd ..
mkdir models
cd models
# download and extract the recognition inference model
wget {url/of/inference model} && tar -xf {name/of/inference model/package}
cd ..
```
### 2.1 Downloading the General Detection Model
```shell
mkdir models
cd models
# download and extract the general detection inference model
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar && tar -xf ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar
cd ..
```
### 2.2 Logo Recognition
Taking the logo recognition demo as an example, download the demo data and model with the following commands.
```shell
mkdir dataset
cd dataset
# download and extract the demo data
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/logo_demo_data_v1.0.tar && tar -xf logo_demo_data_v1.0.tar
cd ..
mkdir models
cd models
# download and extract the recognition inference model
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar && tar -xf logo_rec_ResNet50_Logo3K_v1.0_infer.tar
cd ..
```
After extraction, the `dataset` folder should contain the following structure:
```
├── logo_demo_data_v1.0
│ ├── data_file.txt
│ ├── gallery
│ ├── index
│ └── query
├── ...
```
The `models` folder should contain:
```
├── logo_rec_ResNet50_Logo3K_v1.0_infer
│ ├── inference.pdiparams
│ ├── inference.pdiparams.info
│ └── inference.pdmodel
├── ppyolov2_r50vd_dcn_mainbody_v1.0_infer
│ ├── inference.pdiparams
│ ├── inference.pdiparams.info
│ └── inference.pdmodel
```
Image retrieval can then be run with:
```shell
python3.7 python/predict_system.py -c configs/inference_logo.yaml
```
Some key fields of the configuration file are explained below:
```yaml
Global:
  infer_imgs: "./dataset/logo_demo_data_v1.0/query/" # image(s) to predict
  det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" # detection inference model directory
  rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/" # recognition inference model directory
  batch_size: 1 # prediction batch size
  image_shape: [3, 640, 640] # input image size for detection
  threshold: 0.5 # detection threshold; only boxes scoring above it are kept
  max_det_results: 1 # number of detection boxes passed on to recognition; among boxes above the threshold, at most max_det_results of the highest-scoring ones are recognized
# indexing engine config
IndexProcess:
  index_path: "./dataset/logo_demo_data_v1.0/index/" # index directory holding the gallery features for retrieval
search_budget: 100
  return_k: 5 # return the return_k most similar gallery entries
dist_type: "IP"
```
The final output looks like:
```
[{'bbox': [25, 21, 483, 382], 'rec_docs': ['AKG', 'AKG', 'AKG', 'AKG', 'AKG'], 'rec_scores': array([2.32288337, 2.31903863, 2.28398442, 2.16804123, 2.10190272])}]
```
Here `bbox` gives the location of the detected object, `rec_docs` lists the labels of the gallery images most similar to it, and `rec_scores` gives the corresponding similarities.
To predict all images in a folder, either edit the configuration file directly or override the option with the `-o` flag as below.
```shell
python3.7 python/predict_system.py -c configs/inference_logo.yaml -o Global.infer_imgs="./dataset/logo_demo_data_v1.0/query"
```
To add images to the gallery, rebuild the index with the following command.
```shell
python3.7 python/build_gallery.py -c configs/build_logo.yaml
```
The index-related configuration is:
```yaml
# indexing engine config
IndexProcess:
  index_path: "./dataset/logo_demo_data_v1.0/index/" # where the built index is saved
  image_root: "./dataset/logo_demo_data_v1.0/" # root directory of the images
  data_file: "./dataset/logo_demo_data_v1.0/data_file.txt" # image list file; each line holds an image filename and its label
delimiter: "\t"
dist_type: "IP"
pq_size: 100
  embedding_size: 512 # feature dimension
```
What needs to change:
1. Add the new images under the image root directory (subfolders are fine, as long as joining the root directory with the filename recorded in the list file yields an existing image).
2. Append the new entries to the image list file, one line per image with its filename and label, as in the example below.
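A hedged illustration of the list-file format for the logo task (tab-delimited, matching `delimiter: "\t"` above; the paths and labels are made up):

```
gallery/AKG/5.jpg	AKG
gallery/Adidas/12.jpg	Adidas
```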
### 2.3 Recognition for Other Tasks
To try recognition and retrieval in the other directions, download and extract the corresponding demo data and models, then run prediction with the matching configuration files.
| Scenario | Prediction config | Gallery-building config |
| ---- | ----- | ----- |
| Cartoon characters | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) |
| Vehicles | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) |
| Products | [inference_inshop.yaml](../../../deploy/configs/) | [build_inshop.yaml](../../../deploy/configs/build_inshop.yaml) |
@@ -21,8 +21,9 @@ from . import backbone, gears
from .backbone import *
from .gears import build_gear
from .utils import *
from ppcls.utils.save_load import load_dygraph_pretrain
__all__ = ["build_model", "RecModel"]
__all__ = ["build_model", "RecModel", "DistillationModel"]
def build_model(config):
@@ -62,3 +63,48 @@ class RecModel(nn.Layer):
else:
y = None
return {"features": x, "logits": y}
class DistillationModel(nn.Layer):
def __init__(self,
models=None,
pretrained_list=None,
freeze_params_list=None,
**kargs):
super().__init__()
assert isinstance(models, list)
self.model_list = []
self.model_name_list = []
if pretrained_list is not None:
assert len(pretrained_list) == len(models)
if freeze_params_list is None:
freeze_params_list = [False] * len(models)
assert len(freeze_params_list) == len(models)
for idx, model_config in enumerate(models):
assert len(model_config) == 1
key = list(model_config.keys())[0]
model_config = model_config[key]
model_name = model_config.pop("name")
model = eval(model_name)(**model_config)
if freeze_params_list[idx]:
for param in model.parameters():
param.trainable = False
self.model_list.append(self.add_sublayer(key, model))
self.model_name_list.append(key)
if pretrained_list is not None:
for idx, pretrained in enumerate(pretrained_list):
if pretrained is not None:
                # load into the sublayer itself, not its name string
                load_dygraph_pretrain(
                    self.model_list[idx], path=pretrained)
def forward(self, x, label=None):
result_dict = dict()
for idx, model_name in enumerate(self.model_name_list):
if label is None:
result_dict[model_name] = self.model_list[idx](x)
else:
result_dict[model_name] = self.model_list[idx](x, label)
return result_dict
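A hedged usage sketch of `DistillationModel` (the sub-model names and backbone choices here are illustrative): each entry in `models` becomes a sublayer keyed by its name, optionally frozen, and `forward` returns a dict keyed the same way.

```python
import paddle

# a frozen teacher and a trainable student (backbone names assumed importable
# via `from .backbone import *` in this module)
arch_cfg = [
    {"Teacher": {"name": "ResNet50_vd", "pretrained": False}},
    {"Student": {"name": "MobileNetV3_large_x1_0", "pretrained": False}},
]
model = DistillationModel(models=arch_cfg, freeze_params_list=[True, False])
out = model(paddle.rand([1, 3, 224, 224]))  # {"Teacher": ..., "Student": ...}
```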
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -19,11 +19,12 @@ from ppcls.arch.backbone.legendary_models.vgg import VGG11, VGG13, VGG16, VGG19
from ppcls.arch.backbone.legendary_models.inception_v3 import InceptionV3
from ppcls.arch.backbone.legendary_models.hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W64_C
from ppcls.arch.backbone.model_zoo.resnet_vc import ResNet18_vc, ResNet34_vc, ResNet50_vc, ResNet101_vc, ResNet152_vc
from ppcls.arch.backbone.model_zoo.resnet_vc import ResNet50_vc
from ppcls.arch.backbone.model_zoo.resnext import ResNeXt50_32x4d, ResNeXt50_64x4d, ResNeXt101_32x4d, ResNeXt101_64x4d, ResNeXt152_32x4d, ResNeXt152_64x4d
from ppcls.arch.backbone.model_zoo.res2net import Res2Net50_48w_2s, Res2Net50_26w_4s, Res2Net50_14w_8s, Res2Net50_48w_2s, Res2Net50_26w_6s, Res2Net50_26w_8s, Res2Net101_26w_4s, Res2Net152_26w_4s, Res2Net200_26w_4s
from ppcls.arch.backbone.model_zoo.res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_14w_8s, Res2Net50_vd_48w_2s, Res2Net50_vd_26w_6s, Res2Net50_vd_26w_8s, Res2Net101_vd_26w_4s, Res2Net152_vd_26w_4s, Res2Net200_vd_26w_4s
from ppcls.arch.backbone.model_zoo.se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd, SE_ResNet101_vd, SE_ResNet152_vd, SE_ResNet200_vd
from ppcls.arch.backbone.model_zoo.resnext_vd import ResNeXt50_vd_32x4d, ResNeXt50_vd_64x4d, ResNeXt101_vd_32x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_32x4d, ResNeXt152_vd_64x4d
from ppcls.arch.backbone.model_zoo.res2net import Res2Net50_26w_4s, Res2Net50_14w_8s
from ppcls.arch.backbone.model_zoo.res2net_vd import Res2Net50_vd_26w_4s, Res2Net101_vd_26w_4s, Res2Net200_vd_26w_4s
from ppcls.arch.backbone.model_zoo.se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd
from ppcls.arch.backbone.model_zoo.se_resnext_vd import SE_ResNeXt50_vd_32x4d, SE_ResNeXt50_vd_32x4d, SENet154_vd
from ppcls.arch.backbone.model_zoo.se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_64x4d
from ppcls.arch.backbone.model_zoo.dpn import DPN68, DPN92, DPN98, DPN107, DPN131
@@ -33,10 +34,11 @@ from ppcls.arch.backbone.model_zoo.resnest import ResNeSt50_fast_1s1x64d, ResNeS
from ppcls.arch.backbone.model_zoo.googlenet import GoogLeNet
from ppcls.arch.backbone.model_zoo.mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2, MobileNetV2_x1_5, MobileNetV2_x2_0
from ppcls.arch.backbone.model_zoo.shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2_swish
from ppcls.arch.backbone.model_zoo.ghostnet import GhostNet_x0_5, GhostNet_x1_0, GhostNet_x1_3
from ppcls.arch.backbone.model_zoo.alexnet import AlexNet
from ppcls.arch.backbone.model_zoo.inception_v4 import InceptionV4
from ppcls.arch.backbone.model_zoo.xception import Xception41, Xception65, Xception71
from ppcls.arch.backbone.model_zoo.xception_deeplab import Xception41_deeplab, Xception65_deeplab, Xception71_deeplab
from ppcls.arch.backbone.model_zoo.xception_deeplab import Xception41_deeplab, Xception65_deeplab
from ppcls.arch.backbone.model_zoo.resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl
from ppcls.arch.backbone.model_zoo.squeezenet import SqueezeNet1_0, SqueezeNet1_1
from ppcls.arch.backbone.model_zoo.darknet import DarkNet53
@@ -47,4 +49,10 @@ from ppcls.arch.backbone.model_zoo.distillation_models import ResNet50_vd_distil
from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0
from ppcls.arch.backbone.model_zoo.gvt import pcpvt_small, pcpvt_base, pcpvt_large, alt_gvt_small, alt_gvt_base, alt_gvt_large
from ppcls.arch.backbone.model_zoo.levit import LeViT_128S, LeViT_128, LeViT_192, LeViT_256, LeViT_384
from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169
from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152
from ppcls.arch.backbone.model_zoo.tnt import TNT_small
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds
from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
@@ -7,8 +21,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
__all__ = ["AlexNet"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"AlexNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvPoolLayer(nn.Layer):
def __init__(self,
@@ -126,7 +143,19 @@ class AlexNetDY(nn.Layer):
x = self._fc8(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def AlexNet(**args):
model = AlexNetDY(**args)
def AlexNet(pretrained=False, use_ssld=False, **kwargs):
model = AlexNetDY(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld)
return model
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
@@ -7,8 +21,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
__all__ = ["DarkNet53"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"DarkNet53": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
@@ -155,7 +172,19 @@ class DarkNet(nn.Layer):
x = self._out(x)
return x
def DarkNet53(**args):
model = DarkNet(**args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DarkNet53(pretrained=False, use_ssld=False, **kwargs):
model = DarkNet(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld)
return model
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -26,9 +26,16 @@ from paddle.nn.initializer import Uniform
import math
__all__ = [
"DenseNet121", "DenseNet161", "DenseNet169", "DenseNet201", "DenseNet264"
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"DenseNet121": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams",
"DenseNet161": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams",
"DenseNet169": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams",
"DenseNet201": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams",
"DenseNet264": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class BNACConvLayer(nn.Layer):
@@ -282,27 +289,43 @@ class DenseNet(nn.Layer):
y = self.out(y)
return y
def DenseNet121(**args):
model = DenseNet(layers=121, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DenseNet121(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=121, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld)
return model
def DenseNet161(**args):
model = DenseNet(layers=161, **args)
def DenseNet161(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=161, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld)
return model
def DenseNet169(**args):
model = DenseNet(layers=169, **args)
def DenseNet169(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=169, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld)
return model
def DenseNet201(**args):
model = DenseNet(layers=201, **args)
def DenseNet201(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=201, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld)
return model
def DenseNet264(**args):
model = DenseNet(layers=264, **args)
def DenseNet264(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=264, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld)
return model
@@ -16,12 +16,20 @@ import paddle
import paddle.nn as nn
from .vision_transformer import VisionTransformer, Identity, trunc_normal_, zeros_
__all__ = [
'DeiT_tiny_patch16_224', 'DeiT_small_patch16_224', 'DeiT_base_patch16_224',
'DeiT_tiny_distilled_patch16_224', 'DeiT_small_distilled_patch16_224',
'DeiT_base_distilled_patch16_224', 'DeiT_base_patch16_384',
'DeiT_base_distilled_patch16_384'
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DeiT_tiny_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams",
"DeiT_small_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams",
"DeiT_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams",
"DeiT_tiny_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams",
"DeiT_small_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams",
"DeiT_base_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams",
"DeiT_base_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams",
"DeiT_base_distilled_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class DistilledVisionTransformer(VisionTransformer):
@@ -90,7 +98,20 @@ class DistilledVisionTransformer(VisionTransformer):
return (x + x_dist) / 2
def DeiT_tiny_patch16_224(**kwargs):
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=192,
@@ -100,10 +121,11 @@ def DeiT_tiny_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_small_patch16_224(**kwargs):
def DeiT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=384,
@@ -113,10 +135,11 @@ def DeiT_small_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_base_patch16_224(**kwargs):
def DeiT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=768,
@@ -126,10 +149,11 @@ def DeiT_base_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_tiny_distilled_patch16_224(**kwargs):
def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = DistilledVisionTransformer(
patch_size=16,
embed_dim=192,
@@ -139,10 +163,11 @@ def DeiT_tiny_distilled_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_distilled_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_small_distilled_patch16_224(**kwargs):
def DeiT_small_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = DistilledVisionTransformer(
patch_size=16,
embed_dim=384,
@@ -152,10 +177,11 @@ def DeiT_small_distilled_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_distilled_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_base_distilled_patch16_224(**kwargs):
def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = DistilledVisionTransformer(
patch_size=16,
embed_dim=768,
@@ -165,10 +191,11 @@ def DeiT_base_distilled_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_base_patch16_384(**kwargs):
def DeiT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
img_size=384,
patch_size=16,
@@ -179,10 +206,11 @@ def DeiT_base_patch16_384(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_384"], use_ssld=use_ssld)
return model
def DeiT_base_distilled_patch16_384(**kwargs):
def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, **kwargs):
model = DistilledVisionTransformer(
img_size=384,
patch_size=16,
@@ -193,4 +221,5 @@ def DeiT_base_distilled_patch16_384(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_384"], use_ssld=use_ssld)
return model
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DLA34":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams",
"DLA46_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46_c_pretrained.pdparams",
"DLA46x_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46x_c_pretrained.pdparams",
"DLA60":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60_pretrained.pdparams",
"DLA60x":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_pretrained.pdparams",
"DLA60x_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_c_pretrained.pdparams",
"DLA102":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102_pretrained.pdparams",
"DLA102x":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x_pretrained.pdparams",
"DLA102x2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x2_pretrained.pdparams",
"DLA169":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)
class DlaBasic(nn.Layer):
def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
super(DlaBasic, self).__init__()
self.conv1 = nn.Conv2D(
inplanes, planes, kernel_size=3, stride=stride,
padding=dilation, bias_attr=False, dilation=dilation
)
self.bn1 = nn.BatchNorm2D(planes)
self.relu = nn.ReLU()
self.conv2 = nn.Conv2D(
planes, planes, kernel_size=3, stride=1,
padding=dilation, bias_attr=False, dilation=dilation
)
self.bn2 = nn.BatchNorm2D(planes)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class DlaBottleneck(nn.Layer):
expansion = 2
def __init__(self, inplanes, outplanes, stride=1,
dilation=1, cardinality=1, base_width=64):
super(DlaBottleneck, self).__init__()
self.stride = stride
mid_planes = int(math.floor(
outplanes * (base_width / 64)) * cardinality)
mid_planes = mid_planes // self.expansion
self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False)
self.bn1 = nn.BatchNorm2D(mid_planes)
self.conv2 = nn.Conv2D(
mid_planes, mid_planes, kernel_size=3,
stride=stride, padding=dilation, bias_attr=False,
dilation=dilation, groups=cardinality
)
self.bn2 = nn.BatchNorm2D(mid_planes)
self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False)
self.bn3 = nn.BatchNorm2D(outplanes)
self.relu = nn.ReLU()
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class DlaRoot(nn.Layer):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(DlaRoot, self).__init__()
self.conv = nn.Conv2D(
in_channels, out_channels, 1, stride=1,
bias_attr=False, padding=(kernel_size - 1) // 2
)
self.bn = nn.BatchNorm2D(out_channels)
self.relu = nn.ReLU()
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(paddle.concat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
class DlaTree(nn.Layer):
def __init__(self, levels, block, in_channels, out_channels,
                 stride=1, dilation=1, cardinality=1, base_width=64,
level_root=False, root_dim=0, root_kernel_size=1,
root_residual=False):
super(DlaTree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
self.downsample = nn.MaxPool2D(
stride, stride=stride) if stride > 1 else Identity()
self.project = Identity()
cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width)
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride, **cargs)
self.tree2 = block(out_channels, out_channels, 1, **cargs)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False),
nn.BatchNorm2D(out_channels))
else:
cargs.update(dict(root_kernel_size=root_kernel_size, root_residual=root_residual))
self.tree1 = DlaTree(
levels - 1, block, in_channels,
out_channels, stride, root_dim=0, **cargs
)
self.tree2 = DlaTree(
levels - 1, block, out_channels,
out_channels, root_dim=root_dim + out_channels, **cargs
)
if levels == 1:
self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.levels = levels
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x)
residual = self.project(bottom)
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
class DLA(nn.Layer):
def __init__(self, levels, channels, in_chans=3, cardinality=1,
base_width=64, block=DlaBottleneck, residual_root=False,
drop_rate=0.0, class_dim=1000, with_pool=True):
super(DLA, self).__init__()
self.channels = channels
self.class_dim = class_dim
self.with_pool = with_pool
self.cardinality = cardinality
self.base_width = base_width
self.drop_rate = drop_rate
self.base_layer = nn.Sequential(
nn.Conv2D(
in_chans, channels[0], kernel_size=7,
stride=1, padding=3, bias_attr=False
),
nn.BatchNorm2D(channels[0]),
nn.ReLU())
self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2)
cargs = dict(
cardinality=cardinality,
base_width=base_width,
root_residual=residual_root
)
self.level2 = DlaTree(
levels[2], block, channels[1],
channels[2], 2, level_root=False, **cargs
)
self.level3 = DlaTree(
levels[3], block, channels[2],
channels[3], 2, level_root=True, **cargs
)
self.level4 = DlaTree(
levels[4], block, channels[3],
channels[4], 2, level_root=True, **cargs
)
self.level5 = DlaTree(
levels[5], block, channels[4],
channels[5], 2, level_root=True, **cargs
)
self.feature_info = [
# rare to have a meaningful stride 1 level
dict(num_chs=channels[0], reduction=1, module='level0'),
dict(num_chs=channels[1], reduction=2, module='level1'),
dict(num_chs=channels[2], reduction=4, module='level2'),
dict(num_chs=channels[3], reduction=8, module='level3'),
dict(num_chs=channels[4], reduction=16, module='level4'),
dict(num_chs=channels[5], reduction=32, module='level5'),
]
self.num_features = channels[-1]
if with_pool:
self.global_pool = nn.AdaptiveAvgPool2D(1)
if class_dim > 0:
self.fc = nn.Conv2D(self.num_features, class_dim, 1)
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
normal_(m.weight)
elif isinstance(m, nn.BatchNorm2D):
ones_(m.weight)
zeros_(m.bias)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2D(
inplanes, planes, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias_attr=False, dilation=dilation
),
nn.BatchNorm2D(planes),
nn.ReLU()])
inplanes = planes
return nn.Sequential(*modules)
def forward_features(self, x):
x = self.base_layer(x)
x = self.level0(x)
x = self.level1(x)
x = self.level2(x)
x = self.level3(x)
x = self.level4(x)
x = self.level5(x)
return x
def forward(self, x):
x = self.forward_features(x)
if self.with_pool:
x = self.global_pool(x)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
if self.class_dim > 0:
x = self.fc(x)
x = x.flatten(1)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DLA34(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 128, 256, 512),
block=DlaBasic,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
return model
def DLA46_c(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
return model
def DLA46x_c(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
return model
def DLA60(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
return model
def DLA60x(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
return model
def DLA60x_c(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
return model
def DLA102(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
return model
def DLA102x(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=32,
base_width=4,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
return model
def DLA102x2(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=64,
base_width=4,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
return model
def DLA169(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 2, 3, 5, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
return model
@@ -27,14 +27,16 @@ from paddle.nn.initializer import Uniform
import math
__all__ = [
"DPN",
"DPN68",
"DPN92",
"DPN98",
"DPN107",
"DPN131",
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"DPN68": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams",
"DPN92": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams",
"DPN98": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams",
"DPN107": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams",
"DPN131": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
@@ -398,28 +400,45 @@ class DPN(nn.Layer):
net_arg['init_padding'] = init_padding
return net_arg
def DPN68(**args):
model = DPN(layers=68, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DPN68(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=68, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN68"])
return model
def DPN92(**args):
model = DPN(layers=92, **args)
def DPN92(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=92, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN92"])
return model
def DPN98(**args):
model = DPN(layers=98, **args)
def DPN98(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=98, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN98"])
return model
def DPN107(**args):
model = DPN(layers=107, **args)
def DPN107(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=107, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN107"])
return model
def DPN131(**args):
model = DPN(layers=131, **args)
return model
def DPN131(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=131, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN131"])
return model
\ No newline at end of file
@@ -9,11 +9,20 @@ import collections
import re
import copy
__all__ = [
'EfficientNet', 'EfficientNetB0_small', 'EfficientNetB0', 'EfficientNetB1',
'EfficientNetB2', 'EfficientNetB3', 'EfficientNetB4', 'EfficientNetB5',
'EfficientNetB6', 'EfficientNetB7'
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"EfficientNetB0_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams",
"EfficientNetB0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams",
"EfficientNetB1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams",
"EfficientNetB2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams",
"EfficientNetB3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams",
"EfficientNetB4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams",
"EfficientNetB5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams",
"EfficientNetB6": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams",
"EfficientNetB7": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
GlobalParams = collections.namedtuple('GlobalParams', [
'batch_norm_momentum',
......@@ -783,119 +792,159 @@ class EfficientNet(nn.Layer):
x = self._fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def EfficientNetB0_small(padding_type='DYNAMIC',
override_params=None,
use_se=False,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b0',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0_small"])
return model
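# Editorial note: the "_small" variant differs from EfficientNetB0 only in
# its construction flags -- 'DYNAMIC' padding instead of TensorFlow-style
# 'SAME', and use_se=False, which drops the squeeze-and-excitation blocks
# for a lighter, more deployment-friendly model:
#
#     model = EfficientNetB0_small(pretrained=False)  # SE-free, dynamic padding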
def EfficientNetB0(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b0',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0"])
return model
def EfficientNetB1(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b1',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB1"])
return model
def EfficientNetB2(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b2',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB2"])
return model
def EfficientNetB3(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b3',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB3"])
return model
def EfficientNetB4(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b4',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB4"])
return model
def EfficientNetB5(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b5',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB5"])
return model
def EfficientNetB6(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b6',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB6"])
return model
def EfficientNetB7(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b7',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
return model
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB7"])
return model
\ No newline at end of file
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -21,7 +21,14 @@ from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Uniform, KaimingNormal
__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"GhostNet_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams",
"GhostNet_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams",
"GhostNet_x1_3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -315,17 +322,33 @@ class GhostNet(nn.Layer):
new_v += divisor
return new_v
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def GhostNet_x0_5(**args):
model = GhostNet(scale=0.5)
def GhostNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = GhostNet(scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld)
return model
def GhostNet_x1_0(**args):
model = GhostNet(scale=1.0)
def GhostNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
model = GhostNet(scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld)
return model
def GhostNet_x1_3(**args):
model = GhostNet(scale=1.3)
def GhostNet_x1_3(pretrained=False, use_ssld=False, **kwargs):
model = GhostNet(scale=1.3, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld)
return model
......@@ -8,7 +8,12 @@ from paddle.nn.initializer import Uniform
import math
__all__ = ['GoogLeNet']
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"GoogLeNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
def xavier(channels, filter_size, name):
......@@ -200,8 +205,22 @@ class GoogLeNetDY(nn.Layer):
x = self._drop_o2(x)
out2 = self._out2(x)
return [out, out1, out2]
def GoogLeNet(**args):
model = GoogLeNetDY(**args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def GoogLeNet(pretrained=False, use_ssld=False, **kwargs):
model = GoogLeNetDY(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld)
return model
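# Usage sketch (editorial): unlike the other backbones here, GoogLeNetDY
# returns a list of three logit tensors -- the main head plus two auxiliary
# heads used for deep supervision -- so callers unpack accordingly:
if __name__ == "__main__":
    import paddle
    model = GoogLeNet(pretrained=False)
    out, aux1, aux2 = model(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # main-classifier logits, e.g. [1, 1000]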
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
'HarDNet39_ds':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams',
'HarDNet68_ds':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_ds_pretrained.pdparams',
'HarDNet68':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_pretrained.pdparams',
'HarDNet85':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams'
}
__all__ = list(MODEL_URLS.keys())
def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
layer = nn.Sequential(
('conv', nn.Conv2D(
in_channels, out_channels, kernel_size=kernel_size,
stride=stride, padding=kernel_size//2, groups=1, bias_attr=bias_attr
)),
('norm', nn.BatchNorm2D(out_channels)),
('relu', nn.ReLU6())
)
return layer
def DWConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
layer = nn.Sequential(
('dwconv', nn.Conv2D(
in_channels, out_channels, kernel_size=kernel_size,
stride=stride, padding=1, groups=out_channels, bias_attr=bias_attr
)),
('norm', nn.BatchNorm2D(out_channels))
)
return layer
def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
layer = nn.Sequential(
('layer1', ConvLayer(in_channels, out_channels, kernel_size=kernel_size)),
('layer2', DWConvLayer(out_channels, out_channels, stride=stride))
)
return layer
class HarDBlock(nn.Layer):
def __init__(self, in_channels, growth_rate, grmul, n_layers,
keepBase=False, residual_out=False, dwconv=False):
super().__init__()
self.keepBase = keepBase
self.links = []
layers_ = []
self.out_channels = 0 # if upsample else in_channels
for i in range(n_layers):
outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul)
self.links.append(link)
if dwconv:
layers_.append(CombConvLayer(inch, outch))
else:
layers_.append(ConvLayer(inch, outch))
if (i % 2 == 0) or (i == n_layers - 1):
self.out_channels += outch
# print("Blk out =",self.out_channels)
self.layers = nn.LayerList(layers_)
def get_link(self, layer, base_ch, growth_rate, grmul):
if layer == 0:
return base_ch, 0, []
out_channels = growth_rate
link = []
for i in range(10):
dv = 2 ** i
if layer % dv == 0:
k = layer - dv
link.append(k)
if i > 0:
out_channels *= grmul
out_channels = int(int(out_channels + 1) / 2) * 2
in_channels = 0
for i in link:
ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
in_channels += ch
return out_channels, in_channels, link
def forward(self, x):
layers_ = [x]
for layer in range(len(self.layers)):
link = self.links[layer]
tin = []
for i in link:
tin.append(layers_[i])
if len(tin) > 1:
x = paddle.concat(tin, 1)
else:
x = tin[0]
out = self.layers[layer](x)
layers_.append(out)
t = len(layers_)
out_ = []
for i in range(t):
if (i == 0 and self.keepBase) or (i == t-1) or (i % 2 == 1):
out_.append(layers_[i])
out = paddle.concat(out_, 1)
return out
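# Editorial note: get_link wires layer L back to layers L - 2**k for every
# power of two dividing L, widening the layer by grmul per extra link, and
# the block output concatenates the odd-indexed layers, the final layer and
# (optionally) the base. For a 4-layer block with growth_rate=16, grmul=1.7
# the recurrence above gives (values worked out by hand, worth re-checking):
#
#     blk = HarDBlock(in_channels=64, growth_rate=16, grmul=1.7, n_layers=4)
#     blk.links         # [[0], [1, 0], [2], [3, 2, 0]]
#     blk.out_channels  # 78 = 16 + 16 + 46 (layers 1, 3 and 4)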
class HarDNet(nn.Layer):
def __init__(self, depth_wise=False, arch=85,
class_dim=1000, with_pool=True):
super().__init__()
first_ch = [32, 64]
second_kernel = 3
max_pool = True
grmul = 1.7
drop_rate = 0.1
# HarDNet68
ch_list = [128, 256, 320, 640, 1024]
gr = [14, 16, 20, 40, 160]
n_layers = [8, 16, 16, 16, 4]
downSamp = [1, 0, 1, 1, 0]
if arch == 85:
# HarDNet85
first_ch = [48, 96]
ch_list = [192, 256, 320, 480, 720, 1280]
gr = [24, 24, 28, 36, 48, 256]
n_layers = [8, 16, 16, 16, 16, 4]
downSamp = [1, 0, 1, 0, 1, 0]
drop_rate = 0.2
elif arch == 39:
# HarDNet39
first_ch = [24, 48]
ch_list = [96, 320, 640, 1024]
grmul = 1.6
gr = [16, 20, 64, 160]
n_layers = [4, 16, 8, 4]
downSamp = [1, 1, 1, 0]
if depth_wise:
second_kernel = 1
max_pool = False
drop_rate = 0.05
blks = len(n_layers)
self.base = nn.LayerList([])
# First Layer: Standard Conv3x3, Stride=2
self.base.append(
ConvLayer(in_channels=3, out_channels=first_ch[0], kernel_size=3,
stride=2, bias_attr=False))
# Second Layer
self.base.append(
ConvLayer(first_ch[0], first_ch[1], kernel_size=second_kernel))
# Maxpooling or DWConv3x3 downsampling
if max_pool:
self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
else:
self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))
# Build all HarDNet blocks
ch = first_ch[1]
for i in range(blks):
blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
ch = blk.out_channels
self.base.append(blk)
if i == blks-1 and arch == 85:
self.base.append(nn.Dropout(0.1))
self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
ch = ch_list[i]
if downSamp[i] == 1:
if max_pool:
self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
else:
self.base.append(DWConvLayer(ch, ch, stride=2))
ch = ch_list[blks-1]
layers = []
if with_pool:
layers.append(nn.AdaptiveAvgPool2D((1, 1)))
if class_dim > 0:
layers.append(nn.Flatten())
layers.append(nn.Dropout(drop_rate))
layers.append(nn.Linear(ch, class_dim))
self.base.append(nn.Sequential(*layers))
def forward(self, x):
for layer in self.base:
x = layer(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def HarDNet39_ds(pretrained=False, **kwargs):
model = HarDNet(arch=39, depth_wise=True, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"])
return model
def HarDNet68_ds(pretrained=False, **kwargs):
model = HarDNet(arch=68, depth_wise=True, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"])
return model
def HarDNet68(pretrained=False, **kwargs):
model = HarDNet(arch=68, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"])
return model
def HarDNet85(pretrained=False, **kwargs):
model = HarDNet(arch=85, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"])
return model
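# Usage sketch (editorial): all four wrappers share the same trunk; the
# "_ds" variants pass depth_wise=True, which swaps the second 3x3 conv and
# the max-pool downsampling for depthwise variants to cut FLOPs.
if __name__ == "__main__":
    model = HarDNet68(pretrained=False)
    x = paddle.rand([1, 3, 224, 224])
    print(model(x).shape)  # [1, 1000] with the default class_dim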
......@@ -27,24 +27,18 @@ from paddle.nn.initializer import Uniform
import math
__all__ = [
"HRNet_W18_C",
"HRNet_W30_C",
"HRNet_W32_C",
"HRNet_W40_C",
"HRNet_W44_C",
"HRNet_W48_C",
"HRNet_W60_C",
"HRNet_W64_C",
"SE_HRNet_W18_C",
"SE_HRNet_W30_C",
"SE_HRNet_W32_C",
"SE_HRNet_W40_C",
"SE_HRNet_W44_C",
"SE_HRNet_W48_C",
"SE_HRNet_W60_C",
"SE_HRNet_W64_C",
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"HRNet_W18_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W18_C_pretrained.pdparams",
"HRNet_W30_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W30_C_pretrained.pdparams",
"HRNet_W32_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W32_C_pretrained.pdparams",
"HRNet_W40_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W40_C_pretrained.pdparams",
"HRNet_W44_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W44_C_pretrained.pdparams",
"HRNet_W48_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W48_C_pretrained.pdparams",
"HRNet_W64_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W64_C_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -661,82 +655,62 @@ class HRNet(nn.Layer):
y = self.out(y)
return y
def HRNet_W18_C(**args):
model = HRNet(width=18, **args)
return model
def HRNet_W30_C(**args):
model = HRNet(width=30, **args)
return model
def HRNet_W32_C(**args):
model = HRNet(width=32, **args)
return model
def HRNet_W40_C(**args):
model = HRNet(width=40, **args)
return model
def HRNet_W44_C(**args):
model = HRNet(width=44, **args)
return model
def HRNet_W48_C(**args):
model = HRNet(width=48, **args)
return model
def HRNet_W60_C(**args):
model = HRNet(width=60, **args)
return model
def HRNet_W64_C(**args):
model = HRNet(width=64, **args)
return model
def SE_HRNet_W18_C(**args):
model = HRNet(width=18, has_se=True, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def HRNet_W18_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=18, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W18_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W30_C(**args):
model = HRNet(width=30, has_se=True, **args)
def HRNet_W30_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=30, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W30_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W32_C(**args):
model = HRNet(width=32, has_se=True, **args)
def HRNet_W32_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=32, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W32_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W40_C(**args):
model = HRNet(width=40, has_se=True, **args)
def HRNet_W40_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=40, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W40_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W44_C(**args):
model = HRNet(width=44, has_se=True, **args)
def HRNet_W44_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=44, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W44_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W48_C(**args):
model = HRNet(width=48, has_se=True, **args)
def HRNet_W48_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=48, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W48_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W60_C(**args):
model = HRNet(width=60, has_se=True, **args)
def HRNet_W64_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=64, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W64_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W64_C(**args):
model = HRNet(width=64, has_se=True, **args)
def SE_HRNet_W64_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=64, has_se=True, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W64_C"], use_ssld=use_ssld)
return model
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -26,7 +26,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
__all__ = ["InceptionV3"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"InceptionV3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV3_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -425,9 +429,9 @@ class InceptionE(nn.Layer):
return outputs
class InceptionV3(nn.Layer):
class Inception_V3(nn.Layer):
def __init__(self, class_dim=1000):
super(InceptionV3, self).__init__()
super(Inception_V3, self).__init__()
self.inception_a_list = [[192, 256, 288], [32, 64, 64]]
self.inception_c_list = [[768, 768, 768, 768], [128, 160, 160, 192]]
......@@ -472,10 +476,28 @@ class InceptionV3(nn.Layer):
def forward(self, x):
y = self.inception_stem(x)
for inception_block in self.inception_block_list:
y = inception_block(y)
y = self.gap(y)
y = paddle.reshape(y, shape=[-1, 2048])
y = self.drop(y)
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def InceptionV3(pretrained=False, use_ssld=False, **kwargs):
model = Inception_V3(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["InceptionV3"], use_ssld=use_ssld)
return model
......@@ -21,7 +21,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
__all__ = ["InceptionV4"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"InceptionV4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -450,6 +454,19 @@ class InceptionV4DY(nn.Layer):
return x
def InceptionV4(**args):
model = InceptionV4DY(**args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def InceptionV4(pretrained=False, use_ssld=False, **kwargs):
model = InceptionV4DY(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
return model
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import math
import warnings
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import TruncatedNormal, Constant
from paddle.regularizer import L2Decay
from .vision_transformer import trunc_normal_, zeros_, ones_, Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"LeViT_128S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams",
"LeViT_128": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams",
"LeViT_192": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams",
"LeViT_256": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams",
"LeViT_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
def cal_attention_biases(attention_biases, attention_bias_idxs):
gather_list = []
attention_bias_t = paddle.transpose(attention_biases, (1, 0))
for idx in attention_bias_idxs:
gather = paddle.gather(attention_bias_t, idx)
gather_list.append(gather)
shape0, shape1 = attention_bias_idxs.shape
return paddle.transpose(paddle.concat(gather_list), (1, 0)).reshape(
(0, shape0, shape1))
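# Editorial note: attention_biases is a learned table of shape
# [num_heads, n_unique_offsets] and attention_bias_idxs maps each
# (query, key) position pair to its relative-offset slot, so the gather
# above expands the table into a dense per-head bias. Shape sketch:
#
#     biases = paddle.zeros([4, 25])                # 4 heads, 25 offsets
#     idxs = paddle.zeros([49, 49], dtype='int64')  # 7x7 tokens per side
#     cal_attention_biases(biases, idxs).shape      # [4, 49, 49]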
class Conv2d_BN(nn.Sequential):
def __init__(self,
a,
b,
ks=1,
stride=1,
pad=0,
dilation=1,
groups=1,
bn_weight_init=1,
resolution=-10000):
super().__init__()
self.add_sublayer(
'c',
nn.Conv2D(
a, b, ks, stride, pad, dilation, groups, bias_attr=False))
bn = nn.BatchNorm2D(b)
ones_(bn.weight)
zeros_(bn.bias)
self.add_sublayer('bn', bn)
class Linear_BN(nn.Sequential):
def __init__(self, a, b, bn_weight_init=1):
super().__init__()
self.add_sublayer('c', nn.Linear(a, b, bias_attr=False))
bn = nn.BatchNorm1D(b)
ones_(bn.weight)
zeros_(bn.bias)
self.add_sublayer('bn', bn)
def forward(self, x):
l, bn = self._sub_layers.values()
x = l(x)
return paddle.reshape(bn(x.flatten(0, 1)), x.shape)
class BN_Linear(nn.Sequential):
def __init__(self, a, b, bias=True, std=0.02):
super().__init__()
self.add_sublayer('bn', nn.BatchNorm1D(a))
l = nn.Linear(a, b, bias_attr=bias)
trunc_normal_(l.weight)
if bias:
zeros_(l.bias)
self.add_sublayer('l', l)
def b16(n, activation, resolution=224):
return nn.Sequential(
Conv2d_BN(
3, n // 8, 3, 2, 1, resolution=resolution),
activation(),
Conv2d_BN(
n // 8, n // 4, 3, 2, 1, resolution=resolution // 2),
activation(),
Conv2d_BN(
n // 4, n // 2, 3, 2, 1, resolution=resolution // 4),
activation(),
Conv2d_BN(
n // 2, n, 3, 2, 1, resolution=resolution // 8))
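# Editorial note: b16 is the convolutional patch stem -- four stride-2
# Conv2d_BN stages halve the resolution each time (16x overall) while the
# width ramps n//8 -> n//4 -> n//2 -> n. Shape sketch:
#
#     stem = b16(128, activation=nn.Hardswish)
#     stem(paddle.rand([1, 3, 224, 224])).shape  # [1, 128, 14, 14]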
class Residual(nn.Layer):
def __init__(self, m, drop):
super().__init__()
self.m = m
self.drop = drop
def forward(self, x):
if self.training and self.drop > 0:
# Stochastic depth, ported to Paddle ops (the original used torch-style
# x.size(0)/.ge_()/device= calls that Paddle tensors do not provide):
# drop the residual branch per sample and rescale the survivors so the
# expected output is unchanged.
keep_mask = (paddle.rand([x.shape[0], 1, 1]) >= self.drop).astype(
x.dtype) / (1 - self.drop)
return x + self.m(x) * keep_mask.detach()
else:
return x + self.m(x)
class Attention(nn.Layer):
def __init__(self,
dim,
key_dim,
num_heads=8,
attn_ratio=4,
activation=None,
resolution=14):
super().__init__()
self.num_heads = num_heads
self.scale = key_dim**-0.5
self.key_dim = key_dim
self.nh_kd = nh_kd = key_dim * num_heads
self.d = int(attn_ratio * key_dim)
self.dh = int(attn_ratio * key_dim) * num_heads
self.attn_ratio = attn_ratio
self.h = self.dh + nh_kd * 2
self.qkv = Linear_BN(dim, self.h)
self.proj = nn.Sequential(
activation(), Linear_BN(
self.dh, dim, bn_weight_init=0))
points = list(itertools.product(range(resolution), range(resolution)))
N = len(points)
attention_offsets = {}
idxs = []
for p1 in points:
for p2 in points:
offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1]))
if offset not in attention_offsets:
attention_offsets[offset] = len(attention_offsets)
idxs.append(attention_offsets[offset])
self.attention_biases = self.create_parameter(
shape=(num_heads, len(attention_offsets)),
default_initializer=zeros_,
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
tensor_idxs = paddle.to_tensor(idxs, dtype='int64')
self.register_buffer('attention_bias_idxs',
paddle.reshape(tensor_idxs, [N, N]))
@paddle.no_grad()
def train(self, mode=True):
if mode:
super().train()
else:
super().eval()
if mode and hasattr(self, 'ab'):
del self.ab
else:
self.ab = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
def forward(self, x):
self.training = True
B, N, C = x.shape
qkv = self.qkv(x)
qkv = paddle.reshape(qkv,
[B, N, self.num_heads, self.h // self.num_heads])
q, k, v = paddle.split(
qkv, [self.key_dim, self.key_dim, self.d], axis=3)
q = paddle.transpose(q, perm=[0, 2, 1, 3])
k = paddle.transpose(k, perm=[0, 2, 1, 3])
v = paddle.transpose(v, perm=[0, 2, 1, 3])
k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2])
if self.training:
attention_biases = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
else:
attention_biases = self.ab
attn = ((q @k_transpose) * self.scale + attention_biases)
attn = F.softmax(attn)
x = paddle.transpose(attn @v, perm=[0, 2, 1, 3])
x = paddle.reshape(x, [B, N, self.dh])
x = self.proj(x)
return x
class Subsample(nn.Layer):
def __init__(self, stride, resolution):
super().__init__()
self.stride = stride
self.resolution = resolution
def forward(self, x):
B, N, C = x.shape
x = paddle.reshape(x, [B, self.resolution, self.resolution,
C])[:, ::self.stride, ::self.stride]
x = paddle.reshape(x, [B, -1, C])
return x
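# Editorial note: Subsample thins the token grid on a stride pattern, e.g.
# a 14x14 map (196 tokens) with stride 2 keeps every other row and column:
#
#     sub = Subsample(stride=2, resolution=14)
#     sub(paddle.rand([1, 196, 64])).shape  # [1, 49, 64]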
class AttentionSubsample(nn.Layer):
def __init__(self,
in_dim,
out_dim,
key_dim,
num_heads=8,
attn_ratio=2,
activation=None,
stride=2,
resolution=14,
resolution_=7):
super().__init__()
self.num_heads = num_heads
self.scale = key_dim**-0.5
self.key_dim = key_dim
self.nh_kd = nh_kd = key_dim * num_heads
self.d = int(attn_ratio * key_dim)
self.dh = int(attn_ratio * key_dim) * self.num_heads
self.attn_ratio = attn_ratio
self.resolution_ = resolution_
self.resolution_2 = resolution_**2
self.training = True
h = self.dh + nh_kd
self.kv = Linear_BN(in_dim, h)
self.q = nn.Sequential(
Subsample(stride, resolution), Linear_BN(in_dim, nh_kd))
self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim))
self.stride = stride
self.resolution = resolution
points = list(itertools.product(range(resolution), range(resolution)))
points_ = list(
itertools.product(range(resolution_), range(resolution_)))
N = len(points)
N_ = len(points_)
attention_offsets = {}
idxs = []
i = 0
j = 0
for p1 in points_:
i += 1
for p2 in points:
j += 1
size = 1
offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2),
abs(p1[1] * stride - p2[1] + (size - 1) / 2))
if offset not in attention_offsets:
attention_offsets[offset] = len(attention_offsets)
idxs.append(attention_offsets[offset])
self.attention_biases = self.create_parameter(
shape=(num_heads, len(attention_offsets)),
default_initializer=zeros_,
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64')
self.register_buffer('attention_bias_idxs',
paddle.reshape(tensor_idxs_, [N_, N]))
@paddle.no_grad()
def train(self, mode=True):
if mode:
super().train()
else:
super().eval()
if mode and hasattr(self, 'ab'):
del self.ab
else:
self.ab = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
def forward(self, x):
self.training = True
B, N, C = x.shape
kv = self.kv(x)
kv = paddle.reshape(kv, [B, N, self.num_heads, -1])
k, v = paddle.split(kv, [self.key_dim, self.d], axis=3)
k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC
v = paddle.transpose(v, perm=[0, 2, 1, 3])
q = paddle.reshape(
self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim])
q = paddle.transpose(q, perm=[0, 2, 1, 3])
if self.training:
attention_biases = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
else:
attention_biases = self.ab
attn = (q @paddle.transpose(
k, perm=[0, 1, 3, 2])) * self.scale + attention_biases
attn = F.softmax(attn)
x = paddle.reshape(
paddle.transpose(
(attn @v), perm=[0, 2, 1, 3]), [B, -1, self.dh])
x = self.proj(x)
return x
class LeViT(nn.Layer):
""" Vision Transformer with support for patch or hybrid CNN input stage
"""
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
class_dim=1000,
embed_dim=[192],
key_dim=[64],
depth=[12],
num_heads=[3],
attn_ratio=[2],
mlp_ratio=[2],
hybrid_backbone=None,
down_ops=[],
attention_activation=nn.Hardswish,
mlp_activation=nn.Hardswish,
distillation=True,
drop_path=0):
super().__init__()
self.class_dim = class_dim
self.num_features = embed_dim[-1]
self.embed_dim = embed_dim
self.distillation = distillation
self.patch_embed = hybrid_backbone
self.blocks = []
down_ops.append([''])
resolution = img_size // patch_size
for i, (ed, kd, dpth, nh, ar, mr, do) in enumerate(
zip(embed_dim, key_dim, depth, num_heads, attn_ratio,
mlp_ratio, down_ops)):
for _ in range(dpth):
self.blocks.append(
Residual(
Attention(
ed,
kd,
nh,
attn_ratio=ar,
activation=attention_activation,
resolution=resolution, ),
drop_path))
if mr > 0:
h = int(ed * mr)
self.blocks.append(
Residual(
nn.Sequential(
Linear_BN(ed, h),
mlp_activation(),
Linear_BN(
h, ed, bn_weight_init=0), ),
drop_path))
if do[0] == 'Subsample':
#('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
resolution_ = (resolution - 1) // do[5] + 1
self.blocks.append(
AttentionSubsample(
*embed_dim[i:i + 2],
key_dim=do[1],
num_heads=do[2],
attn_ratio=do[3],
activation=attention_activation,
stride=do[5],
resolution=resolution,
resolution_=resolution_))
resolution = resolution_
if do[4] > 0: # mlp_ratio
h = int(embed_dim[i + 1] * do[4])
self.blocks.append(
Residual(
nn.Sequential(
Linear_BN(embed_dim[i + 1], h),
mlp_activation(),
Linear_BN(
h, embed_dim[i + 1], bn_weight_init=0), ),
drop_path))
self.blocks = nn.Sequential(*self.blocks)
# Classifier head
self.head = BN_Linear(embed_dim[-1],
class_dim) if class_dim > 0 else Identity()
if distillation:
self.head_dist = BN_Linear(
embed_dim[-1], class_dim) if class_dim > 0 else Identity()
def forward(self, x):
x = self.patch_embed(x)
x = x.flatten(2)
x = paddle.transpose(x, perm=[0, 2, 1])
x = self.blocks(x)
x = x.mean(1)
if self.distillation:
x = self.head(x), self.head_dist(x)
if not self.training:
x = (x[0] + x[1]) / 2
else:
x = self.head(x)
return x
def model_factory(C, D, X, N, drop_path, class_dim, distillation):
embed_dim = [int(x) for x in C.split('_')]
num_heads = [int(x) for x in N.split('_')]
depth = [int(x) for x in X.split('_')]
act = nn.Hardswish
model = LeViT(
patch_size=16,
embed_dim=embed_dim,
num_heads=num_heads,
key_dim=[D] * 3,
depth=depth,
attn_ratio=[2, 2, 2],
mlp_ratio=[2, 2, 2],
down_ops=[
#('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
['Subsample', D, embed_dim[0] // D, 4, 2, 2],
['Subsample', D, embed_dim[1] // D, 4, 2, 2],
],
attention_activation=act,
mlp_activation=act,
hybrid_backbone=b16(embed_dim[0], activation=act),
class_dim=class_dim,
drop_path=drop_path,
distillation=distillation)
return model
specification = {
'LeViT_128S': {
'C': '128_256_384',
'D': 16,
'N': '4_6_8',
'X': '2_3_4',
'drop_path': 0
},
'LeViT_128': {
'C': '128_256_384',
'D': 16,
'N': '4_8_12',
'X': '4_4_4',
'drop_path': 0
},
'LeViT_192': {
'C': '192_288_384',
'D': 32,
'N': '3_5_6',
'X': '4_4_4',
'drop_path': 0
},
'LeViT_256': {
'C': '256_384_512',
'D': 32,
'N': '4_6_8',
'X': '4_4_4',
'drop_path': 0
},
'LeViT_384': {
'C': '384_512_768',
'D': 32,
'N': '6_9_12',
'X': '4_4_4',
'drop_path': 0.1
},
}
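# Editorial note: model_factory splits the underscore-separated spec strings
# into per-stage lists; for LeViT_128S, C='128_256_384' gives embed_dim
# [128, 256, 384], N='4_6_8' gives num_heads [4, 6, 8] and X='2_3_4' gives
# depth [2, 3, 4], with key_dim D=16 repeated for all three stages:
#
#     model = model_factory(**specification['LeViT_128S'],
#                           class_dim=1000, distillation=False)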
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def LeViT_128S(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_128S'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld)
return model
def LeViT_128(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_128'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld)
return model
def LeViT_192(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_192'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld)
return model
def LeViT_256(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_256'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld)
return model
def LeViT_384(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_384'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld)
return model
......@@ -17,14 +17,20 @@
https://arxiv.org/abs/1907.09595.
"""
__all__ = ['MixNet_S', 'MixNet_M', 'MixNet_L']
import os
from inspect import isfunction
from functools import reduce
import paddle
import paddle.nn as nn
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"MixNet_S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams",
"MixNet_M": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams",
"MixNet_L": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class Identity(nn.Layer):
"""
......@@ -755,13 +761,33 @@ def get_mixnet(version, width_scale, model_name=None, **kwargs):
return net
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MixNet_S(pretrained=False, use_ssld=False, **kwargs):
    """
    MixNet-S model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
    https://arxiv.org/abs/1907.09595.
    """
    model = get_mixnet(
        version="s", width_scale=1.0, model_name="MixNet_S", **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld)
    return model
def MixNet_M(pretrained=False, use_ssld=False, **kwargs):
......@@ -769,14 +795,19 @@ def MixNet_M(**kwargs):
MixNet-M model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
"""
return get_mixnet(
model = get_mixnet(
version="m", width_scale=1.0, model_name="MixNet_M", **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld)
return model
def MixNet_L(pretrained=False, use_ssld=False, **kwargs):
    """
    MixNet-L model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
    https://arxiv.org/abs/1907.09595.
    """
    model = get_mixnet(
        version="m", width_scale=1.3, model_name="MixNet_L", **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld)
    return model
......@@ -26,9 +26,14 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import KaimingNormal
import math
__all__ = [
"MobileNetV1_x0_25", "MobileNetV1_x0_5", "MobileNetV1_x0_75", "MobileNetV1"
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"MobileNetV1_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_25_pretrained.pdparams",
"MobileNetV1_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_5_pretrained.pdparams",
"MobileNetV1_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_75_pretrained.pdparams",
"MobileNetV1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -245,22 +250,39 @@ class MobileNet(nn.Layer):
y = self.out(y)
return y
def MobileNetV1_x0_25(**args):
model = MobileNet(scale=0.25, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MobileNetV1_x0_25(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.25, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_25"], use_ssld=use_ssld)
return model
def MobileNetV1_x0_5(**args):
model = MobileNet(scale=0.5, **args)
def MobileNetV1_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_5"], use_ssld=use_ssld)
return model
def MobileNetV1_x0_75(**args):
model = MobileNet(scale=0.75, **args)
def MobileNetV1_x0_75(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.75, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_75"], use_ssld=use_ssld)
return model
def MobileNetV1(**args):
model = MobileNet(scale=1.0, **args)
return model
def MobileNetV1(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1"], use_ssld=use_ssld)
return model
\ No newline at end of file
......@@ -26,10 +26,16 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
import math
__all__ = [
"MobileNetV2_x0_25", "MobileNetV2_x0_5", "MobileNetV2_x0_75",
"MobileNetV2", "MobileNetV2_x1_5", "MobileNetV2_x2_0"
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"MobileNetV2_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams",
"MobileNetV2_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams",
"MobileNetV2_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
"MobileNetV2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
"MobileNetV2_x1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
"MobileNetV2_x2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -149,7 +155,7 @@ class InvresiBlocks(nn.Layer):
class MobileNet(nn.Layer):
def __init__(self, class_dim=1000, scale=1.0, prefix_name="", **args):
def __init__(self, class_dim=1000, scale=1.0, prefix_name=""):
super(MobileNet, self).__init__()
self.scale = scale
self.class_dim = class_dim
......@@ -216,33 +222,52 @@ class MobileNet(nn.Layer):
y = paddle.flatten(y, start_axis=1, stop_axis=-1)
y = self.out(y)
return y
def MobileNetV2_x0_25(**args):
model = MobileNet(scale=0.25, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MobileNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.25, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld)
return model
def MobileNetV2_x0_5(**args):
model = MobileNet(scale=0.5, **args)
def MobileNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld)
return model
def MobileNetV2_x0_75(**args):
model = MobileNet(scale=0.75, **args)
def MobileNetV2_x0_75(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.75, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld)
return model
def MobileNetV2(**args):
model = MobileNet(scale=1.0, **args)
def MobileNetV2(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld)
return model
def MobileNetV2_x1_5(**args):
model = MobileNet(scale=1.5, **args)
def MobileNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=1.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld)
return model
def MobileNetV2_x2_0(**args):
model = MobileNet(scale=2.0, **args)
def MobileNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=2.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld)
return model
......@@ -28,13 +28,20 @@ from paddle.regularizer import L2Decay
import math
__all__ = [
"MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
"MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0",
"MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35",
"MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75",
"MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"MobileNetV3_small_x0_35": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_35_pretrained.pdparams",
"MobileNetV3_small_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_5_pretrained.pdparams",
"MobileNetV3_small_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_75_pretrained.pdparams",
"MobileNetV3_small_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_0_pretrained.pdparams",
"MobileNetV3_small_x1_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_25_pretrained.pdparams",
"MobileNetV3_large_x0_35": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_35_pretrained.pdparams",
"MobileNetV3_large_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams",
"MobileNetV3_large_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_75_pretrained.pdparams",
"MobileNetV3_large_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_pretrained.pdparams",
"MobileNetV3_large_x1_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_25_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
def make_divisible(v, divisor=8, min_value=None):
......@@ -308,52 +315,75 @@ class SEModule(nn.Layer):
outputs = hardsigmoid(outputs, slope=0.2, offset=0.5)
return paddle.multiply(x=inputs, y=outputs)
def MobileNetV3_small_x0_35(**args):
model = MobileNetV3(model_name="small", scale=0.35, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MobileNetV3_small_x0_35(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=0.35, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_35"], use_ssld=use_ssld)
return model
def MobileNetV3_small_x0_5(**args):
model = MobileNetV3(model_name="small", scale=0.5, **args)
def MobileNetV3_small_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_5"], use_ssld=use_ssld)
return model
def MobileNetV3_small_x0_75(**args):
model = MobileNetV3(model_name="small", scale=0.75, **args)
def MobileNetV3_small_x0_75(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=0.75, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_75"], use_ssld=use_ssld)
return model
def MobileNetV3_small_x1_0(**args):
model = MobileNetV3(model_name="small", scale=1.0, **args)
def MobileNetV3_small_x1_0(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_0"], use_ssld=use_ssld)
return model
def MobileNetV3_small_x1_25(**args):
model = MobileNetV3(model_name="small", scale=1.25, **args)
def MobileNetV3_small_x1_25(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_25"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x0_35(**args):
model = MobileNetV3(model_name="large", scale=0.35, **args)
def MobileNetV3_large_x0_35(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_35"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x0_5(**args):
model = MobileNetV3(model_name="large", scale=0.5, **args)
def MobileNetV3_large_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_5"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x0_75(**args):
model = MobileNetV3(model_name="large", scale=0.75, **args)
def MobileNetV3_large_x0_75(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_75"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x1_0(**args):
model = MobileNetV3(model_name="large", scale=1.0, **args)
def MobileNetV3_large_x1_0(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_0"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x1_25(**args):
model = MobileNetV3(model_name="large", scale=1.25, **args)
def MobileNetV3_large_x1_25(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_25"], use_ssld=use_ssld)
return model
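# Usage sketch (editorial): every wrapper above follows the same contract --
# pick a model_name/scale pair, then optionally load weights via `pretrained`.
if __name__ == "__main__":
    import paddle
    model = MobileNetV3_large_x1_0(pretrained=False)
    print(model(paddle.rand([1, 3, 224, 224])).shape)  # [1, 1000] by default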