add support for `CvT_21_224`, `CvT_13_384`, `CvT_21_384` and `CvT_W24_384`

a2052232 · Yang Nie · Tingquan Gao · 7ed40fb9 · a2052232 · a2052232
13 changed files
--- a/ppcls/arch/backbone/__init__.py
+++ b/ppcls/arch/backbone/__init__.py
@@ -75,7 +75,7 @@ from .model_zoo.foundation_vit import CLIP_vit_base_patch32_224, CLIP_vit_base_p
 from .model_zoo.convnext import ConvNeXt_tiny, ConvNeXt_small, ConvNeXt_base_224, ConvNeXt_base_384, ConvNeXt_large_224, ConvNeXt_large_384
 from .model_zoo.nextvit import NextViT_small_224, NextViT_base_224, NextViT_large_224, NextViT_small_384, NextViT_base_384, NextViT_large_384
 from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224
-from .model_zoo.cvt import CvT_13_224, CvT_13_384, CvT_21_224, CvT_21_384
+from .model_zoo.cvt import CvT_13_224, CvT_13_384, CvT_21_224, CvT_21_384, CvT_W24_384
 from .variant_models.resnet_variant import ResNet50_last_stage_stride1
 from .variant_models.resnet_variant import ResNet50_adaptive_max_pool2d

--- a/ppcls/arch/backbone/model_zoo/cvt.py
+++ b/ppcls/arch/backbone/model_zoo/cvt.py
@@ -26,6 +26,7 @@ MODEL_URLS = {
    "CvT_13_384": "",  # TODO
    "CvT_21_224": "",  # TODO
    "CvT_21_384": "",  # TODO
+    "CvT_W24_384": "",  # TODO
 }
 __all__ = list(MODEL_URLS.keys())
@@ -655,3 +656,37 @@ def CvT_21_384(pretrained=False, use_ssld=False, **kwargs):
    _load_pretrained(
        pretrained, model, MODEL_URLS["CvT_21_384"], use_ssld=use_ssld)
    return model
+def CvT_W24_384(pretrained=False, use_ssld=False, **kwargs):
+    msvit_spec = dict(
+        INIT='trunc_norm',
+        NUM_STAGES=3,
+        PATCH_SIZE=[7, 3, 3],
+        PATCH_STRIDE=[4, 2, 2],
+        PATCH_PADDING=[2, 1, 1],
+        DIM_EMBED=[192, 768, 1024],
+        NUM_HEADS=[3, 12, 16],
+        DEPTH=[2, 2, 20],
+        MLP_RATIO=[4.0, 4.0, 4.0],
+        ATTN_DROP_RATE=[0.0, 0.0, 0.0],
+        DROP_RATE=[0.0, 0.0, 0.0],
+        DROP_PATH_RATE=[0.0, 0.0, 0.3],
+        QKV_BIAS=[True, True, True],
+        CLS_TOKEN=[False, False, True],
+        POS_EMBED=[False, False, False],
+        QKV_PROJ_METHOD=['dw_bn', 'dw_bn', 'dw_bn'],
+        KERNEL_QKV=[3, 3, 3],
+        PADDING_KV=[1, 1, 1],
+        STRIDE_KV=[2, 2, 2],
+        PADDING_Q=[1, 1, 1],
+        STRIDE_Q=[1, 1, 1])
+    model = ConvolutionalVisionTransformer(
+        in_chans=3,
+        act_layer=QuickGELU,
+        init=msvit_spec.get('INIT', 'trunc_norm'),
+        spec=msvit_spec,
+        **kwargs)
+    _load_pretrained(
+        pretrained, model, MODEL_URLS["CvT_21_384"], use_ssld=use_ssld)
+    return model
--- a/ppcls/configs/ImageNet/CvT/CvT_13_224.yaml
+++ b/ppcls/configs/ImageNet/CvT/CvT_13_224.yaml
@@ -124,7 +124,7 @@ DataLoader:
            order: ''
    sampler:
      name: DistributedBatchSampler
-      batch_size: 256
+      batch_size: 128
      drop_last: False
      shuffle: False
    loader:

--- a/ppcls/configs/ImageNet/CvT/CvT_13_384.yaml
+++ b/ppcls/configs/ImageNet/CvT/CvT_13_384.yaml
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 50
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 384, 384]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  update_freq: 2  # for 8 cards
+# model architecture
+Arch:
+  name: CvT_13_384
+  class_num: 1000
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 0.05
+  no_weight_decay_name: pos_embed cls_token .bias
+  one_dim_param_no_weight_decay: True
+  lr:
+    # for 8 cards
+    name: Cosine
+    learning_rate: 2e-3  # lr 2e-3 for total_batch_size 2048
+    eta_min: 1e-5
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+    by_epoch: True
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+            backend: pil
+        - RandCropImage:
+            size: 384
+            interpolation: bicubic
+            backend: pil
+        - RandFlipImage:
+            flip_code: 1
+        - TimmAutoAugment:
+            config_str: rand-m9-mstd0.5-inc1
+            interpolation: bicubic
+            img_size: 384
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - RandomErasing:
+            EPSILON: 0.25
+            sl: 0.02
+            sh: 1.0/3.0
+            r1: 0.3
+            attempt: 10
+            use_log_aspect: True
+            mode: pixel
+      batch_transform_ops:
+        - OpSampler:
+            MixupOperator:
+              alpha: 0.8
+              prob: 0.5
+            CutmixOperator:
+              alpha: 1.0
+              prob: 0.5
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 128
+      drop_last: True
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+            backend: pil
+        - ResizeImage:
+            size: 384
+            interpolation: bicubic
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 128
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+        backend: pil
+    - ResizeImage:
+        size: 384
+        interpolation: bicubic
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+Metric:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]
--- a/ppcls/configs/ImageNet/CvT/CvT_21_224.yaml
+++ b/ppcls/configs/ImageNet/CvT/CvT_21_224.yaml
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 50
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 224, 224]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  update_freq: 2  # for 8 cards
+# model architecture
+Arch:
+  name: CvT_21_224
+  class_num: 1000
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 0.1
+  no_weight_decay_name: pos_embed cls_token .bias
+  one_dim_param_no_weight_decay: True
+  lr:
+    # for 8 cards
+    name: Cosine
+    learning_rate: 1e-3  # lr 1e-3 for total_batch_size 1024
+    eta_min: 1e-5
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+    by_epoch: True
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+            backend: pil
+        - RandCropImage:
+            size: 224
+            interpolation: bicubic
+            backend: pil
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: RASampler
+      batch_size: 64
+      drop_last: True
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+            backend: pil
+        - ResizeImage:
+            resize_short: 256
+            interpolation: bicubic
+            backend: pil
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 128
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+        backend: pil
+    - ResizeImage:
+        resize_short: 256
+        interpolation: bicubic
+        backend: pil
+    - CropImage:
+        size: 224
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+Metric:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]
--- a/ppcls/configs/ImageNet/CvT/CvT_21_384.yaml
+++ b/ppcls/configs/ImageNet/CvT/CvT_21_384.yaml
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 50
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 384, 384]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  update_freq: 2  # for 8 cards
+# model architecture
+Arch:
+  name: CvT_21_384
+  class_num: 1000
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 0.1
+  no_weight_decay_name: pos_embed cls_token .bias
+  one_dim_param_no_weight_decay: True
+  lr:
+    # for 8 cards
+    name: Cosine
+    learning_rate: 1e-3  # lr 1e-3 for total_batch_size 1024
+    eta_min: 1e-5
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+    by_epoch: True
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+            backend: pil
+        - RandCropImage:
+            size: 384
+            interpolation: bicubic
+            backend: pil
+        - RandFlipImage:
+            flip_code: 1
+        - TimmAutoAugment:
+            config_str: rand-m9-mstd0.5-inc1
+            interpolation: bicubic
+            img_size: 384
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - RandomErasing:
+            EPSILON: 0.25
+            sl: 0.02
+            sh: 1.0/3.0
+            r1: 0.3
+            attempt: 10
+            use_log_aspect: True
+            mode: pixel
+      batch_transform_ops:
+        - OpSampler:
+            MixupOperator:
+              alpha: 0.8
+              prob: 0.5
+            CutmixOperator:
+              alpha: 1.0
+              prob: 0.5
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: True
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+            backend: pil
+        - ResizeImage:
+            size: 384
+            interpolation: bicubic
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+        backend: pil
+    - ResizeImage:
+        size: 384
+        interpolation: bicubic
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+Metric:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]
--- a/ppcls/configs/ImageNet/CvT/CvT_W24_384.yaml
+++ b/ppcls/configs/ImageNet/CvT/CvT_W24_384.yaml
+# global configs
+Global:
+  checkpoints: null
+  pretrained_model: null
+  output_dir: ./output/
+  device: gpu
+  save_interval: 1
+  eval_during_train: True
+  eval_interval: 1
+  epochs: 300
+  print_batch_step: 50
+  use_visualdl: False
+  # used for static mode and model export
+  image_shape: [3, 384, 384]
+  save_inference_dir: ./inference
+  # training model under @to_static
+  to_static: False
+  update_freq: 2  # for 8 cards
+# model architecture
+Arch:
+  name: CvT_W24_384
+  class_num: 1000
+# loss function config for training/eval process
+Loss:
+  Train:
+    - CELoss:
+        weight: 1.0
+        epsilon: 0.1
+  Eval:
+    - CELoss:
+        weight: 1.0
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  epsilon: 1e-8
+  weight_decay: 0.1
+  no_weight_decay_name: pos_embed cls_token .bias
+  one_dim_param_no_weight_decay: True
+  lr:
+    # for 8 cards
+    name: Cosine
+    learning_rate: 1e-3  # lr 1e-3 for total_batch_size 1024
+    eta_min: 1e-5
+    warmup_epoch: 5
+    warmup_start_lr: 1e-6
+    by_epoch: True
+# data loader for train and eval
+DataLoader:
+  Train:
+    dataset:
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+            backend: pil
+        - RandCropImage:
+            size: 384
+            interpolation: bicubic
+            backend: pil
+        - RandFlipImage:
+            flip_code: 1
+        - TimmAutoAugment:
+            config_str: rand-m9-mstd0.5-inc1
+            interpolation: bicubic
+            img_size: 384
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - RandomErasing:
+            EPSILON: 0.25
+            sl: 0.02
+            sh: 1.0/3.0
+            r1: 0.3
+            attempt: 10
+            use_log_aspect: True
+            mode: pixel
+      batch_transform_ops:
+        - OpSampler:
+            MixupOperator:
+              alpha: 0.8
+              prob: 0.5
+            CutmixOperator:
+              alpha: 1.0
+              prob: 0.5
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: True
+      shuffle: True
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+  Eval:
+    dataset: 
+      name: ImageNetDataset
+      image_root: ./dataset/ILSVRC2012/
+      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
+      transform_ops:
+        - DecodeImage:
+            to_rgb: True
+            channel_first: False
+            backend: pil
+        - ResizeImage:
+            size: 384
+            interpolation: bicubic
+            backend: pil
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+    sampler:
+      name: DistributedBatchSampler
+      batch_size: 64
+      drop_last: False
+      shuffle: False
+    loader:
+      num_workers: 4
+      use_shared_memory: True
+Infer:
+  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
+  batch_size: 10
+  transforms:
+    - DecodeImage:
+        to_rgb: True
+        channel_first: False
+        backend: pil
+    - ResizeImage:
+        size: 384
+        interpolation: bicubic
+        backend: pil
+    - NormalizeImage:
+        scale: 1.0/255.0
+        mean: [0.485, 0.456, 0.406]
+        std: [0.229, 0.224, 0.225]
+        order: ''
+    - ToCHWImage:
+  PostProcess:
+    name: Topk
+    topk: 5
+    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
+Metric:
+  Eval:
+    - TopkAcc:
+        topk: [1, 5]
--- a/ppcls/data/__init__.py
+++ b/ppcls/data/__init__.py
@@ -43,6 +43,7 @@ from ppcls.data.dataloader.DistributedRandomIdentitySampler import DistributedRa
 from ppcls.data.dataloader.pk_sampler import PKSampler
 from ppcls.data.dataloader.mix_sampler import MixSampler
 from ppcls.data.dataloader.multi_scale_sampler import MultiScaleSampler
+from ppcls.data.dataloader.ra_sampler import RASampler
 from ppcls.data import preprocess
 from ppcls.data.preprocess import transform

--- a/ppcls/data/dataloader/ra_sampler.py
+++ b/ppcls/data/dataloader/ra_sampler.py
+import math
+import numpy as np
+from paddle.io import DistributedBatchSampler
+class RASampler(DistributedBatchSampler):
+    """
+    based on https://github.com/facebookresearch/deit/blob/main/samplers.py
+    """
+    def __init__(self,
+                 dataset,
+                 batch_size,
+                 num_replicas=None,
+                 rank=None,
+                 shuffle=False,
+                 drop_last=False,
+                 num_repeats: int=3):
+        super().__init__(dataset, batch_size, num_replicas, rank, shuffle,
+                         drop_last)
+        self.num_repeats = num_repeats
+        self.num_samples = int(
+            math.ceil(len(self.dataset) * num_repeats / self.nranks))
+        self.total_size = self.num_samples * self.nranks
+        self.num_selected_samples = int(
+            math.floor(len(self.dataset) // 256 * 256 / self.nranks))
+    def __iter__(self):
+        num_samples = len(self.dataset)
+        indices = np.arange(num_samples).tolist()
+        if self.shuffle:
+            np.random.RandomState(self.epoch).shuffle(indices)
+            self.epoch += 1
+        indices = [ele for ele in indices for i in range(self.num_repeats)]
+        indices += indices[:(self.total_size - len(indices))]
+        assert len(indices) == self.total_size
+        # subsample
+        indices = indices[self.local_rank:self.total_size:self.nranks]
+        assert len(indices) == self.num_samples
+        _sample_iter = iter(indices[:self.num_selected_samples])
+        batch_indices = []
+        for idx in _sample_iter:
+            batch_indices.append(idx)
+            if len(batch_indices) == self.batch_size:
+                yield batch_indices
+                batch_indices = []
+        if not self.drop_last and len(batch_indices) > 0:
+            yield batch_indices
+    def __len__(self):
+        num_samples = self.num_selected_samples
+        num_samples += int(not self.drop_last) * (self.batch_size - 1)
+        return num_samples // self.batch_size
--- a/test_tipc/configs/CvT/CvT_13_384_train_infer_python.txt
+++ b/test_tipc/configs/CvT/CvT_13_384_train_infer_python.txt
+===========================train_params===========================
+model_name:CvT_13_384
+python:python3.7
+gpu_list:0|0,1
+-o Global.device:gpu
+-o Global.auto_cast:null
+-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
+-o Global.output_dir:./output/
+-o DataLoader.Train.sampler.batch_size:8
+-o Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./dataset/ILSVRC2012/val
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c ppcls/configs/ImageNet/CvT/CvT_13_384.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params=========================== 
+eval:tools/eval.py -c ppcls/configs/ImageNet/CvT/CvT_13_384.yaml
+null:null
+##
+===========================infer_params==========================
+-o Global.save_inference_dir:./inference
+-o Global.pretrained_model:
+norm_export:tools/export_model.py -c ppcls/configs/ImageNet/CvT/CvT_13_384.yaml
+quant_export:null
+fpgm_export:null
+distill_export:null
+kl_quant:null
+export2:null
+inference_dir:null
+infer_model:../inference/
+infer_export:True
+infer_quant:False
+inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.transform_ops.0.ResizeImage.interpolation=bicubic -o PreProcess.transform_ops.0.ResizeImage.backend=pil
+-o Global.use_gpu:True|False
+-o Global.enable_mkldnn:False
+-o Global.cpu_num_threads:1
+-o Global.batch_size:1
+-o Global.use_tensorrt:False
+-o Global.use_fp16:False
+-o Global.inference_model_dir:../inference
+-o Global.infer_imgs:../dataset/ILSVRC2012/val/ILSVRC2012_val_00000001.JPEG
+-o Global.save_log_path:null
+-o Global.benchmark:False
+null:null
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,384,384]}]
--- a/test_tipc/configs/CvT/CvT_21_224_train_infer_python.txt
+++ b/test_tipc/configs/CvT/CvT_21_224_train_infer_python.txt
+===========================train_params===========================
+model_name:CvT_21_224
+python:python3.7
+gpu_list:0|0,1
+-o Global.device:gpu
+-o Global.auto_cast:null
+-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
+-o Global.output_dir:./output/
+-o DataLoader.Train.sampler.batch_size:8
+-o Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./dataset/ILSVRC2012/val
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c ppcls/configs/ImageNet/CvT/CvT_21_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params=========================== 
+eval:tools/eval.py -c ppcls/configs/ImageNet/CvT/CvT_21_224.yaml
+null:null
+##
+===========================infer_params==========================
+-o Global.save_inference_dir:./inference
+-o Global.pretrained_model:
+norm_export:tools/export_model.py -c ppcls/configs/ImageNet/CvT/CvT_21_224.yaml
+quant_export:null
+fpgm_export:null
+distill_export:null
+kl_quant:null
+export2:null
+inference_dir:null
+infer_model:../inference/
+infer_export:True
+infer_quant:False
+inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.transform_ops.0.ResizeImage.interpolation=bicubic -o PreProcess.transform_ops.0.ResizeImage.backend=pil
+-o Global.use_gpu:True|False
+-o Global.enable_mkldnn:False
+-o Global.cpu_num_threads:1
+-o Global.batch_size:1
+-o Global.use_tensorrt:False
+-o Global.use_fp16:False
+-o Global.inference_model_dir:../inference
+-o Global.infer_imgs:../dataset/ILSVRC2012/val/ILSVRC2012_val_00000001.JPEG
+-o Global.save_log_path:null
+-o Global.benchmark:False
+null:null
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,224,224]}]
--- a/test_tipc/configs/CvT/CvT_21_384_train_infer_python.txt
+++ b/test_tipc/configs/CvT/CvT_21_384_train_infer_python.txt
+===========================train_params===========================
+model_name:CvT_21_384
+python:python3.7
+gpu_list:0|0,1
+-o Global.device:gpu
+-o Global.auto_cast:null
+-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
+-o Global.output_dir:./output/
+-o DataLoader.Train.sampler.batch_size:8
+-o Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./dataset/ILSVRC2012/val
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c ppcls/configs/ImageNet/CvT/CvT_21_384.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params=========================== 
+eval:tools/eval.py -c ppcls/configs/ImageNet/CvT/CvT_21_384.yaml
+null:null
+##
+===========================infer_params==========================
+-o Global.save_inference_dir:./inference
+-o Global.pretrained_model:
+norm_export:tools/export_model.py -c ppcls/configs/ImageNet/CvT/CvT_21_384.yaml
+quant_export:null
+fpgm_export:null
+distill_export:null
+kl_quant:null
+export2:null
+inference_dir:null
+infer_model:../inference/
+infer_export:True
+infer_quant:False
+inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.transform_ops.0.ResizeImage.interpolation=bicubic -o PreProcess.transform_ops.0.ResizeImage.backend=pil
+-o Global.use_gpu:True|False
+-o Global.enable_mkldnn:False
+-o Global.cpu_num_threads:1
+-o Global.batch_size:1
+-o Global.use_tensorrt:False
+-o Global.use_fp16:False
+-o Global.inference_model_dir:../inference
+-o Global.infer_imgs:../dataset/ILSVRC2012/val/ILSVRC2012_val_00000001.JPEG
+-o Global.save_log_path:null
+-o Global.benchmark:False
+null:null
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,384,384]}]
--- a/test_tipc/configs/CvT/CvT_W24_384_train_infer_python.txt
+++ b/test_tipc/configs/CvT/CvT_W24_384_train_infer_python.txt
+===========================train_params===========================
+model_name:CvT_W24_384
+python:python3.7
+gpu_list:0|0,1
+-o Global.device:gpu
+-o Global.auto_cast:null
+-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
+-o Global.output_dir:./output/
+-o DataLoader.Train.sampler.batch_size:8
+-o Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./dataset/ILSVRC2012/val
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c ppcls/configs/ImageNet/CvT/CvT_W24_384.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params=========================== 
+eval:tools/eval.py -c ppcls/configs/ImageNet/CvT/CvT_W24_384.yaml
+null:null
+##
+===========================infer_params==========================
+-o Global.save_inference_dir:./inference
+-o Global.pretrained_model:
+norm_export:tools/export_model.py -c ppcls/configs/ImageNet/CvT/CvT_W24_384.yaml
+quant_export:null
+fpgm_export:null
+distill_export:null
+kl_quant:null
+export2:null
+inference_dir:null
+infer_model:../inference/
+infer_export:True
+infer_quant:False
+inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.transform_ops.0.ResizeImage.interpolation=bicubic -o PreProcess.transform_ops.0.ResizeImage.backend=pil
+-o Global.use_gpu:True|False
+-o Global.enable_mkldnn:False
+-o Global.cpu_num_threads:1
+-o Global.batch_size:1
+-o Global.use_tensorrt:False
+-o Global.use_fp16:False
+-o Global.inference_model_dir:../inference
+-o Global.infer_imgs:../dataset/ILSVRC2012/val/ILSVRC2012_val_00000001.JPEG
+-o Global.save_log_path:null
+-o Global.benchmark:False
+null:null
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,384,384]}]