CLIP_vit_base_patch16_224_finetune.yaml 3.4 KB
Newer Older
G
gaotingquan 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
# global configs
Global:
  checkpoints: null
  pretrained_model: null
  output_dir: ./output/
  device: gpu
  save_interval: 10
  eval_during_train: True
  eval_interval: 1
  epochs: 50
  print_batch_step: 10
  use_visualdl: False
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference

# mixed precision training
AMP:
  scale_loss: 128.0
  use_dynamic_loss_scaling: True
  # O1: mixed fp16
  level: O1

# model architecture
Arch:
  name: CLIP_vit_base_patch16_224
  class_num: 1000
  return_embed: False
  pretrained: True

# loss function config for traing/eval process
Loss:
  Train:
    - CELoss:
        weight: 1.0
        epsilon: 0.1
  Eval:
    - CELoss:
        weight: 1.0

Optimizer:
  name: AdamWDL
  beta1: 0.9
  beta2: 0.999
  epsilon: 1e-8
  weight_decay: 0.05
  layerwise_decay: 0.6
  filter_bias_and_bn: True
  lr:
    name: Cosine
    learning_rate: 0.0003
    eta_min: 1e-6
    warmup_epoch: 10
    warmup_start_lr: 1e-6

# data loader for train and eval
DataLoader:
  Train:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/train_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - RandCropImage:
            size: 224
            interpolation: bicubic
            backend: pil
        - RandFlipImage:
            flip_code: 1
        - TimmAutoAugment:
            config_str: rand-m9-mstd0.5-inc1
            interpolation: bicubic
            img_size: 224
        - NormalizeImage:
            scale: 1.0/255.0
79 80
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
G
gaotingquan 已提交
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
            order: ''
        - RandomErasing:
            EPSILON: 0.25
            sl: 0.02
            sh: 1.0/3.0
            r1: 0.3
            attempt: 10
            use_log_aspect: True
            mode: pixel
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: True
      shuffle: True
    loader:
      num_workers: 16
      use_shared_memory: True

  Eval:
    dataset:
      name: ImageNetDataset
      image_root: ./dataset/ILSVRC2012/
      cls_label_path: ./dataset/ILSVRC2012/val_list.txt
      transform_ops:
        - DecodeImage:
            to_rgb: True
            channel_first: False
        - ResizeImage:
            resize_short: 224
            interpolation: bicubic
            backend: pil
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
116 117
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
G
gaotingquan 已提交
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
            order: ''
    sampler:
      name: DistributedBatchSampler
      batch_size: 128
      drop_last: False
      shuffle: False
    loader:
      num_workers: 8
      use_shared_memory: True

Infer:
  infer_imgs: docs/images/inference_deployment/whl_demo.jpg
  batch_size: 10
  transforms:
    - DecodeImage:
        to_rgb: True
        channel_first: False
    - ResizeImage:
        resize_short: 256
    - CropImage:
        size: 224
    - NormalizeImage:
        scale: 1.0/255.0
141 142
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
G
gaotingquan 已提交
143 144 145 146 147 148 149 150 151 152 153 154 155 156
        order: ''
    - ToCHWImage:
  PostProcess:
    name: Topk
    topk: 5
    class_id_map_file: ppcls/utils/imagenet1k_label_list.txt

Metric:
  Train:
    - TopkAcc:
        topk: [1, 5]
  Eval:
    - TopkAcc:
        topk: [1, 5]