# FixMatch_cifar10_4000.yaml

# global configs
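# the run trains for epochs x iter_per_epoch = 1024 x 1024 = 2^20 optimizer steps,
# the training budget used in the FixMatch paper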
Global:
  checkpoints: null
  pretrained_model: 'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/others/torch2paddle_weight/torch2paddle_initialize_cifar10_WideResNet_depth28_widenfactor2_classnum10.pdparams'
  output_dir: ./output
  device: gpu
  save_interval: -1
  eval_during_train: True
  eval_interval: 1
  epochs: 1024
  iter_per_epoch: 1024
  print_batch_step: 20
  use_visualdl: False
  use_dali: False
  train_mode: fixmatch
  # used for static mode and model export
  image_shape: [3, 224, 224]
  save_inference_dir: ./inference

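# FixMatch hyper-parameters: `tempture` is the softmax temperature used when turning
# the weak-view prediction into a pseudo-label (1 = plain softmax), and `threshold`
# is the confidence cut-off below which unlabeled samples are masked out of the loss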
SSL:
  tempture: 1
  threshold: 0.95

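# exponential moving average of the model weights (evaluation with EMA parameters,
# as in the FixMatch paper)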
EMA:
  decay: 0.999

# AMP:
#   scale_loss: 65536
#   use_dynamic_loss_scaling: True
#   # O1: mixed fp16
#   level: O1

# model architecture
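# WideResNet-28-2 (depth 28, widening factor 2), the backbone used by FixMatch on CIFAR-10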
Arch:
  name: WideResNet
  depth: 28
  widen_factor: 2
  dropout: 0
  num_classes: 10

# loss function config for training/eval process
Loss:
  Train:
    - CELoss:
        weight: 1.0
        reduction: "mean"
  Eval:
    - CELoss:
        weight: 1.0
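
# unlabeled (pseudo-label) loss: reduction "none" keeps per-sample losses so the
# trainer can apply the SSL.threshold confidence mask before averaging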
UnLabelLoss:
  Train:
    - CELoss:
        weight: 1.0
        reduction: "none"

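# SGD with Nesterov momentum; parameters whose names contain "bn" or "bias" are
# exempt from weight decay. num_cycles = 0.4375 = 7/16 reproduces the FixMatch
# cosine schedule lr * cos(7 * pi * k / (16 * K)) over the K total training steps.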
Optimizer:
  name: Momentum
  momentum: 0.9
  use_nesterov: True
  no_weight_decay_name: bn bias
  weight_decay: 0.0005
  lr:
    name: CosineFixmatch
    learning_rate: 0.03
    num_warmup_steps: 0
    num_cycles: 0.4375

# data loader for train and eval
DataLoader:
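  # labeled branch: 400 samples per class = 4000 labels in total (matching the file
  # name); expand_labels: 17 presumably replicates the labeled list so one epoch of
  # 1024 iterations at batch size 64 never runs out (4000 x 17 = 68000 >= 65536)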
  Train:
    dataset:
      name: Cifar10
      data_file: None
      mode: 'train'
      download: True
      backend: 'pil'
      sample_per_label: 400
      expand_labels: 17
      transform_ops:
        - RandFlipImage:
            flip_code: 1
        - Pad_paddle_vision:
            padding: 4
            padding_mode: reflect
        - RandCropImageV2:
            size: [32, 32]
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.4914, 0.4822, 0.4465]
            std: [0.2471, 0.2435, 0.2616]
            order: hwc
    sampler:
      name: DistributedBatchSampler
      batch_size: 64
      drop_last: True
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True

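  # unlabeled branch: every image gets a weak view (flip + pad + crop) and a strong
  # view (weak + RandAugment); batch_size 448 = 7 x 64, i.e. the FixMatch ratio mu = 7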
  UnLabelTrain:
    dataset:
      name: Cifar10
      data_file: None
      mode: 'train'
      download: True
      backend: 'pil'
      sample_per_label: None
      transform_ops_weak:
        - RandFlipImage:
            flip_code: 1
        - Pad_paddle_vision:
            padding: 4
            padding_mode: reflect
        - RandCropImageV2:
            size: [32, 32]
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.4914, 0.4822, 0.4465]
            std: [0.2471, 0.2435, 0.2616]
            order: hwc
      transform_ops_strong:
        - RandFlipImage:
            flip_code: 1
        - Pad_paddle_vision:
            padding: 4
            padding_mode: reflect
        - RandCropImageV2:
            size: [32, 32]
        - RandAugment:
            num_layers: 2
            magnitude: 10
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.4914, 0.4822, 0.4465]
            std: [0.2471, 0.2435, 0.2616]
            order: hwc
    sampler:
      name: DistributedBatchSampler
      batch_size: 448
      drop_last: True
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True


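  # evaluation on the CIFAR-10 test split with normalization only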
  Eval:
    dataset:
      name: Cifar10
      data_file: None
      mode: 'test'
      download: True
      backend: 'pil'
      sample_per_label: None
      transform_ops:
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.4914, 0.4822, 0.4465]
            std: [0.2471, 0.2435, 0.2616]
            order: hwc
    sampler:
      name: DistributedBatchSampler
      batch_size: 64
      drop_last: False
      shuffle: True
    loader:
      num_workers: 4
      use_shared_memory: True


Metric:
  Eval:
    - TopkAcc:
        topk: [1, 5]