From f5b32a02ea4b62a15611cf70dac480424a233a86 Mon Sep 17 00:00:00 2001 From: gaotingquan Date: Fri, 17 Sep 2021 07:47:01 +0000 Subject: [PATCH] fix: fix the training configs of deit, swin, twins --- .../DeiT/DeiT_base_distilled_patch16_224.yaml | 62 +++++++++++------ .../DeiT/DeiT_base_distilled_patch16_384.yaml | 68 +++++++++++++------ .../ImageNet/DeiT/DeiT_base_patch16_224.yaml | 62 +++++++++++------ .../ImageNet/DeiT/DeiT_base_patch16_384.yaml | 68 +++++++++++++------ .../DeiT_small_distilled_patch16_224.yaml | 62 +++++++++++------ .../ImageNet/DeiT/DeiT_small_patch16_224.yaml | 62 +++++++++++------ .../DeiT/DeiT_tiny_distilled_patch16_224.yaml | 62 +++++++++++------ .../ImageNet/DeiT/DeiT_tiny_patch16_224.yaml | 62 +++++++++++------ ...nTransformer_base_patch4_window12_384.yaml | 61 ++++++++++++----- ...inTransformer_base_patch4_window7_224.yaml | 53 ++++++++++----- ...Transformer_large_patch4_window12_384.yaml | 61 ++++++++++++----- ...nTransformer_large_patch4_window7_224.yaml | 53 ++++++++++----- ...nTransformer_small_patch4_window7_224.yaml | 53 ++++++++++----- ...inTransformer_tiny_patch4_window7_224.yaml | 53 ++++++++++----- .../configs/ImageNet/Twins/alt_gvt_base.yaml | 61 ++++++++++++----- .../configs/ImageNet/Twins/alt_gvt_large.yaml | 61 ++++++++++++----- .../configs/ImageNet/Twins/alt_gvt_small.yaml | 61 ++++++++++++----- ppcls/configs/ImageNet/Twins/pcpvt_base.yaml | 61 ++++++++++++----- ppcls/configs/ImageNet/Twins/pcpvt_large.yaml | 61 ++++++++++++----- ppcls/configs/ImageNet/Twins/pcpvt_small.yaml | 61 ++++++++++++----- 20 files changed, 854 insertions(+), 354 deletions(-) diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml index 951c3ad0..fb3b9cca 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -22,25 +22,27 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 - Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 - + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval DataLoader: @@ -55,17 +57,38 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' - + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: True loader: @@ -83,6 +106,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -92,7 +117,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: False loader: @@ -108,6 +133,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -122,9 +149,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml index 3b243619..d30b5f7d 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -22,25 +22,27 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 - Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 - + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval DataLoader: @@ -54,18 +56,39 @@ DataLoader: to_rgb: True channel_first: False - RandCropImage: - size: 384 + size: 384 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 384 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' - + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: True loader: @@ -82,7 +105,9 @@ DataLoader: to_rgb: True channel_first: False - ResizeImage: - resize_short: 426 + resize_short: 438 + interpolation: bicubic + backend: pil - CropImage: size: 384 - NormalizeImage: @@ -92,7 +117,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: False loader: @@ -107,7 +132,9 @@ Infer: to_rgb: True channel_first: False - ResizeImage: - resize_short: 426 + resize_short: 438 + interpolation: bicubic + backend: pil - CropImage: size: 384 - NormalizeImage: @@ -122,9 +149,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml index 6d94b375..8f4207e4 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -22,25 +22,27 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 - Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 - + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval DataLoader: @@ -55,17 +57,38 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' - + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: True loader: @@ -83,6 +106,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -92,7 +117,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: False loader: @@ -108,6 +133,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -122,9 +149,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml index a3f33d04..00afe54b 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -22,25 +22,27 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 - Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 - + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval DataLoader: @@ -54,18 +56,39 @@ DataLoader: to_rgb: True channel_first: False - RandCropImage: - size: 384 + size: 384 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 384 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' - + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: True loader: @@ -82,7 +105,9 @@ DataLoader: to_rgb: True channel_first: False - ResizeImage: - resize_short: 426 + resize_short: 438 + interpolation: bicubic + backend: pil - CropImage: size: 384 - NormalizeImage: @@ -92,7 +117,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: False loader: @@ -107,7 +132,9 @@ Infer: to_rgb: True channel_first: False - ResizeImage: - resize_short: 426 + resize_short: 438 + interpolation: bicubic + backend: pil - CropImage: size: 384 - NormalizeImage: @@ -122,9 +149,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml index d749681c..c27bed40 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -22,25 +22,27 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 - Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 - + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval DataLoader: @@ -55,17 +57,38 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' - + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: True loader: @@ -83,6 +106,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -92,7 +117,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: False loader: @@ -108,6 +133,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -122,9 +149,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml index 0f01161f..f53b8ec1 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -22,25 +22,27 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 - Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 - + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval DataLoader: @@ -55,17 +57,38 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' - + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: True loader: @@ -83,6 +106,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -92,7 +117,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: False loader: @@ -108,6 +133,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -122,9 +149,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml index 34a1bde0..8b9e00fd 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -22,25 +22,27 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 - Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 - + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval DataLoader: @@ -55,17 +57,38 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' - + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: True loader: @@ -83,6 +106,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -92,7 +117,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: False loader: @@ -108,6 +133,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -122,9 +149,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml index 20c1d2f7..242093db 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -22,25 +22,27 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 - Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token pos_embed dist_token + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 - + name: Cosine + learning_rate: 1e-3 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval DataLoader: @@ -55,17 +57,38 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' - + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: True loader: @@ -83,6 +106,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -92,7 +117,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: False loader: @@ -108,6 +133,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -122,9 +149,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml index fa48840c..af54e4aa 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -24,24 +24,28 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -59,15 +63,35 @@ DataLoader: size: 384 - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 384 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -84,7 +108,9 @@ DataLoader: to_rgb: True channel_first: False - ResizeImage: - size: [384, 384] + resize_short: 438 + - CropImage: + size: 384 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] @@ -92,7 +118,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -107,7 +133,9 @@ Infer: to_rgb: True channel_first: False - ResizeImage: - size: [384, 384] + resize_short: 438 + - CropImage: + size: 384 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] @@ -120,9 +148,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml index aa05383d..4b9baa1b 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -24,24 +24,28 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -59,15 +63,35 @@ DataLoader: size: 224 - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -94,7 +118,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -124,9 +148,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml index c4eeaa2c..58c9667e 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -24,24 +24,28 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -59,15 +63,35 @@ DataLoader: size: 384 - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 384 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -84,7 +108,9 @@ DataLoader: to_rgb: True channel_first: False - ResizeImage: - size: [384, 384] + resize_short: 438 + - CropImage: + size: 384 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] @@ -92,7 +118,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -107,7 +133,9 @@ Infer: to_rgb: True channel_first: False - ResizeImage: - size: [384, 384] + resize_short: 438 + - CropImage: + size: 384 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] @@ -120,9 +148,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml index e6bfc460..16f5a7dc 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -24,24 +24,28 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -59,15 +63,35 @@ DataLoader: size: 224 - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -94,7 +118,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -124,9 +148,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml index f3bcad06..88fc3da4 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -24,24 +24,28 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -59,15 +63,35 @@ DataLoader: size: 224 - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -94,7 +118,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -124,9 +148,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml index 390db2be..ed9b4d50 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -24,24 +24,28 @@ Arch: # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 20 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -59,15 +63,35 @@ DataLoader: size: 224 - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -94,7 +118,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -124,9 +148,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml b/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml index 7c06a3ba..17fd657d 100644 --- a/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml +++ b/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -20,28 +20,34 @@ Global: Arch: name: alt_gvt_base class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.3 # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -57,17 +63,39 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -85,6 +113,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -94,7 +124,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -110,6 +140,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -124,9 +156,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml b/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml index 4a56a8ee..393a6387 100644 --- a/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml +++ b/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -20,28 +20,34 @@ Global: Arch: name: alt_gvt_large class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.5 # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -57,17 +63,39 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -85,6 +113,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -94,7 +124,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -110,6 +140,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -124,9 +156,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml b/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml index 78cc263f..b40f5183 100644 --- a/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml +++ b/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -20,28 +20,34 @@ Global: Arch: name: alt_gvt_small class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.2 # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -57,17 +63,39 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -85,6 +113,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -94,7 +124,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -110,6 +140,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -124,9 +156,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml b/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml index 100e87a9..4c7c0991 100644 --- a/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml +++ b/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -20,28 +20,34 @@ Global: Arch: name: pcpvt_base class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.3 # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -57,17 +63,39 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -85,6 +113,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -94,7 +124,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -110,6 +140,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -124,9 +156,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml b/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml index ad7b4df5..e0e5c6f5 100644 --- a/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml +++ b/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -20,28 +20,34 @@ Global: Arch: name: pcpvt_large class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.5 # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -57,17 +63,39 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -85,6 +113,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -94,7 +124,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -110,6 +140,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -124,9 +156,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] diff --git a/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml b/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml index dff588cc..547d2583 100644 --- a/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml +++ b/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml @@ -7,7 +7,7 @@ Global: save_interval: 1 eval_during_train: True eval_interval: 1 - epochs: 120 + epochs: 300 print_batch_step: 10 use_visualdl: False # used for static mode and model export @@ -20,28 +20,34 @@ Global: Arch: name: pcpvt_small class_num: 1000 + drop_rate: 0.0 + drop_path_rate: 0.2 # loss function config for traing/eval process Loss: Train: - - CELoss: + - MixCELoss: weight: 1.0 + epsilon: 0.1 Eval: - CELoss: weight: 1.0 Optimizer: - name: Momentum - momentum: 0.9 + name: AdamW + beta1: 0.9 + beta2: 0.999 + epsilon: 1e-8 + weight_decay: 0.05 + no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block + one_dim_param_no_weight_decay: True lr: - name: Piecewise - learning_rate: 0.1 - decay_epochs: [30, 60, 90] - values: [0.1, 0.01, 0.001, 0.0001] - regularizer: - name: 'L2' - coeff: 0.0001 + name: Cosine + learning_rate: 5e-4 + eta_min: 1e-5 + warmup_epoch: 5 + warmup_start_lr: 1e-6 # data loader for train and eval @@ -57,17 +63,39 @@ DataLoader: channel_first: False - RandCropImage: size: 224 + interpolation: bicubic + backend: pil - RandFlipImage: flip_code: 1 + - TimmAutoAugment: + config_str: rand-m9-mstd0.5-inc1 + interpolation: bicubic + img_size: 224 - NormalizeImage: scale: 1.0/255.0 mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + - RandomErasing: + EPSILON: 0.25 + sl: 0.02 + sh: 1.0/3.0 + r1: 0.3 + attempt: 10 + use_log_aspect: True + mode: pixel + batch_transform_ops: + - OpSampler: + MixupOperator: + alpha: 0.8 + prob: 0.5 + CutmixOperator: + alpha: 1.0 + prob: 0.5 sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: True loader: @@ -85,6 +113,8 @@ DataLoader: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -94,7 +124,7 @@ DataLoader: order: '' sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 128 drop_last: False shuffle: False loader: @@ -110,6 +140,8 @@ Infer: channel_first: False - ResizeImage: resize_short: 256 + interpolation: bicubic + backend: pil - CropImage: size: 224 - NormalizeImage: @@ -124,9 +156,6 @@ Infer: class_id_map_file: ppcls/utils/imagenet1k_label_list.txt Metric: - Train: - - TopkAcc: - topk: [1, 5] Eval: - TopkAcc: topk: [1, 5] -- GitLab