提交 f5b32a02 编写于 作者: G gaotingquan 提交者: Tingquan Gao

fix: fix the training configs of deit, swin, twins

上级 ef2fd19b
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -22,25 +22,27 @@ Arch: ...@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 1e-3
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
DataLoader: DataLoader:
...@@ -55,17 +57,38 @@ DataLoader: ...@@ -55,17 +57,38 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -83,6 +106,8 @@ DataLoader: ...@@ -83,6 +106,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -92,7 +117,7 @@ DataLoader: ...@@ -92,7 +117,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -108,6 +133,8 @@ Infer: ...@@ -108,6 +133,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -122,9 +149,6 @@ Infer: ...@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -22,25 +22,27 @@ Arch: ...@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 1e-3
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
DataLoader: DataLoader:
...@@ -54,18 +56,39 @@ DataLoader: ...@@ -54,18 +56,39 @@ DataLoader:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 384 size: 384
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 384
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -82,7 +105,9 @@ DataLoader: ...@@ -82,7 +105,9 @@ DataLoader:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 426 resize_short: 438
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 384 size: 384
- NormalizeImage: - NormalizeImage:
...@@ -92,7 +117,7 @@ DataLoader: ...@@ -92,7 +117,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -107,7 +132,9 @@ Infer: ...@@ -107,7 +132,9 @@ Infer:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 426 resize_short: 438
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 384 size: 384
- NormalizeImage: - NormalizeImage:
...@@ -122,9 +149,6 @@ Infer: ...@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -22,25 +22,27 @@ Arch: ...@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 1e-3
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
DataLoader: DataLoader:
...@@ -55,17 +57,38 @@ DataLoader: ...@@ -55,17 +57,38 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -83,6 +106,8 @@ DataLoader: ...@@ -83,6 +106,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -92,7 +117,7 @@ DataLoader: ...@@ -92,7 +117,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -108,6 +133,8 @@ Infer: ...@@ -108,6 +133,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -122,9 +149,6 @@ Infer: ...@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -22,25 +22,27 @@ Arch: ...@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 1e-3
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
DataLoader: DataLoader:
...@@ -54,18 +56,39 @@ DataLoader: ...@@ -54,18 +56,39 @@ DataLoader:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 384 size: 384
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 384
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -82,7 +105,9 @@ DataLoader: ...@@ -82,7 +105,9 @@ DataLoader:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 426 resize_short: 438
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 384 size: 384
- NormalizeImage: - NormalizeImage:
...@@ -92,7 +117,7 @@ DataLoader: ...@@ -92,7 +117,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -107,7 +132,9 @@ Infer: ...@@ -107,7 +132,9 @@ Infer:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 426 resize_short: 438
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 384 size: 384
- NormalizeImage: - NormalizeImage:
...@@ -122,9 +149,6 @@ Infer: ...@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -22,25 +22,27 @@ Arch: ...@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 1e-3
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
DataLoader: DataLoader:
...@@ -55,17 +57,38 @@ DataLoader: ...@@ -55,17 +57,38 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -83,6 +106,8 @@ DataLoader: ...@@ -83,6 +106,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -92,7 +117,7 @@ DataLoader: ...@@ -92,7 +117,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -108,6 +133,8 @@ Infer: ...@@ -108,6 +133,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -122,9 +149,6 @@ Infer: ...@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -22,25 +22,27 @@ Arch: ...@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 1e-3
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
DataLoader: DataLoader:
...@@ -55,17 +57,38 @@ DataLoader: ...@@ -55,17 +57,38 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -83,6 +106,8 @@ DataLoader: ...@@ -83,6 +106,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -92,7 +117,7 @@ DataLoader: ...@@ -92,7 +117,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -108,6 +133,8 @@ Infer: ...@@ -108,6 +133,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -122,9 +149,6 @@ Infer: ...@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -22,25 +22,27 @@ Arch: ...@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 1e-3
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
DataLoader: DataLoader:
...@@ -55,17 +57,38 @@ DataLoader: ...@@ -55,17 +57,38 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -83,6 +106,8 @@ DataLoader: ...@@ -83,6 +106,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -92,7 +117,7 @@ DataLoader: ...@@ -92,7 +117,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -108,6 +133,8 @@ Infer: ...@@ -108,6 +133,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -122,9 +149,6 @@ Infer: ...@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -22,25 +22,27 @@ Arch: ...@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 1e-3
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
DataLoader: DataLoader:
...@@ -55,17 +57,38 @@ DataLoader: ...@@ -55,17 +57,38 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -83,6 +106,8 @@ DataLoader: ...@@ -83,6 +106,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -92,7 +117,7 @@ DataLoader: ...@@ -92,7 +117,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 256
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -108,6 +133,8 @@ Infer: ...@@ -108,6 +133,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -122,9 +149,6 @@ Infer: ...@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -24,24 +24,28 @@ Arch: ...@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 20
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -59,15 +63,35 @@ DataLoader: ...@@ -59,15 +63,35 @@ DataLoader:
size: 384 size: 384
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 384
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -84,7 +108,9 @@ DataLoader: ...@@ -84,7 +108,9 @@ DataLoader:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
size: [384, 384] resize_short: 438
- CropImage:
size: 384
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
...@@ -92,7 +118,7 @@ DataLoader: ...@@ -92,7 +118,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -107,7 +133,9 @@ Infer: ...@@ -107,7 +133,9 @@ Infer:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
size: [384, 384] resize_short: 438
- CropImage:
size: 384
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
...@@ -120,9 +148,6 @@ Infer: ...@@ -120,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -24,24 +24,28 @@ Arch: ...@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 20
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -59,15 +63,35 @@ DataLoader: ...@@ -59,15 +63,35 @@ DataLoader:
size: 224 size: 224
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -94,7 +118,7 @@ DataLoader: ...@@ -94,7 +118,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -124,9 +148,6 @@ Infer: ...@@ -124,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -24,24 +24,28 @@ Arch: ...@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 20
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -59,15 +63,35 @@ DataLoader: ...@@ -59,15 +63,35 @@ DataLoader:
size: 384 size: 384
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 384
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -84,7 +108,9 @@ DataLoader: ...@@ -84,7 +108,9 @@ DataLoader:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
size: [384, 384] resize_short: 438
- CropImage:
size: 384
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
...@@ -92,7 +118,7 @@ DataLoader: ...@@ -92,7 +118,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -107,7 +133,9 @@ Infer: ...@@ -107,7 +133,9 @@ Infer:
to_rgb: True to_rgb: True
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
size: [384, 384] resize_short: 438
- CropImage:
size: 384
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
...@@ -120,9 +148,6 @@ Infer: ...@@ -120,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -24,24 +24,28 @@ Arch: ...@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 20
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -59,15 +63,35 @@ DataLoader: ...@@ -59,15 +63,35 @@ DataLoader:
size: 224 size: 224
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -94,7 +118,7 @@ DataLoader: ...@@ -94,7 +118,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -124,9 +148,6 @@ Infer: ...@@ -124,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -24,24 +24,28 @@ Arch: ...@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 20
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -59,15 +63,35 @@ DataLoader: ...@@ -59,15 +63,35 @@ DataLoader:
size: 224 size: 224
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -94,7 +118,7 @@ DataLoader: ...@@ -94,7 +118,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -124,9 +148,6 @@ Infer: ...@@ -124,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -24,24 +24,28 @@ Arch: ...@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 20
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -59,15 +63,35 @@ DataLoader: ...@@ -59,15 +63,35 @@ DataLoader:
size: 224 size: 224
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -94,7 +118,7 @@ DataLoader: ...@@ -94,7 +118,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -124,9 +148,6 @@ Infer: ...@@ -124,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -20,28 +20,34 @@ Global: ...@@ -20,28 +20,34 @@ Global:
Arch: Arch:
name: alt_gvt_base name: alt_gvt_base
class_num: 1000 class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.3
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -57,17 +63,39 @@ DataLoader: ...@@ -57,17 +63,39 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -85,6 +113,8 @@ DataLoader: ...@@ -85,6 +113,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -94,7 +124,7 @@ DataLoader: ...@@ -94,7 +124,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -110,6 +140,8 @@ Infer: ...@@ -110,6 +140,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -124,9 +156,6 @@ Infer: ...@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -20,28 +20,34 @@ Global: ...@@ -20,28 +20,34 @@ Global:
Arch: Arch:
name: alt_gvt_large name: alt_gvt_large
class_num: 1000 class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.5
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -57,17 +63,39 @@ DataLoader: ...@@ -57,17 +63,39 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -85,6 +113,8 @@ DataLoader: ...@@ -85,6 +113,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -94,7 +124,7 @@ DataLoader: ...@@ -94,7 +124,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -110,6 +140,8 @@ Infer: ...@@ -110,6 +140,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -124,9 +156,6 @@ Infer: ...@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -20,28 +20,34 @@ Global: ...@@ -20,28 +20,34 @@ Global:
Arch: Arch:
name: alt_gvt_small name: alt_gvt_small
class_num: 1000 class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.2
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -57,17 +63,39 @@ DataLoader: ...@@ -57,17 +63,39 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -85,6 +113,8 @@ DataLoader: ...@@ -85,6 +113,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -94,7 +124,7 @@ DataLoader: ...@@ -94,7 +124,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -110,6 +140,8 @@ Infer: ...@@ -110,6 +140,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -124,9 +156,6 @@ Infer: ...@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -20,28 +20,34 @@ Global: ...@@ -20,28 +20,34 @@ Global:
Arch: Arch:
name: pcpvt_base name: pcpvt_base
class_num: 1000 class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.3
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -57,17 +63,39 @@ DataLoader: ...@@ -57,17 +63,39 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -85,6 +113,8 @@ DataLoader: ...@@ -85,6 +113,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -94,7 +124,7 @@ DataLoader: ...@@ -94,7 +124,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -110,6 +140,8 @@ Infer: ...@@ -110,6 +140,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -124,9 +156,6 @@ Infer: ...@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -20,28 +20,34 @@ Global: ...@@ -20,28 +20,34 @@ Global:
Arch: Arch:
name: pcpvt_large name: pcpvt_large
class_num: 1000 class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.5
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -57,17 +63,39 @@ DataLoader: ...@@ -57,17 +63,39 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -85,6 +113,8 @@ DataLoader: ...@@ -85,6 +113,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -94,7 +124,7 @@ DataLoader: ...@@ -94,7 +124,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -110,6 +140,8 @@ Infer: ...@@ -110,6 +140,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -124,9 +156,6 @@ Infer: ...@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
...@@ -7,7 +7,7 @@ Global: ...@@ -7,7 +7,7 @@ Global:
save_interval: 1 save_interval: 1
eval_during_train: True eval_during_train: True
eval_interval: 1 eval_interval: 1
epochs: 120 epochs: 300
print_batch_step: 10 print_batch_step: 10
use_visualdl: False use_visualdl: False
# used for static mode and model export # used for static mode and model export
...@@ -20,28 +20,34 @@ Global: ...@@ -20,28 +20,34 @@ Global:
Arch: Arch:
name: pcpvt_small name: pcpvt_small
class_num: 1000 class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.2
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
Train: Train:
- CELoss: - MixCELoss:
weight: 1.0 weight: 1.0
epsilon: 0.1
Eval: Eval:
- CELoss: - CELoss:
weight: 1.0 weight: 1.0
Optimizer: Optimizer:
name: Momentum name: AdamW
momentum: 0.9 beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr: lr:
name: Piecewise name: Cosine
learning_rate: 0.1 learning_rate: 5e-4
decay_epochs: [30, 60, 90] eta_min: 1e-5
values: [0.1, 0.01, 0.001, 0.0001] warmup_epoch: 5
regularizer: warmup_start_lr: 1e-6
name: 'L2'
coeff: 0.0001
# data loader for train and eval # data loader for train and eval
...@@ -57,17 +63,39 @@ DataLoader: ...@@ -57,17 +63,39 @@ DataLoader:
channel_first: False channel_first: False
- RandCropImage: - RandCropImage:
size: 224 size: 224
interpolation: bicubic
backend: pil
- RandFlipImage: - RandFlipImage:
flip_code: 1 flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage: - NormalizeImage:
scale: 1.0/255.0 scale: 1.0/255.0
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225] std: [0.229, 0.224, 0.225]
order: '' order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: True shuffle: True
loader: loader:
...@@ -85,6 +113,8 @@ DataLoader: ...@@ -85,6 +113,8 @@ DataLoader:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -94,7 +124,7 @@ DataLoader: ...@@ -94,7 +124,7 @@ DataLoader:
order: '' order: ''
sampler: sampler:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 64 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: False
loader: loader:
...@@ -110,6 +140,8 @@ Infer: ...@@ -110,6 +140,8 @@ Infer:
channel_first: False channel_first: False
- ResizeImage: - ResizeImage:
resize_short: 256 resize_short: 256
interpolation: bicubic
backend: pil
- CropImage: - CropImage:
size: 224 size: 224
- NormalizeImage: - NormalizeImage:
...@@ -124,9 +156,6 @@ Infer: ...@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric: Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval: Eval:
- TopkAcc: - TopkAcc:
topk: [1, 5] topk: [1, 5]
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册