提交 f5b32a02 编写于 作者: G gaotingquan 提交者: Tingquan Gao

fix: fix the training configs of deit, swin, twins

上级 ef2fd19b
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
DataLoader:
......@@ -55,17 +57,38 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: True
loader:
......@@ -83,6 +106,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -92,7 +117,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: False
loader:
......@@ -108,6 +133,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
DataLoader:
......@@ -54,18 +56,39 @@ DataLoader:
to_rgb: True
channel_first: False
- RandCropImage:
size: 384
size: 384
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: True
loader:
......@@ -82,7 +105,9 @@ DataLoader:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 426
resize_short: 438
interpolation: bicubic
backend: pil
- CropImage:
size: 384
- NormalizeImage:
......@@ -92,7 +117,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: False
loader:
......@@ -107,7 +132,9 @@ Infer:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 426
resize_short: 438
interpolation: bicubic
backend: pil
- CropImage:
size: 384
- NormalizeImage:
......@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
DataLoader:
......@@ -55,17 +57,38 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: True
loader:
......@@ -83,6 +106,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -92,7 +117,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: False
loader:
......@@ -108,6 +133,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
DataLoader:
......@@ -54,18 +56,39 @@ DataLoader:
to_rgb: True
channel_first: False
- RandCropImage:
size: 384
size: 384
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: True
loader:
......@@ -82,7 +105,9 @@ DataLoader:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 426
resize_short: 438
interpolation: bicubic
backend: pil
- CropImage:
size: 384
- NormalizeImage:
......@@ -92,7 +117,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: False
loader:
......@@ -107,7 +132,9 @@ Infer:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 426
resize_short: 438
interpolation: bicubic
backend: pil
- CropImage:
size: 384
- NormalizeImage:
......@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
DataLoader:
......@@ -55,17 +57,38 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: True
loader:
......@@ -83,6 +106,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -92,7 +117,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: False
loader:
......@@ -108,6 +133,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
DataLoader:
......@@ -55,17 +57,38 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: True
loader:
......@@ -83,6 +106,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -92,7 +117,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: False
loader:
......@@ -108,6 +133,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
DataLoader:
......@@ -55,17 +57,38 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: True
loader:
......@@ -83,6 +106,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -92,7 +117,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: False
loader:
......@@ -108,6 +133,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -22,25 +22,27 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token pos_embed dist_token
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 1e-3
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
DataLoader:
......@@ -55,17 +57,38 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: True
loader:
......@@ -83,6 +106,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -92,7 +117,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 256
drop_last: False
shuffle: False
loader:
......@@ -108,6 +133,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -122,9 +149,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -59,15 +63,35 @@ DataLoader:
size: 384
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -84,7 +108,9 @@ DataLoader:
to_rgb: True
channel_first: False
- ResizeImage:
size: [384, 384]
resize_short: 438
- CropImage:
size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
......@@ -92,7 +118,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -107,7 +133,9 @@ Infer:
to_rgb: True
channel_first: False
- ResizeImage:
size: [384, 384]
resize_short: 438
- CropImage:
size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
......@@ -120,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -59,15 +63,35 @@ DataLoader:
size: 224
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -94,7 +118,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -124,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -59,15 +63,35 @@ DataLoader:
size: 384
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -84,7 +108,9 @@ DataLoader:
to_rgb: True
channel_first: False
- ResizeImage:
size: [384, 384]
resize_short: 438
- CropImage:
size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
......@@ -92,7 +118,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -107,7 +133,9 @@ Infer:
to_rgb: True
channel_first: False
- ResizeImage:
size: [384, 384]
resize_short: 438
- CropImage:
size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
......@@ -120,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -59,15 +63,35 @@ DataLoader:
size: 224
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -94,7 +118,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -124,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -59,15 +63,35 @@ DataLoader:
size: 224
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -94,7 +118,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -124,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -24,24 +24,28 @@ Arch:
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 20
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -59,15 +63,35 @@ DataLoader:
size: 224
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -94,7 +118,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -124,9 +148,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -20,28 +20,34 @@ Global:
Arch:
name: alt_gvt_base
class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.3
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -57,17 +63,39 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -85,6 +113,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -94,7 +124,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -110,6 +140,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -20,28 +20,34 @@ Global:
Arch:
name: alt_gvt_large
class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.5
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -57,17 +63,39 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -85,6 +113,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -94,7 +124,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -110,6 +140,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -20,28 +20,34 @@ Global:
Arch:
name: alt_gvt_small
class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.2
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -57,17 +63,39 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -85,6 +113,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -94,7 +124,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -110,6 +140,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -20,28 +20,34 @@ Global:
Arch:
name: pcpvt_base
class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.3
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -57,17 +63,39 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -85,6 +113,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -94,7 +124,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -110,6 +140,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -20,28 +20,34 @@ Global:
Arch:
name: pcpvt_large
class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.5
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -57,17 +63,39 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -85,6 +113,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -94,7 +124,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -110,6 +140,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -7,7 +7,7 @@ Global:
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 120
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -20,28 +20,34 @@ Global:
Arch:
name: pcpvt_small
class_num: 1000
drop_rate: 0.0
drop_path_rate: 0.2
# loss function config for training/eval process
Loss:
Train:
- CELoss:
- MixCELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block
one_dim_param_no_weight_decay: True
lr:
name: Piecewise
learning_rate: 0.1
decay_epochs: [30, 60, 90]
values: [0.1, 0.01, 0.001, 0.0001]
regularizer:
name: 'L2'
coeff: 0.0001
name: Cosine
learning_rate: 5e-4
eta_min: 1e-5
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
......@@ -57,17 +63,39 @@ DataLoader:
channel_first: False
- RandCropImage:
size: 224
interpolation: bicubic
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: bicubic
img_size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: True
loader:
......@@ -85,6 +113,8 @@ DataLoader:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -94,7 +124,7 @@ DataLoader:
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
batch_size: 128
drop_last: False
shuffle: False
loader:
......@@ -110,6 +140,8 @@ Infer:
channel_first: False
- ResizeImage:
resize_short: 256
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
......@@ -124,9 +156,6 @@ Infer:
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册