提交 dc4fdba0 编写于 作者: Y Yang Nie 提交者: Tingquan Gao

add MobileViTv3

上级 df31d808
......@@ -78,6 +78,7 @@ from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224
from .model_zoo.cvt import CvT_13_224, CvT_13_384, CvT_21_224, CvT_21_384, CvT_W24_384
from .model_zoo.micronet import MicroNet_M0, MicroNet_M1, MicroNet_M2, MicroNet_M3
from .model_zoo.mobilenext import MobileNeXt_x0_35, MobileNeXt_x0_5, MobileNeXt_x0_75, MobileNeXt_x1_0, MobileNeXt_x1_4
from .model_zoo.mobilevit_v3 import MobileViTv3_XXS, MobileViTv3_XS, MobileViTv3_S, MobileViTv3_x0_5, MobileViTv3_x0_75, MobileViTv3_x1_0
from .variant_models.resnet_variant import ResNet50_last_stage_stride1
from .variant_models.resnet_variant import ResNet50_adaptive_max_pool2d
......
此差异已折叠。
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 256]
save_inference_dir: ./inference
use_dali: False
# mixed precision training
AMP:
scale_loss: 65536
use_dynamic_loss_scaling: True
# O1: mixed fp16
level: O1
# model ema
EMA:
decay: 0.9995
# model architecture
Arch:
name: MobileViTv3_S
class_num: 1000
dropout: 0.1
# loss function config for traing/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.01
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
warmup_epoch: 1 # 3000 iterations
warmup_start_lr: 0.0002
# by_epoch: True
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiScaleDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 256
interpolation: bilinear
use_log_aspect: True
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
# support to specify width and height respectively:
# scales: [(256,256) (160,160), (192,192), (224,224) (288,288) (320,320)]
sampler:
name: MultiScaleSampler
scales: [256, 160, 192, 224, 288, 320]
# first_bs: batch size for the first image resolution in the scales list
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
first_bs: 48
divided_factor: 32
is_training: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: False
channel_first: False
- ResizeImage:
resize_short: 288
interpolation: bilinear
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 48
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 288
interpolation: bilinear
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 256]
save_inference_dir: ./inference
use_dali: False
# mixed precision training
AMP:
scale_loss: 65536
use_dynamic_loss_scaling: True
# O1: mixed fp16
level: O1
# model ema
EMA:
decay: 0.9995
# model architecture
Arch:
name: MobileViTv3_XS
class_num: 1000
dropout: 0.1
# loss function config for traing/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.01
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
warmup_epoch: 1 # 3000 iterations
warmup_start_lr: 0.0002
# by_epoch: True
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiScaleDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 256
interpolation: bilinear
use_log_aspect: True
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
# support to specify width and height respectively:
# scales: [(256,256) (160,160), (192,192), (224,224) (288,288) (320,320)]
sampler:
name: MultiScaleSampler
scales: [256, 160, 192, 224, 288, 320]
# first_bs: batch size for the first image resolution in the scales list
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
first_bs: 48
divided_factor: 32
is_training: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: False
channel_first: False
- ResizeImage:
resize_short: 288
interpolation: bilinear
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 48
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 288
interpolation: bilinear
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 256]
save_inference_dir: ./inference
use_dali: False
# mixed precision training
AMP:
scale_loss: 65536
use_dynamic_loss_scaling: True
# O1: mixed fp16
level: O1
# model ema
EMA:
decay: 0.9995
# model architecture
Arch:
name: MobileViTv3_XXS
class_num: 1000
dropout: 0.05
# loss function config for traing/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.01
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
warmup_epoch: 1 # 3000 iterations
warmup_start_lr: 0.0002
# by_epoch: True
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiScaleDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 256
interpolation: bilinear
use_log_aspect: True
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
# support to specify width and height respectively:
# scales: [(256,256) (160,160), (192,192), (224,224) (288,288) (320,320)]
sampler:
name: MultiScaleSampler
scales: [256, 160, 192, 224, 288, 320]
# first_bs: batch size for the first image resolution in the scales list
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
first_bs: 48
divided_factor: 32
is_training: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: False
channel_first: False
- ResizeImage:
resize_short: 288
interpolation: bilinear
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 48
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 288
interpolation: bilinear
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 256]
save_inference_dir: ./inference
use_dali: False
# mixed precision training
AMP:
scale_loss: 65536
use_dynamic_loss_scaling: True
# O1: mixed fp16
level: O1
# model ema
EMA:
decay: 0.9995
# model architecture
Arch:
name: MobileViTv3_x0_5
class_num: 1000
classifier_dropout: 0.
# loss function config for traing/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.01
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
warmup_epoch: 1 # 3000 iterations
warmup_start_lr: 0.0002
# by_epoch: True
# data loader for train and eval
DataLoader:
Train:
dataset:
name: MultiScaleDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 256
interpolation: bilinear
use_log_aspect: True
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
# support to specify width and height respectively:
# scales: [(256,256) (160,160), (192,192), (224,224) (288,288) (320,320)]
sampler:
name: MultiScaleSampler
scales: [256, 160, 192, 224, 288, 320]
# first_bs: batch size for the first image resolution in the scales list
# divide_factor: to ensure the width and height dimensions can be devided by downsampling multiple
first_bs: 48
divided_factor: 32
is_training: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: False
channel_first: False
- ResizeImage:
resize_short: 288
interpolation: bilinear
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 48
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 288
interpolation: bilinear
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册