From 90d591e4103660b8b83e15e940e8d2011a1e9b6f Mon Sep 17 00:00:00 2001
From: zengshao0622
Date: Thu, 8 Sep 2022 11:34:54 +0000
Subject: [PATCH] update ppcls

---
 ppcls/arch/backbone/__init__.py                    |   2 +-
 ppcls/arch/backbone/model_zoo/cae.py               | 176 +++---------
 .../CAE/cae_base_patch16_224_finetune.yaml         |  27 ++-
 3 files changed, 42 insertions(+), 163 deletions(-)

diff --git a/ppcls/arch/backbone/__init__.py b/ppcls/arch/backbone/__init__.py
index 1e2a32f9..49d47bb7 100644
--- a/ppcls/arch/backbone/__init__.py
+++ b/ppcls/arch/backbone/__init__.py
@@ -69,7 +69,7 @@ from .model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG
 from .model_zoo.van import VAN_tiny
 from .model_zoo.peleenet import PeleeNet
 from .model_zoo.convnext import ConvNeXt_tiny
-from .model_zoo.cae import cae_base_patch16_224, cae_base_patch16_384, cae_large_patch16_224, cae_large_patch16_384, cae_large_patch16_512, cae_small_patch16_224
+from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224
 from .variant_models.resnet_variant import ResNet50_last_stage_stride1
 from .variant_models.vgg_variant import VGG19Sigmoid
 
diff --git a/ppcls/arch/backbone/model_zoo/cae.py b/ppcls/arch/backbone/model_zoo/cae.py
index 007a8adc..7b700a36 100644
--- a/ppcls/arch/backbone/model_zoo/cae.py
+++ b/ppcls/arch/backbone/model_zoo/cae.py
@@ -25,6 +25,17 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 
+from ....utils.download import get_weights_path_from_url
+
+MODEL_URLS = {
+    "cae_base_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/cae_base_patch16_224_pretrained.pdparams",
+    "cae_large_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/cae_large_patch16_224_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
 
 def _ntuple(n):
     def parse(x):
@@ -615,13 +626,22 @@ def _enable_linear_eval(model):
 
 
 def _load_pretrained(pretrained,
+                     pretrained_url,
                      model,
                      model_keys,
                      model_ema_configs,
                      abs_pos_emb,
                      rel_pos_bias,
                      use_ssld=False):
-    checkpoint = paddle.load(pretrained)
+    if pretrained is False:
+        return
+    elif pretrained is True:
+        local_weight_path = get_weights_path_from_url(pretrained_url).replace(
+            ".pdparams", "")
+        checkpoint = paddle.load(local_weight_path + ".pdparams")
+    elif isinstance(pretrained, str):
+        checkpoint = paddle.load(pretrained)
+
     checkpoint_model = None
     for model_key in model_keys.split('|'):
         if model_key in checkpoint:
@@ -766,48 +786,15 @@ def _load_pretrained(pretrained,
     return
 
 
-def cae_small_patch16_224(**kwargs):
+def cae_base_patch16_224(pretrained=True, use_ssld=False, **kwargs):
     config = kwargs.copy()
     enable_linear_eval = config.pop('enable_linear_eval')
     model_keys = config.pop('model_key')
     model_ema_configs = config.pop('model_ema')
     abs_pos_emb = config.pop('abs_pos_emb')
     rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
-
-    model = VisionTransformer(
-        patch_size=16,
-        embed_dim=384,
-        depth=12,
-        num_heads=12,
-        mlp_ratio=4,
-        qkv_bias=True,
-        norm_layer=partial(
-            nn.LayerNorm, epsilon=1e-6),
-        **kwargs)
-
-    if enable_linear_eval:
-        _enable_linear_eval(model)
-
-    _load_pretrained(
-        pretrained,
-        model,
-        model_keys,
-        model_ema_configs,
-        abs_pos_emb,
-        rel_pos_bias,
-        use_ssld=False)
-    return model
-
-
-def cae_base_patch16_224(**kwargs):
-    config = kwargs.copy()
-    enable_linear_eval = config.pop('enable_linear_eval')
-    model_keys = config.pop('model_key')
-    model_ema_configs = config.pop('model_ema')
-    abs_pos_emb = config.pop('abs_pos_emb')
-    rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
+    if 'pretrained' in config:
+        pretrained = config.pop('pretrained')
 
     model = VisionTransformer(
         patch_size=16,
@@ -825,6 +812,7 @@ def cae_base_patch16_224(**kwargs):
 
     _load_pretrained(
         pretrained,
+        MODEL_URLS["cae_base_patch16_224"],
         model,
         model_keys,
         model_ema_configs,
@@ -835,124 +823,17 @@ def cae_base_patch16_224(**kwargs):
     return model
 
 
-def cae_base_patch16_384(**kwargs):
-    config = kwargs.copy()
-    enable_linear_eval = config.pop('enable_linear_eval')
-    model_keys = config.pop('model_key')
-    model_ema_configs = config.pop('model_ema')
-    abs_pos_emb = config.pop('abs_pos_emb')
-    rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
-
-    model = VisionTransformer(
-        img_size=384,
-        patch_size=16,
-        embed_dim=768,
-        depth=12,
-        num_heads=12,
-        mlp_ratio=4,
-        qkv_bias=True,
-        norm_layer=partial(
-            nn.LayerNorm, epsilon=1e-6),
-        **kwargs)
-
-    if enable_linear_eval:
-        _enable_linear_eval(model)
-
-    _load_pretrained(
-        pretrained,
-        model,
-        model_keys,
-        model_ema_configs,
-        abs_pos_emb,
-        rel_pos_bias,
-        use_ssld=False)
-
-    return model
-
-
-def cae_large_patch16_224(**kwargs):
-    config = kwargs.copy()
-    enable_linear_eval = config.pop('enable_linear_eval')
-    model_keys = config.pop('model_key')
-    model_ema_configs = config.pop('model_ema')
-    abs_pos_emb = config.pop('abs_pos_emb')
-    rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
-
-    model = VisionTransformer(
-        patch_size=16,
-        embed_dim=1024,
-        depth=24,
-        num_heads=16,
-        mlp_ratio=4,
-        qkv_bias=True,
-        norm_layer=partial(
-            nn.LayerNorm, epsilon=1e-6),
-        **kwargs)
-
-    if enable_linear_eval:
-        _enable_linear_eval(model)
-
-    _load_pretrained(
-        pretrained,
-        model,
-        model_keys,
-        model_ema_configs,
-        abs_pos_emb,
-        rel_pos_bias,
-        use_ssld=False)
-
-    return model
-
-
-def cae_large_patch16_384(**kwargs):
-    config = kwargs.copy()
-    enable_linear_eval = config.pop('enable_linear_eval')
-    model_keys = config.pop('model_key')
-    model_ema_configs = config.pop('model_ema')
-    abs_pos_emb = config.pop('abs_pos_emb')
-    rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
-
-    model = VisionTransformer(
-        img_size=384,
-        patch_size=16,
-        embed_dim=1024,
-        depth=24,
-        num_heads=16,
-        mlp_ratio=4,
-        qkv_bias=True,
-        norm_layer=partial(
-            nn.LayerNorm, epsilon=1e-6),
-        **kwargs)
-
-    if enable_linear_eval:
-        _enable_linear_eval(model)
-
-    _load_pretrained(
-        pretrained,
-        model,
-        model_keys,
-        model_ema_configs,
-        abs_pos_emb,
-        rel_pos_bias,
-        use_ssld=False)
-
-    return model
-
-
-def cae_large_patch16_512(**kwargs):
+def cae_large_patch16_224(pretrained=True, use_ssld=False, **kwargs):
     config = kwargs.copy()
     enable_linear_eval = config.pop('enable_linear_eval')
     model_keys = config.pop('model_key')
     model_ema_configs = config.pop('model_ema')
     abs_pos_emb = config.pop('abs_pos_emb')
     rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
+    if 'pretrained' in config:
+        pretrained = config.pop('pretrained')
 
     model = VisionTransformer(
-        img_size=512,
         patch_size=16,
         embed_dim=1024,
         depth=24,
@@ -968,6 +849,7 @@ def cae_large_patch16_512(**kwargs):
 
     _load_pretrained(
         pretrained,
+        MODEL_URLS["cae_large_patch16_224"],
         model,
         model_keys,
         model_ema_configs,
diff --git a/ppcls/configs/CAE/cae_base_patch16_224_finetune.yaml b/ppcls/configs/CAE/cae_base_patch16_224_finetune.yaml
index 053dbeeb..74be75c6 100644
--- a/ppcls/configs/CAE/cae_base_patch16_224_finetune.yaml
+++ b/ppcls/configs/CAE/cae_base_patch16_224_finetune.yaml
@@ -4,10 +4,10 @@ Global:
   pretrained_model: null
   output_dir: ./output/
   device: gpu
-  save_interval: 1
+  save_interval: 20
   eval_during_train: True
   eval_interval: 1
-  epochs: 200
+  epochs: 100
   print_batch_step: 10
   use_visualdl: False
   # used for static mode and model export
@@ -17,7 +17,7 @@ Global:
 # model architecture
 Arch:
   name: cae_base_patch16_224
-  class_num: 4
+  class_num: 102
   drop_rate: 0.0
   drop_path_rate: 0.1
   attn_drop_rate: 0.0
@@ -39,7 +39,7 @@ Arch:
   enable_model_ema: False
   model_ema_decay: 0.9999
   model_ema_force_cpu: False
-  pretrained: ./pretrained/vit_base_cae_pretrained.pdparams
+  pretrained: True
 
 # loss function config for traing/eval process
 Loss:
@@ -60,7 +60,7 @@ Optimizer:
   layerwise_decay: 0.65
   lr:
     name: Cosine
-    learning_rate: 0.004
+    learning_rate: 0.001
     eta_min: 1e-6
     warmup_epoch: 10
     warmup_start_lr: 1e-6
@@ -71,14 +71,14 @@ DataLoader:
   Train:
     dataset:
       name: ImageNetDataset
-      image_root: ./dataset/paddle-job-153869-0/train_eval_data/images
-      cls_label_path: ./dataset/paddle-job-153869-0/train_eval_data/train_data_list.txt
+      image_root: ./dataset/flowers102/
+      cls_label_path: ./dataset/flowers102/train_list.txt
       batch_transform_ops:
         - MixupCutmixHybrid:
             mixup_alpha: 0.8
            cutmix_alpha: 1.0
             switch_prob: 0.5
-            num_classes: 4
+            num_classes: 102
       transform_ops:
         - DecodeImage:
             to_rgb: True
@@ -99,11 +99,10 @@ DataLoader:
             sl: 0.02
             sh: 0.3
             r1: 0.3
-      delimiter: ' '
 
     sampler:
       name: DistributedBatchSampler
-      batch_size: 128
+      batch_size: 2
       drop_last: True
       shuffle: True
     loader:
@@ -113,8 +112,8 @@ DataLoader:
   Eval:
     dataset:
       name: ImageNetDataset
-      image_root: ./dataset/paddle-job-153869-0/train_eval_data/images
-      cls_label_path: ./dataset/paddle-job-153869-0/train_eval_data/eval_data_list.txt
+      image_root: ./dataset/flowers102/
+      cls_label_path: ./dataset/flowers102/val_list.txt
       transform_ops:
         - DecodeImage:
             to_rgb: True
@@ -128,11 +127,9 @@ DataLoader:
             mean: [0.485, 0.456, 0.406]
             std: [0.229, 0.224, 0.225]
             order: ''
-      delimiter: ' '
-
     sampler:
       name: DistributedBatchSampler
-      batch_size: 128
+      batch_size: 2
       drop_last: False
       shuffle: False
     loader:
-- 
GitLab
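
Editor's note (not part of the patch): the snippet below is a minimal standalone sketch of the weight-resolution convention introduced above, where the new _load_pretrained(pretrained, pretrained_url, ...) signature and the "pretrained: True" key in the yaml cooperate: False skips loading, True resolves the released URL registered in MODEL_URLS (downloaded via get_weights_path_from_url in the real code), and a plain string is treated as a local .pdparams checkpoint path. The helper name resolve_pretrained is illustrative only and does not exist in ppcls.

# Illustrative sketch only; mirrors the convention of the patched _load_pretrained().
MODEL_URLS = {
    "cae_base_patch16_224":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/cae_base_patch16_224_pretrained.pdparams",
}

def resolve_pretrained(pretrained, arch):
    # False -> skip weight loading entirely.
    if pretrained is False:
        return None
    # True -> use the released weights registered for this architecture.
    if pretrained is True:
        return MODEL_URLS[arch]
    # A string -> treat it as a local .pdparams checkpoint path.
    if isinstance(pretrained, str):
        return pretrained
    raise ValueError("pretrained must be a bool or a path string")

print(resolve_pretrained(True, "cae_base_patch16_224"))
print(resolve_pretrained("./pretrained/vit_base_cae_pretrained.pdparams",
                         "cae_base_patch16_224"))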