Commit 90d591e4 authored by 悟、

update ppcls

Parent 7b50ce65
@@ -69,7 +69,7 @@ from .model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG
 from .model_zoo.van import VAN_tiny
 from .model_zoo.peleenet import PeleeNet
 from .model_zoo.convnext import ConvNeXt_tiny
-from .model_zoo.cae import cae_base_patch16_224, cae_base_patch16_384, cae_large_patch16_224, cae_large_patch16_384, cae_large_patch16_512, cae_small_patch16_224
+from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224
 from .variant_models.resnet_variant import ResNet50_last_stage_stride1
 from .variant_models.vgg_variant import VGG19Sigmoid
...
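With this change only the two CAE variants that ship published weights are exported from the backbone package. A minimal import sketch, assuming a PaddleClas source checkout is importable as `ppcls`:

# Minimal sketch: the CAE entry points that remain importable after this commit.
# Assumes the PaddleClas source tree is on PYTHONPATH as `ppcls`.
from ppcls.arch.backbone import cae_base_patch16_224, cae_large_patch16_224

# cae_small_patch16_224, cae_base_patch16_384, cae_large_patch16_384 and
# cae_large_patch16_512 are removed from the public imports by this change.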
@@ -25,6 +25,17 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
+from ....utils.download import get_weights_path_from_url
+
+MODEL_URLS = {
+    "cae_base_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/cae_base_patch16_224_pretrained.pdparams",
+    "cae_large_patch16_224":
+    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/cae_large_patch16_224_pretrained.pdparams"
+}
+
+__all__ = list(MODEL_URLS.keys())
+
 
 def _ntuple(n):
     def parse(x):
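MODEL_URLS maps each exported name to its published weight file, and get_weights_path_from_url is expected to download that file once and return the cached local path. A sketch under that assumption, not part of the diff itself:

# Sketch only: resolving one of the MODEL_URLS entries added above to a local file.
# Assumes get_weights_path_from_url() caches the download and returns its local path.
import paddle
from ppcls.utils.download import get_weights_path_from_url

CAE_BASE_URL = ("https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/"
                "cae_base_patch16_224_pretrained.pdparams")

local_path = get_weights_path_from_url(CAE_BASE_URL)  # downloaded on first call
state_dict = paddle.load(local_path)                  # plain dict of parameter tensors
print(len(state_dict), "parameter tensors")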
@@ -615,13 +626,22 @@ def _enable_linear_eval(model):
 
 
 def _load_pretrained(pretrained,
+                     pretrained_url,
                      model,
                      model_keys,
                      model_ema_configs,
                      abs_pos_emb,
                      rel_pos_bias,
                      use_ssld=False):
-    checkpoint = paddle.load(pretrained)
+    if pretrained is False:
+        pass
+    elif pretrained is True:
+        local_weight_path = get_weights_path_from_url(pretrained_url).replace(
+            ".pdparams", "")
+        checkpoint = paddle.load(local_weight_path + ".pdparams")
+    elif isinstance(pretrained, str):
+        checkpoint = paddle.load(pretrained)
     checkpoint_model = None
     for model_key in model_keys.split('|'):
         if model_key in checkpoint:
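The rewritten loader gives `pretrained` three meanings: False skips loading, True fetches the official weights from `pretrained_url`, and a string is treated as a local weight file. A self-contained sketch of that dispatch; the helper name is illustrative, not PaddleClas API:

# Illustrative sketch of the new `pretrained` dispatch; resolve_weights is a
# hypothetical helper, not a function in cae.py.
def resolve_weights(pretrained, pretrained_url):
    if pretrained is False:
        return None                          # train from scratch, load nothing
    elif pretrained is True:
        return "download:" + pretrained_url  # fetch the official weights by URL
    elif isinstance(pretrained, str):
        return "local:" + pretrained         # load a user-supplied .pdparams file
    raise TypeError("pretrained must be a bool or a path string")

print(resolve_weights(True, "https://example.com/cae_base_patch16_224_pretrained.pdparams"))
print(resolve_weights("./pretrained/vit_base_cae_pretrained.pdparams", None))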
@@ -766,48 +786,15 @@ def _load_pretrained(pretrained,
     return
 
 
-def cae_small_patch16_224(**kwargs):
+def cae_base_patch16_224(pretrained=True, use_ssld=False, **kwargs):
     config = kwargs.copy()
     enable_linear_eval = config.pop('enable_linear_eval')
     model_keys = config.pop('model_key')
     model_ema_configs = config.pop('model_ema')
     abs_pos_emb = config.pop('abs_pos_emb')
     rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
+    if 'pretrained' in config:
+        pretrained = config.pop('pretrained')
-
-    model = VisionTransformer(
-        patch_size=16,
-        embed_dim=384,
-        depth=12,
-        num_heads=12,
-        mlp_ratio=4,
-        qkv_bias=True,
-        norm_layer=partial(
-            nn.LayerNorm, epsilon=1e-6),
-        **kwargs)
-
-    if enable_linear_eval:
-        _enable_linear_eval(model)
-
-    _load_pretrained(
-        pretrained,
-        model,
-        model_keys,
-        model_ema_configs,
-        abs_pos_emb,
-        rel_pos_bias,
-        use_ssld=False)
-
-    return model
-
-
-def cae_base_patch16_224(**kwargs):
-    config = kwargs.copy()
-    enable_linear_eval = config.pop('enable_linear_eval')
-    model_keys = config.pop('model_key')
-    model_ema_configs = config.pop('model_ema')
-    abs_pos_emb = config.pop('abs_pos_emb')
-    rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
 
     model = VisionTransformer(
         patch_size=16,
@@ -825,6 +812,7 @@ def cae_base_patch16_224(**kwargs):
 
     _load_pretrained(
         pretrained,
+        MODEL_URLS["cae_base_patch16_224"],
         model,
         model_keys,
         model_ema_configs,
@@ -835,124 +823,17 @@ def cae_base_patch16_224(**kwargs):
     return model
-def cae_base_patch16_384(**kwargs):
-    config = kwargs.copy()
-    enable_linear_eval = config.pop('enable_linear_eval')
-    model_keys = config.pop('model_key')
-    model_ema_configs = config.pop('model_ema')
-    abs_pos_emb = config.pop('abs_pos_emb')
-    rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
-
-    model = VisionTransformer(
-        img_size=384,
-        patch_size=16,
-        embed_dim=768,
-        depth=12,
-        num_heads=12,
-        mlp_ratio=4,
-        qkv_bias=True,
-        norm_layer=partial(
-            nn.LayerNorm, epsilon=1e-6),
-        **kwargs)
-
-    if enable_linear_eval:
-        _enable_linear_eval(model)
-
-    _load_pretrained(
-        pretrained,
-        model,
-        model_keys,
-        model_ema_configs,
-        abs_pos_emb,
-        rel_pos_bias,
-        use_ssld=False)
-
-    return model
-
-
-def cae_large_patch16_224(**kwargs):
-    config = kwargs.copy()
-    enable_linear_eval = config.pop('enable_linear_eval')
-    model_keys = config.pop('model_key')
-    model_ema_configs = config.pop('model_ema')
-    abs_pos_emb = config.pop('abs_pos_emb')
-    rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
-
-    model = VisionTransformer(
-        patch_size=16,
-        embed_dim=1024,
-        depth=24,
-        num_heads=16,
-        mlp_ratio=4,
-        qkv_bias=True,
-        norm_layer=partial(
-            nn.LayerNorm, epsilon=1e-6),
-        **kwargs)
-
-    if enable_linear_eval:
-        _enable_linear_eval(model)
-
-    _load_pretrained(
-        pretrained,
-        model,
-        model_keys,
-        model_ema_configs,
-        abs_pos_emb,
-        rel_pos_bias,
-        use_ssld=False)
-
-    return model
-
-
-def cae_large_patch16_384(**kwargs):
-    config = kwargs.copy()
-    enable_linear_eval = config.pop('enable_linear_eval')
-    model_keys = config.pop('model_key')
-    model_ema_configs = config.pop('model_ema')
-    abs_pos_emb = config.pop('abs_pos_emb')
-    rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
-
-    model = VisionTransformer(
-        img_size=384,
-        patch_size=16,
-        embed_dim=1024,
-        depth=24,
-        num_heads=16,
-        mlp_ratio=4,
-        qkv_bias=True,
-        norm_layer=partial(
-            nn.LayerNorm, epsilon=1e-6),
-        **kwargs)
-
-    if enable_linear_eval:
-        _enable_linear_eval(model)
-
-    _load_pretrained(
-        pretrained,
-        model,
-        model_keys,
-        model_ema_configs,
-        abs_pos_emb,
-        rel_pos_bias,
-        use_ssld=False)
-
-    return model
-
-
-def cae_large_patch16_512(**kwargs):
+def cae_large_patch16_224(pretrained=True, use_ssld=False, **kwargs):
     config = kwargs.copy()
     enable_linear_eval = config.pop('enable_linear_eval')
     model_keys = config.pop('model_key')
     model_ema_configs = config.pop('model_ema')
     abs_pos_emb = config.pop('abs_pos_emb')
     rel_pos_bias = config.pop('rel_pos_bias')
-    pretrained = config.pop('pretrained')
+    if 'pretrained' in config:
+        pretrained = config.pop('pretrained')
 
     model = VisionTransformer(
-        img_size=512,
         patch_size=16,
         embed_dim=1024,
         depth=24,
@@ -968,6 +849,7 @@ def cae_large_patch16_512(**kwargs):
 
     _load_pretrained(
         pretrained,
+        MODEL_URLS["cae_large_patch16_224"],
         model,
         model_keys,
         model_ema_configs,
...
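Taken together, the two remaining entry points now download their ImageNet weights by default while still popping the CAE-specific keys before building the VisionTransformer. A hedged construction sketch; the keyword values below are assumptions chosen to mirror the updated config, not the only valid settings:

# Sketch, based on the pops visible in cae_base_patch16_224 above; the exact
# structures expected for model_key and model_ema are assumed, not documented here.
import paddle
from ppcls.arch.backbone import cae_base_patch16_224

model = cae_base_patch16_224(
    pretrained=True,             # True: MODEL_URLS download; False: random init; str: local .pdparams
    enable_linear_eval=False,    # popped before the VisionTransformer is built
    model_key='model|module',    # '|'-separated keys tried when unwrapping a checkpoint
    model_ema={'enable_model_ema': False},  # assumed shape of the EMA config
    abs_pos_emb=False,
    rel_pos_bias=True,
    class_num=102)               # matches class_num in the updated YAML below

x = paddle.randn([1, 3, 224, 224])
print(model(x).shape)            # expected: [1, 102]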
@@ -4,10 +4,10 @@ Global:
   pretrained_model: null
   output_dir: ./output/
   device: gpu
-  save_interval: 1
+  save_interval: 20
   eval_during_train: True
   eval_interval: 1
-  epochs: 200
+  epochs: 100
   print_batch_step: 10
   use_visualdl: False
   # used for static mode and model export
@@ -17,7 +17,7 @@ Global:
 # model architecture
 Arch:
   name: cae_base_patch16_224
-  class_num: 4
+  class_num: 102
   drop_rate: 0.0
   drop_path_rate: 0.1
   attn_drop_rate: 0.0
@@ -39,7 +39,7 @@ Arch:
   enable_model_ema: False
   model_ema_decay: 0.9999
   model_ema_force_cpu: False
-  pretrained: ./pretrained/vit_base_cae_pretrained.pdparams
+  pretrained: True
 # loss function config for training/eval process
 Loss:
@@ -60,7 +60,7 @@ Optimizer:
   layerwise_decay: 0.65
   lr:
     name: Cosine
-    learning_rate: 0.004
+    learning_rate: 0.001
     eta_min: 1e-6
     warmup_epoch: 10
     warmup_start_lr: 1e-6
@@ -71,14 +71,14 @@ DataLoader:
   Train:
     dataset:
       name: ImageNetDataset
-      image_root: ./dataset/paddle-job-153869-0/train_eval_data/images
-      cls_label_path: ./dataset/paddle-job-153869-0/train_eval_data/train_data_list.txt
+      image_root: ./dataset/flowers102/
+      cls_label_path: ./dataset/flowers102/train_list.txt
       batch_transform_ops:
         - MixupCutmixHybrid:
             mixup_alpha: 0.8
             cutmix_alpha: 1.0
             switch_prob: 0.5
-            num_classes: 4
+            num_classes: 102
       transform_ops:
         - DecodeImage:
             to_rgb: True
@@ -99,11 +99,10 @@ DataLoader:
             sl: 0.02
             sh: 0.3
             r1: 0.3
-      delimiter: ' '
     sampler:
       name: DistributedBatchSampler
-      batch_size: 128
+      batch_size: 2
      drop_last: True
      shuffle: True
    loader:
@@ -113,8 +112,8 @@ DataLoader:
   Eval:
     dataset:
       name: ImageNetDataset
-      image_root: ./dataset/paddle-job-153869-0/train_eval_data/images
-      cls_label_path: ./dataset/paddle-job-153869-0/train_eval_data/eval_data_list.txt
+      image_root: ./dataset/flowers102/
+      cls_label_path: ./dataset/flowers102/val_list.txt
       transform_ops:
         - DecodeImage:
             to_rgb: True
@@ -128,11 +127,9 @@ DataLoader:
             mean: [0.485, 0.456, 0.406]
             std: [0.229, 0.224, 0.225]
             order: ''
-      delimiter: ' '
     sampler:
       name: DistributedBatchSampler
-      batch_size: 128
+      batch_size: 2
      drop_last: False
      shuffle: False
    loader:
...
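The dataset paths now point at a Flowers102 layout and the explicit `delimiter: ' '` entries were dropped, which relies on the label lists using the default single-space separator. A small validation sketch under that assumption; the paths and the 5-line limit are illustrative:

# Sketch: check that train_list.txt follows "relative/path<space>label" with labels
# in [0, 102); assumes the default ImageNetDataset delimiter is a single space.
from pathlib import Path

image_root = Path("./dataset/flowers102")
for line in (image_root / "train_list.txt").read_text().splitlines()[:5]:
    rel_path, label = line.rsplit(" ", 1)
    assert (image_root / rel_path).exists(), f"missing image: {rel_path}"
    assert 0 <= int(label) < 102, f"label out of range: {label}"
print("first 5 entries look valid")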