Commit 90d591e4 authored by 悟、

update ppcls

Parent 7b50ce65
......@@ -69,7 +69,7 @@ from .model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG
from .model_zoo.van import VAN_tiny
from .model_zoo.peleenet import PeleeNet
from .model_zoo.convnext import ConvNeXt_tiny
from .model_zoo.cae import cae_base_patch16_224, cae_base_patch16_384, cae_large_patch16_224, cae_large_patch16_384, cae_large_patch16_512, cae_small_patch16_224
from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224
from .variant_models.resnet_variant import ResNet50_last_stage_stride1
from .variant_models.vgg_variant import VGG19Sigmoid
......
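With this change only the 224-resolution CAE factories are exported; the 384- and 512-resolution variants are deleted from cae.py below. A downstream import now looks like this (assuming this file is ppcls/arch/__init__.py):

    from ppcls.arch.model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224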
......@@ -25,6 +25,17 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ....utils.download import get_weights_path_from_url
MODEL_URLS = {
"cae_base_patch16_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/cae_base_patch16_224_pretrained.pdparams",
"cae_large_patch16_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/cae_large_patch16_224_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
def _ntuple(n):
def parse(x):
......@@ -615,13 +626,22 @@ def _enable_linear_eval(model):
def _load_pretrained(pretrained,
pretrained_url,
model,
model_keys,
model_ema_configs,
abs_pos_emb,
rel_pos_bias,
use_ssld=False):
checkpoint = paddle.load(pretrained)
if pretrained is False:
pass
elif pretrained is True:
local_weight_path = get_weights_path_from_url(pretrained_url).replace(
".pdparams", "")
checkpoint = paddle.load(local_weight_path + ".pdparams")
elif isinstance(pretrained, str):
        checkpoint = paddle.load(pretrained)
checkpoint_model = None
for model_key in model_keys.split('|'):
if model_key in checkpoint:
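The rewritten loader accepts three forms of `pretrained`. A standalone sketch of that contract, using only names already imported in this module; `resolve_checkpoint` is a hypothetical helper, not part of the diff:

    def resolve_checkpoint(pretrained, pretrained_url):
        # pretrained=False: keep random init, load nothing.
        if pretrained is False:
            return None
        # pretrained=True: download the released weights once, then reuse the cache.
        if pretrained is True:
            local = get_weights_path_from_url(pretrained_url).replace(".pdparams", "")
            return paddle.load(local + ".pdparams")
        # pretrained="<path>.pdparams": load a local checkpoint directly.
        if isinstance(pretrained, str):
            return paddle.load(pretrained)
        raise RuntimeError("pretrained must be a bool or a checkpoint path")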
......@@ -766,48 +786,15 @@ def _load_pretrained(pretrained,
return
def cae_small_patch16_224(**kwargs):
def cae_base_patch16_224(pretrained=True, use_ssld=False, **kwargs):
config = kwargs.copy()
enable_linear_eval = config.pop('enable_linear_eval')
model_keys = config.pop('model_key')
model_ema_configs = config.pop('model_ema')
abs_pos_emb = config.pop('abs_pos_emb')
rel_pos_bias = config.pop('rel_pos_bias')
pretrained = config.pop('pretrained')
model = VisionTransformer(
patch_size=16,
embed_dim=384,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
**kwargs)
if enable_linear_eval:
_enable_linear_eval(model)
_load_pretrained(
pretrained,
model,
model_keys,
model_ema_configs,
abs_pos_emb,
rel_pos_bias,
use_ssld=False)
return model
def cae_base_patch16_224(**kwargs):
config = kwargs.copy()
enable_linear_eval = config.pop('enable_linear_eval')
model_keys = config.pop('model_key')
model_ema_configs = config.pop('model_ema')
abs_pos_emb = config.pop('abs_pos_emb')
rel_pos_bias = config.pop('rel_pos_bias')
pretrained = config.pop('pretrained')
    if 'pretrained' in config:
        pretrained = config.pop('pretrained')
model = VisionTransformer(
patch_size=16,
......@@ -825,6 +812,7 @@ def cae_base_patch16_224(**kwargs):
_load_pretrained(
pretrained,
MODEL_URLS["cae_base_patch16_224"],
model,
model_keys,
model_ema_configs,
......@@ -835,124 +823,17 @@ def cae_base_patch16_224(**kwargs):
return model
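A hedged usage sketch for the new signature: apart from `pretrained`, every keyword mirrors a key the factory pops, and the concrete values are assumptions, not fixed by this diff:

    model = cae_base_patch16_224(
        pretrained=True,                        # fetch MODEL_URLS["cae_base_patch16_224"]
        enable_linear_eval=False,               # assumption: full fine-tuning
        model_key="model",                      # assumption: '|'-separated checkpoint keys
        model_ema={"enable_model_ema": False},  # assumption: dict stands in for the YAML Arch section
        abs_pos_emb=False,                      # assumption
        rel_pos_bias=True,                      # assumption
        class_num=102)

The 384- and 512-resolution factories that follow are removed by this commit.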
def cae_base_patch16_384(**kwargs):
config = kwargs.copy()
enable_linear_eval = config.pop('enable_linear_eval')
model_keys = config.pop('model_key')
model_ema_configs = config.pop('model_ema')
abs_pos_emb = config.pop('abs_pos_emb')
rel_pos_bias = config.pop('rel_pos_bias')
pretrained = config.pop('pretrained')
model = VisionTransformer(
img_size=384,
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
**kwargs)
if enable_linear_eval:
_enable_linear_eval(model)
_load_pretrained(
pretrained,
model,
model_keys,
model_ema_configs,
abs_pos_emb,
rel_pos_bias,
use_ssld=False)
return model
def cae_large_patch16_224(**kwargs):
config = kwargs.copy()
enable_linear_eval = config.pop('enable_linear_eval')
model_keys = config.pop('model_key')
model_ema_configs = config.pop('model_ema')
abs_pos_emb = config.pop('abs_pos_emb')
rel_pos_bias = config.pop('rel_pos_bias')
pretrained = config.pop('pretrained')
model = VisionTransformer(
patch_size=16,
embed_dim=1024,
depth=24,
num_heads=16,
mlp_ratio=4,
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
**kwargs)
if enable_linear_eval:
_enable_linear_eval(model)
_load_pretrained(
pretrained,
model,
model_keys,
model_ema_configs,
abs_pos_emb,
rel_pos_bias,
use_ssld=False)
return model
def cae_large_patch16_384(**kwargs):
config = kwargs.copy()
enable_linear_eval = config.pop('enable_linear_eval')
model_keys = config.pop('model_key')
model_ema_configs = config.pop('model_ema')
abs_pos_emb = config.pop('abs_pos_emb')
rel_pos_bias = config.pop('rel_pos_bias')
pretrained = config.pop('pretrained')
model = VisionTransformer(
img_size=384,
patch_size=16,
embed_dim=1024,
depth=24,
num_heads=16,
mlp_ratio=4,
qkv_bias=True,
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
**kwargs)
if enable_linear_eval:
_enable_linear_eval(model)
_load_pretrained(
pretrained,
model,
model_keys,
model_ema_configs,
abs_pos_emb,
rel_pos_bias,
use_ssld=False)
return model
def cae_large_patch16_512(**kwargs):
def cae_large_patch16_224(pretrained=True, use_ssld=False, **kwargs):
config = kwargs.copy()
enable_linear_eval = config.pop('enable_linear_eval')
model_keys = config.pop('model_key')
model_ema_configs = config.pop('model_ema')
abs_pos_emb = config.pop('abs_pos_emb')
rel_pos_bias = config.pop('rel_pos_bias')
pretrained = config.pop('pretrained')
    if 'pretrained' in config:
        pretrained = config.pop('pretrained')
model = VisionTransformer(
img_size=512,
patch_size=16,
embed_dim=1024,
depth=24,
......@@ -968,6 +849,7 @@ def cae_large_patch16_512(**kwargs):
_load_pretrained(
pretrained,
MODEL_URLS["cae_large_patch16_224"],
model,
model_keys,
model_ema_configs,
......
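Putting the surviving factories together, a quick shape check; `pretrained=True` downloads the released weights on first use, and the remaining arguments are the same assumptions as in the sketch above:

    import paddle
    from ppcls.arch.model_zoo.cae import cae_large_patch16_224

    model = cae_large_patch16_224(
        pretrained=True,
        enable_linear_eval=False,
        model_key="model",
        model_ema={"enable_model_ema": False},
        abs_pos_emb=False,
        rel_pos_bias=True,
        class_num=102)
    x = paddle.randn([1, 3, 224, 224])
    print(model(x).shape)  # expected: [1, 102]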
......@@ -4,10 +4,10 @@ Global:
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
save_interval: 20
eval_during_train: True
eval_interval: 1
epochs: 200
epochs: 100
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
......@@ -17,7 +17,7 @@ Global:
# model architecture
Arch:
name: cae_base_patch16_224
class_num: 4
class_num: 102
drop_rate: 0.0
drop_path_rate: 0.1
attn_drop_rate: 0.0
......@@ -39,7 +39,7 @@ Arch:
enable_model_ema: False
model_ema_decay: 0.9999
model_ema_force_cpu: False
pretrained: ./pretrained/vit_base_cae_pretrained.pdparams
pretrained: True
# loss function config for training/eval process
Loss:
......@@ -60,7 +60,7 @@ Optimizer:
layerwise_decay: 0.65
lr:
name: Cosine
learning_rate: 0.004
learning_rate: 0.001
eta_min: 1e-6
warmup_epoch: 10
warmup_start_lr: 1e-6
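For orientation: these values give a linear warmup from warmup_start_lr (1e-6) to learning_rate (0.001) over the first 10 epochs, then cosine decay to eta_min (1e-6) by the final epoch. A standalone sketch of the per-epoch curve (ppcls computes the same shape internally, typically per step):

    import math

    def cae_finetune_lr(epoch, base_lr=0.001, eta_min=1e-6,
                        warmup_epoch=10, warmup_start_lr=1e-6, total_epochs=100):
        # Linear warmup, then half-cosine decay down to eta_min.
        if epoch < warmup_epoch:
            return warmup_start_lr + (base_lr - warmup_start_lr) * epoch / warmup_epoch
        progress = (epoch - warmup_epoch) / (total_epochs - warmup_epoch)
        return eta_min + 0.5 * (base_lr - eta_min) * (1 + math.cos(math.pi * progress))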
......@@ -71,14 +71,14 @@ DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/paddle-job-153869-0/train_eval_data/images
cls_label_path: ./dataset/paddle-job-153869-0/train_eval_data/train_data_list.txt
image_root: ./dataset/flowers102/
cls_label_path: ./dataset/flowers102/train_list.txt
batch_transform_ops:
- MixupCutmixHybrid:
mixup_alpha: 0.8
cutmix_alpha: 1.0
switch_prob: 0.5
num_classes: 4
num_classes: 102
transform_ops:
- DecodeImage:
to_rgb: True
......@@ -99,11 +99,10 @@ DataLoader:
sl: 0.02
sh: 0.3
r1: 0.3
delimiter: ' '
sampler:
name: DistributedBatchSampler
batch_size: 128
batch_size: 2
drop_last: True
shuffle: True
loader:
......@@ -113,8 +112,8 @@ DataLoader:
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/paddle-job-153869-0/train_eval_data/images
cls_label_path: ./dataset/paddle-job-153869-0/train_eval_data/eval_data_list.txt
image_root: ./dataset/flowers102/
cls_label_path: ./dataset/flowers102/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
......@@ -128,11 +127,9 @@ DataLoader:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
delimiter: ' '
sampler:
name: DistributedBatchSampler
batch_size: 128
batch_size: 2
drop_last: False
shuffle: False
loader:
......
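With the config retargeted at flowers102, fine-tuning can be driven from Python as below, a minimal sketch mirroring PaddleClas's tools/train.py; the YAML path is an assumption about where this config lives in the repo:

    from ppcls.utils import config
    from ppcls.engine.engine import Engine

    # Assumed location of the finetune config shown above.
    cfg = config.get_config(
        "ppcls/configs/ImageNet/CAE/cae_base_patch16_224_finetune.yaml", show=False)
    engine = Engine(cfg, mode="train")
    engine.train()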