提交 23f5af9f 编写于 作者: Z zh-hike 提交者: zengshao0622

Add the "vit" field to the foundation ViT model names (e.g. CLIP_base_patch32_224 -> CLIP_vit_base_patch32_224)

上级 dec7b024
...@@ -71,7 +71,7 @@ from .model_zoo.mobilevit import MobileViT_XXS, MobileViT_XS, MobileViT_S ...@@ -71,7 +71,7 @@ from .model_zoo.mobilevit import MobileViT_XXS, MobileViT_XS, MobileViT_S
from .model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG_B1, RepVGG_B2, RepVGG_B1g2, RepVGG_B1g4, RepVGG_B2g4, RepVGG_B3, RepVGG_B3g4, RepVGG_D2se from .model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG_B1, RepVGG_B2, RepVGG_B1g2, RepVGG_B1g4, RepVGG_B2g4, RepVGG_B3, RepVGG_B3g4, RepVGG_D2se
from .model_zoo.van import VAN_B0, VAN_B1, VAN_B2, VAN_B3 from .model_zoo.van import VAN_B0, VAN_B1, VAN_B2, VAN_B3
from .model_zoo.peleenet import PeleeNet from .model_zoo.peleenet import PeleeNet
from .model_zoo.foundation_vit import CLIP_base_patch32_224, CLIP_base_patch16_224, CLIP_large_patch14_336, CLIP_large_patch14_224, BEiTv2_base_patch16_224, BEiTv2_large_patch16_224, CAE_base_patch16_224, EVA_huge_patch14, MOCOV3_small, MOCOV3_base, MAE_huge_patch14, MAE_large_patch16, MAE_base_patch16 from .model_zoo.foundation_vit import CLIP_vit_base_patch32_224, CLIP_vit_base_patch16_224, CLIP_vit_large_patch14_336, CLIP_vit_large_patch14_224, BEiTv2_vit_base_patch16_224, BEiTv2_vit_large_patch16_224, CAE_vit_base_patch16_224, EVA_vit_huge_patch14, MOCOV3_vit_small, MOCOV3_vit_base, MAE_vit_huge_patch14, MAE_vit_large_patch16, MAE_vit_base_patch16
from .model_zoo.convnext import ConvNeXt_tiny, ConvNeXt_small, ConvNeXt_base_224, ConvNeXt_base_384, ConvNeXt_large_224, ConvNeXt_large_384 from .model_zoo.convnext import ConvNeXt_tiny, ConvNeXt_small, ConvNeXt_base_224, ConvNeXt_base_384, ConvNeXt_large_224, ConvNeXt_large_384
from .model_zoo.nextvit import NextViT_small_224, NextViT_base_224, NextViT_large_224, NextViT_small_384, NextViT_base_384, NextViT_large_384 from .model_zoo.nextvit import NextViT_small_224, NextViT_base_224, NextViT_large_224, NextViT_small_384, NextViT_base_384, NextViT_large_384
from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224 from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224
......
...@@ -26,19 +26,19 @@ from paddle.nn.initializer import TruncatedNormal, Constant, Normal ...@@ -26,19 +26,19 @@ from paddle.nn.initializer import TruncatedNormal, Constant, Normal
from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = { MODEL_URLS = {
"CLIP_base_patch32_224": None, "CLIP_vit_base_patch32_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_base_patch32_224.pdparams",
"CLIP_base_patch16_224": None, "CLIP_vit_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_base_patch16_224.pdparams",
"CLIP_large_patch14_336": None, "CLIP_vit_large_patch14_336": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_large_patch14_336.pdparams",
"CLIP_large_patch14_224": None, "CLIP_vit_large_patch14_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_large_patch14_224.pdparams",
"BEiTv2_base_patch16_224": None, "BEiTv2_vit_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_base_patch16_224.pdparams",
"BEiTv2_large_patch16_224": None, "BEiTv2_vit_large_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_large_patch16_224.pdparams",
"CAE_base_patch16_224": None, "CAE_vit_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CAE_vit_base_patch16_224.pdparams",
'EVA_huge_patch14':None, 'EVA_vit_huge_patch14':"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/EVA_vit_huge_patch14.pdparams",
"MOCOV3_small": None, "MOCOV3_vit_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MOCOV3_vit_small.pdparams",
"MOCOV3_base": None, "MOCOV3_vit_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MOCOV3_vit_base.pdparams",
"MAE_huge_patch14": None, "MAE_vit_huge_patch14": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_huge_patch14.pdparams",
"MAE_large_patch16": None, "MAE_vit_large_patch16": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_large_patch16.pdparams",
"MAE_base_patch16": None "MAE_vit_base_patch16": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_base_patch16.pdparams",
} }
__all__ = list(MODEL_URLS.keys()) __all__ = list(MODEL_URLS.keys())
...@@ -48,8 +48,8 @@ _model_diff = None ...@@ -48,8 +48,8 @@ _model_diff = None
_CLIP_diff = { _CLIP_diff = {
'add_layer_norm_before_encoder': [ 'add_layer_norm_before_encoder': [
'base_patch32_224', 'base_patch16_224', 'large_patch14_336', 'base_vit_patch32_224', 'base_vit_patch16_224', 'large_vit_patch14_336',
'large_patch14_224' 'large_vit_patch14_224'
], ],
'add_relative_position_bias_in_msa': [], 'add_relative_position_bias_in_msa': [],
'add_shared_rel_pos_bias': [], 'add_shared_rel_pos_bias': [],
...@@ -57,8 +57,8 @@ _CLIP_diff = { ...@@ -57,8 +57,8 @@ _CLIP_diff = {
'remove_cls_token': [], 'remove_cls_token': [],
'remove_abs_pos_emb': [], 'remove_abs_pos_emb': [],
'replace_mlp_GELU': [ 'replace_mlp_GELU': [
'base_patch32_224', 'base_patch16_224', 'large_patch14_336', 'base_vit_patch32_224', 'base_vit_patch16_224', 'large_vit_patch14_336',
'large_patch14_224' 'large_vit_patch14_224'
], ],
'head': { 'head': {
'fc_norm': [], 'fc_norm': [],
...@@ -87,7 +87,7 @@ _CoCa_diff = { ...@@ -87,7 +87,7 @@ _CoCa_diff = {
'add_relative_position_bias_in_msa': [], 'add_relative_position_bias_in_msa': [],
'add_shared_rel_pos_bias': [], 'add_shared_rel_pos_bias': [],
'add_mul_gamma_to_msa_mlp': [], 'add_mul_gamma_to_msa_mlp': [],
'remove_cls_token': ['small_patch16_224'], 'remove_cls_token': ['small_vit_patch16_224'],
'remove_abs_pos_emb': [], 'remove_abs_pos_emb': [],
'replace_mlp_GELU': [], 'replace_mlp_GELU': [],
'head': { 'head': {
...@@ -100,11 +100,11 @@ _CoCa_diff = { ...@@ -100,11 +100,11 @@ _CoCa_diff = {
_BEiTv2_diff = { _BEiTv2_diff = {
'add_layer_norm_before_encoder': [], 'add_layer_norm_before_encoder': [],
'add_relative_position_bias_in_msa': 'add_relative_position_bias_in_msa':
['base_patch16_224', 'large_patch16_224'], ['base_vit_patch16_224', 'large_vit_patch16_224'],
'add_shared_rel_pos_bias': [], 'add_shared_rel_pos_bias': [],
'add_mul_gamma_to_msa_mlp': ['base_patch16_224', 'large_patch16_224'], 'add_mul_gamma_to_msa_mlp': ['base_vit_patch16_224', 'large_vit_patch16_224'],
'remove_cls_token': [], 'remove_cls_token': [],
'remove_abs_pos_emb': ['base_patch16_224', 'large_patch16_224'], 'remove_abs_pos_emb': ['base_vit_patch16_224', 'large_vit_patch16_224'],
'replace_mlp_GELU': [], 'replace_mlp_GELU': [],
'head': { 'head': {
'fc_norm': [], 'fc_norm': [],
...@@ -115,9 +115,9 @@ _BEiTv2_diff = { ...@@ -115,9 +115,9 @@ _BEiTv2_diff = {
_CAE_diff = { _CAE_diff = {
'add_layer_norm_before_encoder': [], 'add_layer_norm_before_encoder': [],
'add_relative_position_bias_in_msa': ['base_patch16_224'], 'add_relative_position_bias_in_msa': ['base_vit_patch16_224'],
'add_shared_rel_pos_bias': [], 'add_shared_rel_pos_bias': [],
'add_mul_gamma_to_msa_mlp': ['base_patch16_224'], 'add_mul_gamma_to_msa_mlp': ['base_vit_patch16_224'],
'remove_cls_token': [], 'remove_cls_token': [],
'remove_abs_pos_emb': [], 'remove_abs_pos_emb': [],
'replace_mlp_GELU': [], 'replace_mlp_GELU': [],
...@@ -137,7 +137,7 @@ _EVA_diff = { ...@@ -137,7 +137,7 @@ _EVA_diff = {
'remove_abs_pos_emb': [], 'remove_abs_pos_emb': [],
'replace_mlp_GELU': [], 'replace_mlp_GELU': [],
'head': { 'head': {
'fc_norm': ['huge_patch14'], 'fc_norm': ['huge_vit_patch14'],
'return_all_tokens': [], 'return_all_tokens': [],
'return_patch_tokens': [], 'return_patch_tokens': [],
} }
...@@ -152,7 +152,7 @@ _MAE_diff = { ...@@ -152,7 +152,7 @@ _MAE_diff = {
'remove_abs_pos_emb': [], 'remove_abs_pos_emb': [],
'replace_mlp_GELU': [], 'replace_mlp_GELU': [],
'head': { 'head': {
'fc_norm': ['huge_patch14'], 'fc_norm': ['huge_vit_patch14'],
'return_all_tokens': [], 'return_all_tokens': [],
'return_patch_tokens': [], 'return_patch_tokens': [],
} }
...@@ -659,7 +659,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): ...@@ -659,7 +659,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
) )
def CLIP_base_patch32_224(pretrained=False, use_ssld=False, **kwargs): def CLIP_vit_base_patch32_224(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -677,7 +677,7 @@ def CLIP_base_patch32_224(pretrained=False, use_ssld=False, **kwargs): ...@@ -677,7 +677,7 @@ def CLIP_base_patch32_224(pretrained=False, use_ssld=False, **kwargs):
return model return model
def CLIP_base_patch16_224(pretrained=False, use_ssld=False, **kwargs): def CLIP_vit_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -695,7 +695,7 @@ def CLIP_base_patch16_224(pretrained=False, use_ssld=False, **kwargs): ...@@ -695,7 +695,7 @@ def CLIP_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
return model return model
def CLIP_large_patch14_336(pretrained=False, use_ssld=False, **kwargs): def CLIP_vit_large_patch14_336(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -713,7 +713,7 @@ def CLIP_large_patch14_336(pretrained=False, use_ssld=False, **kwargs): ...@@ -713,7 +713,7 @@ def CLIP_large_patch14_336(pretrained=False, use_ssld=False, **kwargs):
return model return model
def CLIP_large_patch14_224(pretrained=False, use_ssld=False, **kwargs): def CLIP_vit_large_patch14_224(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -731,7 +731,7 @@ def CLIP_large_patch14_224(pretrained=False, use_ssld=False, **kwargs): ...@@ -731,7 +731,7 @@ def CLIP_large_patch14_224(pretrained=False, use_ssld=False, **kwargs):
return model return model
def BEiTv2_base_patch16_224(pretrained=False, use_ssld=False, **kwargs): def BEiTv2_vit_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -749,7 +749,7 @@ def BEiTv2_base_patch16_224(pretrained=False, use_ssld=False, **kwargs): ...@@ -749,7 +749,7 @@ def BEiTv2_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
return model return model
def BEiTv2_large_patch16_224(pretrained=False, use_ssld=False, **kwargs): def BEiTv2_vit_large_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -767,7 +767,7 @@ def BEiTv2_large_patch16_224(pretrained=False, use_ssld=False, **kwargs): ...@@ -767,7 +767,7 @@ def BEiTv2_large_patch16_224(pretrained=False, use_ssld=False, **kwargs):
return model return model
def MOCOV3_small(pretrained=False, use_ssld=False, **kwargs): def MOCOV3_vit_small(pretrained=False, use_ssld=False, **kwargs):
""" """
vit small in mocov3 vit small in mocov3
""" """
...@@ -786,7 +786,7 @@ def MOCOV3_small(pretrained=False, use_ssld=False, **kwargs): ...@@ -786,7 +786,7 @@ def MOCOV3_small(pretrained=False, use_ssld=False, **kwargs):
return model return model
def MOCOV3_base(pretrained=False, use_ssld=False, **kwargs): def MOCOV3_vit_base(pretrained=False, use_ssld=False, **kwargs):
""" """
vit base in mocov3 vit base in mocov3
""" """
...@@ -805,7 +805,7 @@ def MOCOV3_base(pretrained=False, use_ssld=False, **kwargs): ...@@ -805,7 +805,7 @@ def MOCOV3_base(pretrained=False, use_ssld=False, **kwargs):
return model return model
def MAE_base_patch16(pretrained=False, use_ssld=False, **kwargs): def MAE_vit_base_patch16(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -821,7 +821,7 @@ def MAE_base_patch16(pretrained=False, use_ssld=False, **kwargs): ...@@ -821,7 +821,7 @@ def MAE_base_patch16(pretrained=False, use_ssld=False, **kwargs):
return model return model
def MAE_large_patch16(pretrained=False, use_ssld=False, **kwargs): def MAE_vit_large_patch16(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -837,7 +837,7 @@ def MAE_large_patch16(pretrained=False, use_ssld=False, **kwargs): ...@@ -837,7 +837,7 @@ def MAE_large_patch16(pretrained=False, use_ssld=False, **kwargs):
return model return model
def MAE_huge_patch14(pretrained=False, use_ssld=False, **kwargs): def MAE_vit_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -853,7 +853,7 @@ def MAE_huge_patch14(pretrained=False, use_ssld=False, **kwargs): ...@@ -853,7 +853,7 @@ def MAE_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
return model return model
def EVA_huge_patch14(pretrained=False, use_ssld=False, **kwargs): def EVA_vit_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
...@@ -871,7 +871,7 @@ def EVA_huge_patch14(pretrained=False, use_ssld=False, **kwargs): ...@@ -871,7 +871,7 @@ def EVA_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
return model return model
def CAE_base_patch16_224(pretrained=False, use_ssld=False, **kwargs): def CAE_vit_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model_name = sys._getframe().f_code.co_name model_name = sys._getframe().f_code.co_name
model = VisionTransformer( model = VisionTransformer(
model_name=model_name, model_name=model_name,
......
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from ..model_zoo.foundation_vit import CLIP_large_patch14_224, _load_pretrained from ..model_zoo.foundation_vit import CLIP_vit_large_patch14_224, _load_pretrained
MODEL_URLS = { MODEL_URLS = {
"CLIP_large_patch14_224_aesthetic": "CLIP_large_patch14_224_aesthetic":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册