From 23f5af9f2aa1caa9fd84ea2744454255872044ec Mon Sep 17 00:00:00 2001
From: zh-hike <1583124882@qq.com>
Date: Tue, 7 Feb 2023 09:25:23 +0000
Subject: [PATCH] add field vit to foundationvit's name

---
 ppcls/arch/backbone/__init__.py               |  2 +-
 .../arch/backbone/model_zoo/foundation_vit.py | 76 +++++++++----------
 .../variant_models/foundation_vit_variant.py  |  2 +-
 3 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/ppcls/arch/backbone/__init__.py b/ppcls/arch/backbone/__init__.py
index c1f19313..60eb9c7f 100644
--- a/ppcls/arch/backbone/__init__.py
+++ b/ppcls/arch/backbone/__init__.py
@@ -71,7 +71,7 @@ from .model_zoo.mobilevit import MobileViT_XXS, MobileViT_XS, MobileViT_S
 from .model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG_B1, RepVGG_B2, RepVGG_B1g2, RepVGG_B1g4, RepVGG_B2g4, RepVGG_B3, RepVGG_B3g4, RepVGG_D2se
 from .model_zoo.van import VAN_B0, VAN_B1, VAN_B2, VAN_B3
 from .model_zoo.peleenet import PeleeNet
-from .model_zoo.foundation_vit import CLIP_base_patch32_224, CLIP_base_patch16_224, CLIP_large_patch14_336, CLIP_large_patch14_224, BEiTv2_base_patch16_224, BEiTv2_large_patch16_224, CAE_base_patch16_224, EVA_huge_patch14, MOCOV3_small, MOCOV3_base, MAE_huge_patch14, MAE_large_patch16, MAE_base_patch16
+from .model_zoo.foundation_vit import CLIP_vit_base_patch32_224, CLIP_vit_base_patch16_224, CLIP_vit_large_patch14_336, CLIP_vit_large_patch14_224, BEiTv2_vit_base_patch16_224, BEiTv2_vit_large_patch16_224, CAE_vit_base_patch16_224, EVA_vit_huge_patch14, MOCOV3_vit_small, MOCOV3_vit_base, MAE_vit_huge_patch14, MAE_vit_large_patch16, MAE_vit_base_patch16
 from .model_zoo.convnext import ConvNeXt_tiny, ConvNeXt_small, ConvNeXt_base_224, ConvNeXt_base_384, ConvNeXt_large_224, ConvNeXt_large_384
 from .model_zoo.nextvit import NextViT_small_224, NextViT_base_224, NextViT_large_224, NextViT_small_384, NextViT_base_384, NextViT_large_384
 from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224
diff --git a/ppcls/arch/backbone/model_zoo/foundation_vit.py b/ppcls/arch/backbone/model_zoo/foundation_vit.py
index 1233db81..61e36371 100644
--- a/ppcls/arch/backbone/model_zoo/foundation_vit.py
+++ b/ppcls/arch/backbone/model_zoo/foundation_vit.py
@@ -26,19 +26,19 @@ from paddle.nn.initializer import TruncatedNormal, Constant, Normal
 from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
 
 MODEL_URLS = {
-    "CLIP_base_patch32_224": None,
-    "CLIP_base_patch16_224": None,
-    "CLIP_large_patch14_336": None,
-    "CLIP_large_patch14_224": None,
-    "BEiTv2_base_patch16_224": None,
-    "BEiTv2_large_patch16_224": None,
-    "CAE_base_patch16_224": None,
-    'EVA_huge_patch14':None,
-    "MOCOV3_small": None,
-    "MOCOV3_base": None,
-    "MAE_huge_patch14": None,
-    "MAE_large_patch16": None,
-    "MAE_base_patch16": None
+    "CLIP_vit_base_patch32_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_base_patch32_224.pdparams",
+    "CLIP_vit_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_base_patch16_224.pdparams",
+    "CLIP_vit_large_patch14_336": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_large_patch14_336.pdparams",
+    "CLIP_vit_large_patch14_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_large_patch14_224.pdparams",
+    "BEiTv2_vit_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_base_patch16_224.pdparams",
"BEiTv2_vit_large_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_large_patch16_224.pdparams", + "CAE_vit_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CAE_vit_base_patch16_224.pdparams", + 'EVA_vit_huge_patch14':"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/EVA_vit_huge_patch14.pdparams", + "MOCOV3_vit_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MOCOV3_vit_small.pdparams", + "MOCOV3_vit_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MOCOV3_vit_base.pdparams", + "MAE_vit_huge_patch14": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_huge_patch14.pdparams", + "MAE_vit_large_patch16": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_large_patch16.pdparams", + "MAE_vit_base_patch16": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_base_patch16.pdparams", } __all__ = list(MODEL_URLS.keys()) @@ -48,8 +48,8 @@ _model_diff = None _CLIP_diff = { 'add_layer_norm_before_encoder': [ - 'base_patch32_224', 'base_patch16_224', 'large_patch14_336', - 'large_patch14_224' + 'base_vit_patch32_224', 'base_vit_patch16_224', 'large_vit_patch14_336', + 'large_vit_patch14_224' ], 'add_relative_position_bias_in_msa': [], 'add_shared_rel_pos_bias': [], @@ -57,8 +57,8 @@ _CLIP_diff = { 'remove_cls_token': [], 'remove_abs_pos_emb': [], 'replace_mlp_GELU': [ - 'base_patch32_224', 'base_patch16_224', 'large_patch14_336', - 'large_patch14_224' + 'base_vit_patch32_224', 'base_vit_patch16_224', 'large_vit_patch14_336', + 'large_vit_patch14_224' ], 'head': { 'fc_norm': [], @@ -87,7 +87,7 @@ _CoCa_diff = { 'add_relative_position_bias_in_msa': [], 'add_shared_rel_pos_bias': [], 'add_mul_gamma_to_msa_mlp': [], - 'remove_cls_token': ['small_patch16_224'], + 'remove_cls_token': ['small_vit_patch16_224'], 'remove_abs_pos_emb': [], 'replace_mlp_GELU': [], 'head': { @@ -100,11 +100,11 @@ _CoCa_diff = { _BEiTv2_diff = { 'add_layer_norm_before_encoder': [], 'add_relative_position_bias_in_msa': - ['base_patch16_224', 'large_patch16_224'], + ['base_vit_patch16_224', 'large_vit_patch16_224'], 'add_shared_rel_pos_bias': [], - 'add_mul_gamma_to_msa_mlp': ['base_patch16_224', 'large_patch16_224'], + 'add_mul_gamma_to_msa_mlp': ['base_vit_patch16_224', 'large_vit_patch16_224'], 'remove_cls_token': [], - 'remove_abs_pos_emb': ['base_patch16_224', 'large_patch16_224'], + 'remove_abs_pos_emb': ['base_vit_patch16_224', 'large_vit_patch16_224'], 'replace_mlp_GELU': [], 'head': { 'fc_norm': [], @@ -115,9 +115,9 @@ _BEiTv2_diff = { _CAE_diff = { 'add_layer_norm_before_encoder': [], - 'add_relative_position_bias_in_msa': ['base_patch16_224'], + 'add_relative_position_bias_in_msa': ['base_vit_patch16_224'], 'add_shared_rel_pos_bias': [], - 'add_mul_gamma_to_msa_mlp': ['base_patch16_224'], + 'add_mul_gamma_to_msa_mlp': ['base_vit_patch16_224'], 'remove_cls_token': [], 'remove_abs_pos_emb': [], 'replace_mlp_GELU': [], @@ -137,7 +137,7 @@ _EVA_diff = { 'remove_abs_pos_emb': [], 'replace_mlp_GELU': [], 'head': { - 'fc_norm': ['huge_patch14'], + 'fc_norm': ['huge_vit_patch14'], 'return_all_tokens': [], 'return_patch_tokens': [], } @@ -152,7 +152,7 @@ _MAE_diff = { 'remove_abs_pos_emb': [], 'replace_mlp_GELU': [], 'head': { - 'fc_norm': ['huge_patch14'], + 'fc_norm': ['huge_vit_patch14'], 'return_all_tokens': [], 'return_patch_tokens': 
     }
@@ -659,7 +659,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False):
         )
 
 
-def CLIP_base_patch32_224(pretrained=False, use_ssld=False, **kwargs):
+def CLIP_vit_base_patch32_224(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -677,7 +677,7 @@ def CLIP_base_patch32_224(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def CLIP_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def CLIP_vit_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -695,7 +695,7 @@ def CLIP_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def CLIP_large_patch14_336(pretrained=False, use_ssld=False, **kwargs):
+def CLIP_vit_large_patch14_336(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -713,7 +713,7 @@ def CLIP_large_patch14_336(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def CLIP_large_patch14_224(pretrained=False, use_ssld=False, **kwargs):
+def CLIP_vit_large_patch14_224(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -731,7 +731,7 @@ def CLIP_large_patch14_224(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def BEiTv2_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def BEiTv2_vit_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -749,7 +749,7 @@ def BEiTv2_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def BEiTv2_large_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def BEiTv2_vit_large_patch16_224(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -767,7 +767,7 @@ def BEiTv2_large_patch16_224(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def MOCOV3_small(pretrained=False, use_ssld=False, **kwargs):
+def MOCOV3_vit_small(pretrained=False, use_ssld=False, **kwargs):
     """
     vit small in mocov3
     """
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -786,7 +786,7 @@ def MOCOV3_small(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def MOCOV3_base(pretrained=False, use_ssld=False, **kwargs):
+def MOCOV3_vit_base(pretrained=False, use_ssld=False, **kwargs):
     """
     vit base in mocov3
     """
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -805,7 +805,7 @@ def MOCOV3_base(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def MAE_base_patch16(pretrained=False, use_ssld=False, **kwargs):
+def MAE_vit_base_patch16(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -821,7 +821,7 @@ def MAE_base_patch16(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def MAE_large_patch16(pretrained=False, use_ssld=False, **kwargs):
+def MAE_vit_large_patch16(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -837,7 +837,7 @@ def MAE_large_patch16(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def MAE_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
+def MAE_vit_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -853,7 +853,7 @@ def MAE_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def EVA_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
+def EVA_vit_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
@@ -871,7 +871,7 @@ def EVA_huge_patch14(pretrained=False, use_ssld=False, **kwargs):
     return model
 
 
-def CAE_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
+def CAE_vit_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
     model_name = sys._getframe().f_code.co_name
     model = VisionTransformer(
         model_name=model_name,
diff --git a/ppcls/arch/backbone/variant_models/foundation_vit_variant.py b/ppcls/arch/backbone/variant_models/foundation_vit_variant.py
index 1d46e5a9..c5866d8b 100644
--- a/ppcls/arch/backbone/variant_models/foundation_vit_variant.py
+++ b/ppcls/arch/backbone/variant_models/foundation_vit_variant.py
@@ -1,7 +1,7 @@
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-from ..model_zoo.foundation_vit import CLIP_large_patch14_224, _load_pretrained
+from ..model_zoo.foundation_vit import CLIP_vit_large_patch14_224, _load_pretrained
 
 MODEL_URLS = {
     "CLIP_large_patch14_224_aesthetic":
-- 
GitLab
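
A quick way to sanity-check the renamed builders after this patch is applied is to import one of them from the backbone package and run a dummy forward pass. This is a minimal sketch, not part of the patch itself; it assumes a PaddleClas checkout with the patch applied is on PYTHONPATH and that PaddlePaddle is installed, and it uses CLIP_vit_base_patch16_224 only as a representative example of the renamed functions.

import paddle
from ppcls.arch.backbone import CLIP_vit_base_patch16_224

# Build the backbone without loading pretrained weights (pretrained=False skips the download).
model = CLIP_vit_base_patch16_224(pretrained=False)
model.eval()

# Feed a random 224x224 image batch and inspect the output shape.
x = paddle.randn([1, 3, 224, 224])
out = model(x)
print(out.shape)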