From d7bd27537914752bf097320fc7c635dbd89f56e9 Mon Sep 17 00:00:00 2001 From: zh-hike <1583124882@qq.com> Date: Fri, 14 Apr 2023 02:30:57 +0000 Subject: [PATCH] update foundation_vit from EVA_vit_huge to EVA_vit_giant --- .../models/Foundation_models/FoundationViT.md | 38 +++++++++---------- ppcls/arch/backbone/__init__.py | 2 +- .../arch/backbone/model_zoo/foundation_vit.py | 6 +-- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/zh_CN/models/Foundation_models/FoundationViT.md b/docs/zh_CN/models/Foundation_models/FoundationViT.md index ca4e7eca..e34bf2ba 100644 --- a/docs/zh_CN/models/Foundation_models/FoundationViT.md +++ b/docs/zh_CN/models/Foundation_models/FoundationViT.md @@ -13,7 +13,7 @@ ## 2. 使用说明 -以模型`CLIP_vit_base_patch16_224`为例,使用该模型以及对应的预训练权重进行特征提取的代码如下: +以模型 `CLIP_vit_base_patch16_224`为例,使用该模型以及对应的预训练权重进行特征提取的代码如下: ```python from ppcls.utils import config @@ -31,23 +31,23 @@ output = model(inputs) # the output of model embeding 目前支持的视觉大模型以及预训练权重如下: -| 系列 | 模型 | 模型大小 | embedding_size | 预训练数据集 | 权重下载 | -| :----: | :--------------------------------: | :------: | :------------: | :----------------------------------------------: | ------------------------------------------------------------ | -| CLIP | CLIP_vit_base_patch16_224 | 85M | 768 | WIT | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_base_patch16_224.pdparams) | -| CLIP | CLIP_vit_base_patch32_224 | 87M | 768 | WIT | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_base_patch32_224.pdparams) | -| CLIP | CLIP_vit_large_patch14_224 | 302M | 1024 | WIT | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_large_patch14_224.pdparams) | -| CLIP | CLIP_vit_large_patch14_336 | 302M | 1024 | WIT | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_large_patch14_336.pdparams) | -| BEiTv2 | BEiTv2_vit_base_patch16_224 | 
85M | 768 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_base_patch16_224.pdparams) | -| BEiTv2 | BEiTv2_vit_base_patch16_224_ft21k | 85M | 768 | ImageNet-1k、ImageNet-21k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_base_patch16_224_ft21k.pdparams) | -| BEiTv2 | BEiTv2_vit_large_patch16_224 | 303M | 1024 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_large_patch16_224.pdparams) | -| BEiTv2 | BEiTv2_vit_large_patch16_224_ft21k | 303M | 1024 | ImageNet-1k、ImageNet-21k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_large_patch16_224_ft21k.pdparams) | -| MoCoV3 | MoCoV3_vit_small | 21M | 384 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MoCoV3_vit_small.pdparams) | -| MoCoV3 | MoCoV3_vit_base | 85M | 768 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MoCoV3_vit_base.pdparams) | -| MAE | MAE_vit_base_patch16 | 85M | 768 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_base_patch16.pdparams) | -| MAE | MAE_vit_large_patch16 | 303M | 1024 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_large_patch16.pdparams) | -| MAE | MAE_vit_huge_patch14 | 630M | 1280 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_huge_patch14.pdparams) | -| EVA | EVA_vit_huge_patch14 | 1010M | 1408 | ImageNet-21k, CC12M, CC2M, Object365,COCO, ADE | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/EVA_vit_huge_patch14.pdparams) | -| CAE | CAE_vit_base_patch16_224 | 85M | 768 | ImageNet-1k | 
[下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CAE_vit_base_patch16_224.pdparams) | +| 系列 | 模型 | 模型大小 | embedding_size | 预训练数据集 | 权重下载 | +| :----: | :--------------------------------: | :------: | :------------: | :----------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------- | +| CLIP | CLIP_vit_base_patch16_224 | 85M | 768 | WIT | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_base_patch16_224.pdparams) | +| CLIP | CLIP_vit_base_patch32_224 | 87M | 768 | WIT | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_base_patch32_224.pdparams) | +| CLIP | CLIP_vit_large_patch14_224 | 302M | 1024 | WIT | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_large_patch14_224.pdparams) | +| CLIP | CLIP_vit_large_patch14_336 | 302M | 1024 | WIT | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CLIP_vit_large_patch14_336.pdparams) | +| BEiTv2 | BEiTv2_vit_base_patch16_224 | 85M | 768 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_base_patch16_224.pdparams) | +| BEiTv2 | BEiTv2_vit_base_patch16_224_ft21k | 85M | 768 | ImageNet-1k、ImageNet-21k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_base_patch16_224_ft21k.pdparams) | +| BEiTv2 | BEiTv2_vit_large_patch16_224 | 303M | 1024 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_large_patch16_224.pdparams) | +| BEiTv2 | BEiTv2_vit_large_patch16_224_ft21k | 303M | 1024 | ImageNet-1k、ImageNet-21k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_large_patch16_224_ft21k.pdparams) | +| MoCoV3 | MoCoV3_vit_small 
| 21M | 384 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MoCoV3_vit_small.pdparams) | +| MoCoV3 | MoCoV3_vit_base | 85M | 768 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MoCoV3_vit_base.pdparams) | +| MAE | MAE_vit_base_patch16 | 85M | 768 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_base_patch16.pdparams) | +| MAE | MAE_vit_large_patch16 | 303M | 1024 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_large_patch16.pdparams) | +| MAE | MAE_vit_huge_patch14 | 630M | 1280 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MAE_vit_huge_patch14.pdparams) | +| EVA | EVA_vit_giant_patch14 | 1010M | 1408 | ImageNet-21k, CC12M, CC3M, Object365, COCO, ADE | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/EVA_vit_giant_patch14.pdparams) | +| CAE | CAE_vit_base_patch16_224 | 85M | 768 | ImageNet-1k | [下载地址](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CAE_vit_base_patch16_224.pdparams) | ## 4. 参考文献 @@ -56,4 +56,4 @@ output = model(inputs) # the output of model embeding 3. [BEiT v2: Masked Image Modeling with Vector-Quantized Visual Tokenizers](https://arxiv.org/abs/2208.06366) 4. [CAE: Context Autoencoder for Self-Supervised Representation Learning](https://arxiv.org/abs/2202.03026) 5. [EVA: EVA: Exploring the Limits of Masked Visual Representation Learning at Scale](https://paperswithcode.com/paper/eva-exploring-the-limits-of-masked-visual) -6. [MAE: Masked Autoencoders Are Scalable Vision Learners](https://paperswithcode.com/paper/masked-autoencoders-are-scalable-vision) \ No newline at end of file +6. 
[MAE: Masked Autoencoders Are Scalable Vision Learners](https://paperswithcode.com/paper/masked-autoencoders-are-scalable-vision) diff --git a/ppcls/arch/backbone/__init__.py b/ppcls/arch/backbone/__init__.py index 55fa1484..0c5682ff 100644 --- a/ppcls/arch/backbone/__init__.py +++ b/ppcls/arch/backbone/__init__.py @@ -71,7 +71,7 @@ from .model_zoo.mobilevit import MobileViT_XXS, MobileViT_XS, MobileViT_S from .model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG_B1, RepVGG_B2, RepVGG_B1g2, RepVGG_B1g4, RepVGG_B2g4, RepVGG_B3, RepVGG_B3g4, RepVGG_D2se from .model_zoo.van import VAN_B0, VAN_B1, VAN_B2, VAN_B3 from .model_zoo.peleenet import PeleeNet -from .model_zoo.foundation_vit import CLIP_vit_base_patch32_224, CLIP_vit_base_patch16_224, CLIP_vit_large_patch14_336, CLIP_vit_large_patch14_224, BEiTv2_vit_base_patch16_224, BEiTv2_vit_large_patch16_224, CAE_vit_base_patch16_224, EVA_vit_huge_patch14, MOCOV3_vit_small, MOCOV3_vit_base, MAE_vit_huge_patch14, MAE_vit_large_patch16, MAE_vit_base_patch16 +from .model_zoo.foundation_vit import CLIP_vit_base_patch32_224, CLIP_vit_base_patch16_224, CLIP_vit_large_patch14_336, CLIP_vit_large_patch14_224, BEiTv2_vit_base_patch16_224, BEiTv2_vit_large_patch16_224, CAE_vit_base_patch16_224, EVA_vit_giant_patch14, MOCOV3_vit_small, MOCOV3_vit_base, MAE_vit_huge_patch14, MAE_vit_large_patch16, MAE_vit_base_patch16 from .model_zoo.convnext import ConvNeXt_tiny, ConvNeXt_small, ConvNeXt_base_224, ConvNeXt_base_384, ConvNeXt_large_224, ConvNeXt_large_384 from .model_zoo.nextvit import NextViT_small_224, NextViT_base_224, NextViT_large_224, NextViT_small_384, NextViT_base_384, NextViT_large_384 from .model_zoo.cae import cae_base_patch16_224, cae_large_patch16_224 diff --git a/ppcls/arch/backbone/model_zoo/foundation_vit.py b/ppcls/arch/backbone/model_zoo/foundation_vit.py index 66000516..39c2833d 100644 --- a/ppcls/arch/backbone/model_zoo/foundation_vit.py +++ b/ppcls/arch/backbone/model_zoo/foundation_vit.py 
@@ -40,8 +40,8 @@ MODEL_URLS = { "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/BEiTv2_vit_large_patch16_224.pdparams", "CAE_vit_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/CAE_vit_base_patch16_224.pdparams", - 'EVA_vit_huge_patch14': - "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/EVA_vit_huge_patch14.pdparams", + 'EVA_vit_giant_patch14': + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/EVA_vit_giant_patch14.pdparams", "MOCOV3_vit_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/foundation_models/MOCOV3_vit_small.pdparams", "MOCOV3_vit_base": @@ -867,7 +867,7 @@ def MAE_vit_huge_patch14(pretrained=False, use_ssld=False, **kwargs): return model -def EVA_vit_huge_patch14(pretrained=False, use_ssld=False, **kwargs): +def EVA_vit_giant_patch14(pretrained=False, use_ssld=False, **kwargs): model_name = sys._getframe().f_code.co_name model = VisionTransformer( model_name=model_name, -- GitLab