diff --git a/docs/zh_CN/models/ImageNet1k/ConvNeXt.md b/docs/zh_CN/models/ImageNet1k/ConvNeXt.md index cfc7b701137676b6ebb654e748fd0d9d6989171d..4b976c1b932a2d7bde744ceb072a74a8298d926b 100644 --- a/docs/zh_CN/models/ImageNet1k/ConvNeXt.md +++ b/docs/zh_CN/models/ImageNet1k/ConvNeXt.md @@ -35,6 +35,11 @@ ConvNeXt 系列模型是 Meta 在 2022 年提出 | Models | Top1 | Top5 | Reference
top1 | Reference
top5 | FLOPs
(G) | Params
(M) | |:--:|:--:|:--:|:--:|:--:|:--:|:--:| | ConvNeXt_tiny | 0.8203 | 0.9590 | 0.821 | - | 4.458 | 28.583 | +| ConvNeXt_small | 0.8313 | 0.9643 | 0.831 | - | 8.688 | 50.210 | +| ConvNeXt_base_224 | 0.8384 | 0.9676 | 0.838 | - | 15.360 | 88.573 | +| ConvNeXt_base_384 | 0.8490 | 0.9727 | 0.851 | - | 45.138 | 88.573 | +| ConvNeXt_large_224 | 0.8426 | 0.9690 | 0.843 | - | 34.340 | 197.740 | +| ConvNeXt_large_384 | 0.8527 | 0.9749 | 0.855 | - | 101.001 | 197.740 | ### 1.3 Benchmark diff --git a/docs/zh_CN/models/ImageNet1k/README.md b/docs/zh_CN/models/ImageNet1k/README.md index a9b2149014d696d8d4078328b92910afa92368c0..92a7e473ac6f11a1b1c771cfab3760cd0c37bed4 100644 --- a/docs/zh_CN/models/ImageNet1k/README.md +++ b/docs/zh_CN/models/ImageNet1k/README.md @@ -329,8 +329,11 @@ ResNeSt 系列模型的精度、速度指标如下表所示,更多关于该系 | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |------------------------|-----------|-----------|------------------|------------------|----------|-----------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| -| ResNeSt50_
fast_1s1x64d | 0.8035 | 0.9528 | 2.73 | 5.33 | 8.24 | 4.36 | 26.27 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt50_fast_1s1x64d_infer.tar) | -| ResNeSt50 | 0.8083 | 0.9542 | 7.36 | 10.23 | 13.84 | 5.40 | 27.54 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt50_infer.tar) | +| ResNeSt50_
fast_1s1x64d | 0.8061 | 0.9527 | 2.73 | 5.33 | 8.24 | 4.36 | 26.27 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt50_fast_1s1x64d_infer.tar) | +| ResNeSt50 | 0.8102 | 0.9546 | 7.36 | 10.23 | 13.84 | 5.40 | 27.54 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt50_infer.tar) | +| ResNeSt101 | 0.8279 | 0.9642 | | | | 10.25 | 48.40 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt101_infer.tar) | +| ResNeSt200 | 0.8418 | 0.9698 | | | | 17.50 | 70.41 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt200_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt200_infer.tar) | +| ResNeSt269 | 0.8444 |0.9698 | | | | 22.54 | 111.23 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt269_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt269_infer.tar) | @@ -340,7 +343,19 @@ RegNet 系列模型的精度、速度指标如下表所示,更多关于该系 | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |------------------------|-----------|-----------|------------------|------------------|----------|-----------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| -| RegNetX_4GF | 0.785 | 0.9416 | 6.46 | 8.48 | 11.45 | 4.00 | 22.23 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_4GF_infer.tar) | +| RegNetX_200MF | 0.680 | 0.8842 | | | | 0.20 | 2.74 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_200MF_infer.tar) | +| RegNetX_400MF | 0.723 | 0.9078 | | | | 0.40 | 5.19 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_400MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_400MF_infer.tar) | +| RegNetX_600MF | 0.737 | 0.9198 | | | | 0.61 | 6.23 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_600MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_600MF_infer.tar) | +| RegNetX_800MF | 0.751 | 0.9250 | | | | 0.81 | 7.30 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_800MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_800MF_infer.tar) | +| RegNetX_1600MF | 0.767 | 0.9329 | | | | 1.62 | 9.23 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_1600MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_1600MF_infer.tar) | +| RegNetX_3200MF | 0.781 | 0.9413 | | | | 3.20 | 15.36 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_3200MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_3200MF_infer.tar) | +| RegNetX_4GF | 0.785 | 0.9416 | 6.46 | 8.48 | 11.45 | 3.99 | 22.16 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_4GF_infer.tar) | +| RegNetX_6400MF | 0.790 | 0.9461 | | | | 6.49 | 26.28 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_6400MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_6400MF_infer.tar) | +| RegNetX_8GF | 0.793 | 0.9464 | | | | 8.02 | 39.66 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_8GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_8GF_infer.tar) | +| RegNetX_12GF | 0.797 | 0.9501 | | | | 12.13 | 46.20 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_12GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_12GF_infer.tar) | +| RegNetX_16GF | 0.801 | 0.9505 | | | | 15.99 | 54.39 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_16GF_pretrained.pdparams) | 
[下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_16GF_infer.tar) | +| RegNetX_32GF | 0.803 | 0.9526 | | | | 32.33 | 130.67 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_32GF_infer.tar) | + @@ -359,7 +374,9 @@ RegNet 系列模型的精度、速度指标如下表所示,更多关于该系 | RepVGG_B1g2 | 0.7732 | 0.9359 | | | | 8.82 | 41.36 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B1g2_infer.tar) | | RepVGG_B1g4 | 0.7675 | 0.9335 | | | | 7.31 | 36.13 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B1g4_infer.tar) | | RepVGG_B2g4 | 0.7881 | 0.9448 | | | | 11.34 | 55.78 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B2g4_infer.tar) | -| RepVGG_B3g4 | 0.7965 | 0.9485 | | | | 16.07 | 75.63 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B3g4_infer.tar) | +| RepVGG_B3 | 0.8031 | 0.9517 | | | | 29.16 | 123.19 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B3_infer.tar) | +| RepVGG_B3g4 | 0.8005 | 0.9502 | | | | 17.89 | 83.93 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B3g4_infer.tar) | +| RepVGG_D2se | 0.8339 | 0.9665 | | | | 36.54 | 133.47 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_D2se_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_D2se_infer.tar) | @@ -441,6 +458,11 @@ RegNet 系列模型的精度、速度指标如下表所示,更多关于该系 | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | | ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | | ConvNeXt_tiny | 0.8203 | 0.9590 | - | - | - | 4.458 | 28.583 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_tiny_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_tiny_infer.tar) | +| ConvNeXt_small | 0.8313 | 0.9643 | - | - | - | 8.688 | 50.210 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_small_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_small_infer.tar) | +| ConvNeXt_base_224 | 0.8384 | 0.9676 | - | - | - | 15.360 | 88.573 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_base_224_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_base_224_infer.tar) | +| ConvNeXt_base_384 | 0.8490 | 0.9727 | - | - | - | 45.138 | 88.573 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_base_384_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_base_384_infer.tar) | +| ConvNeXt_large_224 | 0.8426 | 0.9690 | - | - | - | 34.340 | 197.740 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_large_224_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_large_224_infer.tar) | +| ConvNeXt_large_384 | 0.8527 | 0.9749 | - | - | - | 101.001 | 197.740 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_large_384_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_large_384_infer.tar) | @@ -451,6 +473,9 @@ RegNet 系列模型的精度、速度指标如下表所示,更多关于该系 | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | | ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | | VAN_B0 | 0.7535 | 0.9299 | - | - | - | 0.880 | 4.110 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B0_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VAN_B0_infer.tar) | +| VAN_B1 | 0.8102 | 0.9562 | - | - | - | 2.518 | 13.869 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B1_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VAN_B1_infer.tar) | +| VAN_B2 | 0.8280 | 0.9620 | - | - | - | 5.032 | 26.592 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B2_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VAN_B2_infer.tar) | +| VAN_B3 | 0.8389 | 0.9668 | - | - | - | 8.987 | 44.790 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B3_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VAN_B3_infer.tar) | @@ -699,7 +724,8 @@ DeiT(Data-efficient Image Transformers)系列模型的精度、速度指标 | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | | ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -| TNT_small | 0.8121 |0.9563 | | | 4.83 | 23.68 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/TNT_small_infer.tar) | +| TNT_small | 0.8148 |0.9580 | | | 4.83 | 23.69 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/TNT_small_infer.tar) | +| TNT_base | 0.8276 |0.9617 | | | 13.40 | 65.30 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_base_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/TNT_base_infer.tar) | **注**:TNT 模型的数据预处理部分 `NormalizeImage` 中的 `mean` 与 `std` 均为 0.5。 diff --git a/docs/zh_CN/models/ImageNet1k/RegNet.md b/docs/zh_CN/models/ImageNet1k/RegNet.md index db138dec3c17e141af4a11d9fb7768803f4a09c0..7bde331682a2070418a46ce3f033a181d70a6738 100644 --- a/docs/zh_CN/models/ImageNet1k/RegNet.md +++ b/docs/zh_CN/models/ImageNet1k/RegNet.md @@ -35,7 +35,18 @@ RegNet 是由 facebook 于 2020 年提出,旨在深化设计空间理念的概 | Models | Top1 | Top5 | Reference
top1 | Reference
top5 | FLOPs
(G) | Params
(M) | |:--:|:--:|:--:|:--:|:--:|:--:|:--:| -| RegNetX_4GF | 0.7850 | 0.9416| 0.7860 | -| 8.0 | 22.1 | +| RegNetX_200MF | 0.6804 | 0.8842| 0.6821 | -| 0.2 | 2.7 | +| RegNetX_400MF | 0.7225 | 0.9078| 0.7228 | -| 0.4 | 5.2 | +| RegNetX_600MF | 0.7366 | 0.9198| 0.7286 | -| 0.6 | 6.2 | +| RegNetX_800MF | 0.7512 | 0.9250| 0.7494 | -| 0.8 | 7.3 | +| RegNetX_1600MF | 0.7673 | 0.9329| 0.7671 | -| 1.6 | 9.2 | +| RegNetX_3200MF | 0.7809 | 0.9413| 0.7819 | -| 3.2 | 15.3 | +| RegNetX_4GF | 0.7850 | 0.9416| 0.7860 | -| 4.0 | 22.2 | +| RegNetX_6400MF | 0.7897 | 0.9461| 0.7915 | -| 6.5 | 26.2 | +| RegNetX_8GF | 0.7928 | 0.9464| 0.7938 | -| 8.0 | 39.7 | +| RegNetX_12GF | 0.7972 | 0.9501| 0.8000 | -| 12.1 | 46.2 | +| RegNetX_16GF | 0.8013 | 0.9505| 0.8012 | -| 16.0 | 54.4 | +| RegNetX_32GF | 0.8032 | 0.9526| 0.8052 | -| 32.33 | 130.67 | ### 1.3 Benchmark diff --git a/docs/zh_CN/models/ImageNet1k/RepVGG.md b/docs/zh_CN/models/ImageNet1k/RepVGG.md index afdb9efd455c09a59d0e80a597e2ff5451ccad71..8542d7fe81d29f25d6843584e7daacdefd916592 100644 --- a/docs/zh_CN/models/ImageNet1k/RepVGG.md +++ b/docs/zh_CN/models/ImageNet1k/RepVGG.md @@ -41,7 +41,9 @@ RepVGG(Making VGG-style ConvNets Great Again)系列模型是由清华大学(丁 | RepVGG_B1g2 | 0.7732 | 0.9359 | 0.7778 | - | - | - | | RepVGG_B1g4 | 0.7675 | 0.9335 | 0.7758 | - | - | - | | RepVGG_B2g4 | 0.7881 | 0.9448 | 0.7938 | - | - | - | -| RepVGG_B3g4 | 0.7965 | 0.9485 | 0.8021 | - | - | - | +| RepVGG_B3 | 0.8031 | 0.9517 | 0.8052 | - | - | - | +| RepVGG_B3g4 | 0.8005 | 0.9502 | 0.8021 | - | - | - | +| RepVGG_D2se | 0.8339 | 0.9665 | 0.8355 | - | - | - | 关于 Params、FLOPs、Inference speed 等信息,敬请期待。 diff --git a/docs/zh_CN/models/ImageNet1k/ResNeSt.md b/docs/zh_CN/models/ImageNet1k/ResNeSt.md index 8a7664474015337d41cce21bd7539f7f6bf400db..51c7255b6178bcf20bc28c43f3e43e2a96ef874e 100644 --- a/docs/zh_CN/models/ImageNet1k/ResNeSt.md +++ b/docs/zh_CN/models/ImageNet1k/ResNeSt.md @@ -35,8 +35,11 @@ ResNeSt 系列模型是在 2020 年提出的,在原有的 resnet 网络结构 | Models | Top1 | Top5 | Reference
top1 | Reference
top5 | FLOPs
(G) | Params
(M) | |:--:|:--:|:--:|:--:|:--:|:--:|:--:| -| ResNeSt50_fast_1s1x64d | 0.8035 | 0.9528| 0.8035 | -| 8.68 | 26.3 | -| ResNeSt50 | 0.8083 | 0.9542| 0.8113 | -| 10.78 | 27.5 | +| ResNeSt50_fast_1s1x64d | 0.8061 | 0.9527| 0.8035 | -| 5.40 | 26.3 | +| ResNeSt50 | 0.8102 | 0.9546| 0.8103 | -| 5.40 | 27.5 | +| ResNeSt101 | 0.8279 | 0.9642| 0.8283 | -| 10.25 | 48.4 | +| ResNeSt200 | 0.8418 | 0.9698| 0.8384 | -| 17.50 | 70.4 | +| ResNeSt269 | 0.8444 | 0.9698| 0.8454 | -| 22.54 | 111.2 | ### 1.3 Benchmark diff --git a/docs/zh_CN/models/ImageNet1k/TNT.md b/docs/zh_CN/models/ImageNet1k/TNT.md index 1d0f7b211cdaf75e18c9c4c666c08ffc37dfcd8d..4705e1aeb381c4f3da3223f14581270bda9f58ab 100644 --- a/docs/zh_CN/models/ImageNet1k/TNT.md +++ b/docs/zh_CN/models/ImageNet1k/TNT.md @@ -34,7 +34,8 @@ PaddleClas 所提供的该系列模型的预训练模型权重,均是基于其 | Models | Top1 | Top5 | Reference
top1 | Reference
top5 | FLOPs
(G) | Params
(M) | |:--:|:--:|:--:|:--:|:--:|:--:|:--:| -| TNT_small | 0.8121 | 0.9563 | - | - | 5.2 | 23.8 | +| TNT_small | 0.8148 | 0.9580 | 0.815 | - | 4.8 | 23.7 | +| TNT_base | 0.8276 | 0.9617 | 0.829 | - | 13.4 | 65.3 | **备注:** PaddleClas 所提供的该系列模型的预训练模型权重,均是基于其官方提供的权重转得。 diff --git a/docs/zh_CN/models/ImageNet1k/VAN.md b/docs/zh_CN/models/ImageNet1k/VAN.md index e85320e34bbf90680362c5cae8f354fe8446c76f..c3c7946514ba43ef1846097ce47991cfad39dadd 100644 --- a/docs/zh_CN/models/ImageNet1k/VAN.md +++ b/docs/zh_CN/models/ImageNet1k/VAN.md @@ -35,6 +35,9 @@ VAN(Visual Attention Network)系列模型是在 2022 年提出的 CNN 架构 | Models | Top1 | Top5 | Reference
top1 | Reference
top5 | FLOPs
(G) | Params
(M) | |:--:|:--:|:--:|:--:|:--:|:--:|:--:| | VAN-B0 | 0.7535 | 0.9299 | 0.754 | - | 0.880 | 4.110 | +| VAN-B1 | 0.8102 | 0.9562 | 0.811 | - | 2.518 | 13.869 | +| VAN-B2 | 0.8280 | 0.9620 | 0.828 | - | 5.032 | 26.592 | +| VAN-B3 | 0.8389 | 0.9668 | 0.839 | - | 8.987 | 44.790 | ### 1.3 Benchmark diff --git a/ppcls/arch/backbone/model_zoo/convnext.py b/ppcls/arch/backbone/model_zoo/convnext.py index d089d5e01cc4313a3cf61a1c4f88a6fedae21ce9..13e73de2290d449ce057ef6aee86a57220487e6b 100644 --- a/ppcls/arch/backbone/model_zoo/convnext.py +++ b/ppcls/arch/backbone/model_zoo/convnext.py @@ -23,6 +23,16 @@ from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_fro MODEL_URLS = { "ConvNeXt_tiny": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_tiny_pretrained.pdparams", + "ConvNeXt_small": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_small_pretrained.pdparams", + "ConvNeXt_base_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_base_224_pretrained.pdparams", + "ConvNeXt_base_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_base_384_pretrained.pdparams", + "ConvNeXt_large_224": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_large_224_pretrained.pdparams", + "ConvNeXt_large_384": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_large_384_pretrained.pdparams" } __all__ = list(MODEL_URLS.keys()) @@ -231,3 +241,42 @@ def ConvNeXt_tiny(pretrained=False, use_ssld=False, **kwargs): _load_pretrained( pretrained, model, MODEL_URLS["ConvNeXt_tiny"], use_ssld=use_ssld) return model + + +def ConvNeXt_small(pretrained=False, use_ssld=False, **kwargs): + model = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ConvNeXt_small"], use_ssld=use_ssld) + return model + + +def ConvNeXt_base_224(pretrained=False, use_ssld=False, **kwargs): + model = ConvNeXt( + depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ConvNeXt_base_224"], use_ssld=use_ssld) + return model + + +def ConvNeXt_base_384(pretrained=False, use_ssld=False, **kwargs): + model = ConvNeXt( + depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ConvNeXt_base_384"], use_ssld=use_ssld) + return model + + +def ConvNeXt_large_224(pretrained=False, use_ssld=False, **kwargs): + model = ConvNeXt( + depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ConvNeXt_large_224"], use_ssld=use_ssld) + return model + + +def ConvNeXt_large_384(pretrained=False, use_ssld=False, **kwargs): + model = ConvNeXt( + depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ConvNeXt_large_384"], use_ssld=use_ssld) + return model \ No newline at end of file diff --git a/ppcls/arch/backbone/model_zoo/regnet.py b/ppcls/arch/backbone/model_zoo/regnet.py index d55163e92d59afde9baa92a937c58f3cccd71a91..151e39439a0c7560bd51cdbca1e72e6ac38bf702 100644 --- a/ppcls/arch/backbone/model_zoo/regnet.py +++ b/ppcls/arch/backbone/model_zoo/regnet.py @@ -34,8 +34,26 @@ from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_fro MODEL_URLS = { "RegNetX_200MF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams", + "RegNetX_400MF": + 
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_400MF_pretrained.pdparams", + "RegNetX_600MF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_600MF_pretrained.pdparams", + "RegNetX_800MF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_800MF_pretrained.pdparams", + "RegNetX_1600MF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_1600MF_pretrained.pdparams", + "RegNetX_3200MF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_3200MF_pretrained.pdparams", "RegNetX_4GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams", + "RegNetX_6400MF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_6400MF_pretrained.pdparams", + "RegNetX_8GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_8GF_pretrained.pdparams", + "RegNetX_12GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_12GF_pretrained.pdparams", + "RegNetX_16GF": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_16GF_pretrained.pdparams", "RegNetX_32GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams", "RegNetY_200MF": @@ -43,7 +61,7 @@ MODEL_URLS = { "RegNetY_4GF": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams", "RegNetY_32GF": - "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams", + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams" } __all__ = list(MODEL_URLS.keys()) @@ -106,7 +124,7 @@ class ConvBNLayer(nn.Layer): padding=padding, groups=groups, weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"), - bias_attr=ParamAttr(name=name + ".conv2d.output.1.b_0")) + bias_attr=False) bn_name = name + "_bn" self._batch_norm = BatchNorm( num_filters, @@ -354,6 +372,81 @@ def RegNetX_200MF(pretrained=False, use_ssld=False, **kwargs): return model +def RegNetX_400MF(pretrained=False, use_ssld=False, **kwargs): + model = RegNet( + w_a=24.48, + w_0=24, + w_m=2.54, + d=22, + group_w=16, + bot_mul=1.0, + q=8, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_400MF"], use_ssld=use_ssld) + return model + + +def RegNetX_600MF(pretrained=False, use_ssld=False, **kwargs): + model = RegNet( + w_a=36.97, + w_0=48, + w_m=2.24, + d=16, + group_w=24, + bot_mul=1.0, + q=8, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_600MF"], use_ssld=use_ssld) + return model + + +def RegNetX_800MF(pretrained=False, use_ssld=False, **kwargs): + model = RegNet( + w_a=35.73, + w_0=56, + w_m=2.28, + d=16, + group_w=16, + bot_mul=1.0, + q=8, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_800MF"], use_ssld=use_ssld) + return model + + +def RegNetX_1600MF(pretrained=False, use_ssld=False, **kwargs): + model = RegNet( + w_a=34.01, + w_0=80, + w_m=2.25, + d=18, + group_w=24, + bot_mul=1.0, + q=8, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_1600MF"], use_ssld=use_ssld) + return model + + +def RegNetX_3200MF(pretrained=False, use_ssld=False, **kwargs): + model = RegNet( + w_a=26.31, + w_0=88, + w_m=2.25, + d=25, + group_w=48, + bot_mul=1.0, + q=8, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_3200MF"], use_ssld=use_ssld) + return model + + def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs): model = RegNet( w_a=38.65, @@ -369,63 +462,75 @@ def 
RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs): return model -def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs): +def RegNetX_6400MF(pretrained=False, use_ssld=False, **kwargs): model = RegNet( - w_a=69.86, - w_0=320, - w_m=2.0, + w_a=60.83, + w_0=184, + w_m=2.07, + d=17, + group_w=56, + bot_mul=1.0, + q=8, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RegNetX_6400MF"], use_ssld=use_ssld) + return model + + +def RegNetX_8GF(pretrained=False, use_ssld=False, **kwargs): + model = RegNet( + w_a=49.56, + w_0=80, + w_m=2.88, d=23, - group_w=168, + group_w=120, bot_mul=1.0, q=8, **kwargs) _load_pretrained( - pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) + pretrained, model, MODEL_URLS["RegNetX_8GF"], use_ssld=use_ssld) return model -def RegNetY_200MF(pretrained=False, use_ssld=False, **kwargs): +def RegNetX_12GF(pretrained=False, use_ssld=False, **kwargs): model = RegNet( - w_a=36.44, - w_0=24, - w_m=2.49, - d=13, - group_w=8, + w_a=73.36, + w_0=168, + w_m=2.37, + d=19, + group_w=112, bot_mul=1.0, q=8, - se_on=True, **kwargs) _load_pretrained( - pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) + pretrained, model, MODEL_URLS["RegNetX_12GF"], use_ssld=use_ssld) return model -def RegNetY_4GF(pretrained=False, use_ssld=False, **kwargs): +def RegNetX_16GF(pretrained=False, use_ssld=False, **kwargs): model = RegNet( - w_a=31.41, - w_0=96, - w_m=2.24, + w_a=55.59, + w_0=216, + w_m=2.1, d=22, - group_w=64, + group_w=128, bot_mul=1.0, q=8, - se_on=True, **kwargs) _load_pretrained( - pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) + pretrained, model, MODEL_URLS["RegNetX_16GF"], use_ssld=use_ssld) return model -def RegNetY_32GF(pretrained=False, use_ssld=False, **kwargs): +def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs): model = RegNet( - w_a=115.89, - w_0=232, - w_m=2.53, - d=20, - group_w=232, + w_a=69.86, + w_0=320, + w_m=2.0, + d=23, + group_w=168, bot_mul=1.0, q=8, - se_on=True, **kwargs) _load_pretrained( pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld) diff --git a/ppcls/arch/backbone/model_zoo/repvgg.py b/ppcls/arch/backbone/model_zoo/repvgg.py index b30098277b523e50da11108e77127b179345bd34..2dbf7191d617b0796d55c1d5e776477c53564d3f 100644 --- a/ppcls/arch/backbone/model_zoo/repvgg.py +++ b/ppcls/arch/backbone/model_zoo/repvgg.py @@ -17,6 +17,7 @@ import paddle.nn as nn import paddle +import paddle.nn.functional as F import numpy as np from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url @@ -40,8 +41,12 @@ MODEL_URLS = { "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams", "RepVGG_B2g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams", + "RepVGG_B3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams", "RepVGG_B3g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams", + "RepVGG_D2se": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_D2se_pretrained.pdparams" } __all__ = list(MODEL_URLS.keys()) @@ -76,6 +81,33 @@ class ConvBN(nn.Layer): return y +class SEBlock(nn.Layer): + def __init__(self, input_channels, internal_neurons): + super(SEBlock, self).__init__() + self.down = nn.Conv2D( + in_channels=input_channels, + out_channels=internal_neurons, + kernel_size=1, + stride=1, + bias_attr=True) + self.up = nn.Conv2D( + in_channels=internal_neurons, + 
out_channels=input_channels, + kernel_size=1, + stride=1, + bias_attr=True) + self.input_channels = input_channels + + def forward(self, inputs): + x = F.avg_pool2d(inputs, kernel_size=inputs.shape[3]) + x = self.down(x) + x = F.relu(x) + x = self.up(x) + x = F.sigmoid(x) + x = x.reshape([-1, self.input_channels, 1, 1]) + return inputs * x + + class RepVGGBlock(nn.Layer): def __init__(self, in_channels, @@ -85,7 +117,8 @@ class RepVGGBlock(nn.Layer): padding=0, dilation=1, groups=1, - padding_mode='zeros'): + padding_mode='zeros', + use_se=False): super(RepVGGBlock, self).__init__() self.is_repped = False @@ -105,6 +138,11 @@ class RepVGGBlock(nn.Layer): self.nonlinearity = nn.ReLU() + if use_se: + self.se = SEBlock( + out_channels, internal_neurons=out_channels // 16) + else: + self.se = nn.Identity() self.rbr_identity = nn.BatchNorm2D( num_features=in_channels ) if out_channels == in_channels and stride == 1 else None @@ -132,7 +170,7 @@ class RepVGGBlock(nn.Layer): else: id_out = self.rbr_identity(inputs) return self.nonlinearity( - self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) + self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)) def rep(self): if not hasattr(self, 'rbr_reparam'): @@ -198,14 +236,12 @@ class RepVGG(nn.Layer): num_blocks, width_multiplier=None, override_groups_map=None, - class_num=1000): + class_num=1000, + use_se=False): super(RepVGG, self).__init__() - assert len(width_multiplier) == 4 self.override_groups_map = override_groups_map or dict() - assert 0 not in self.override_groups_map - self.in_planes = min(64, int(64 * width_multiplier[0])) self.stage0 = RepVGGBlock( @@ -213,20 +249,33 @@ class RepVGG(nn.Layer): out_channels=self.in_planes, kernel_size=3, stride=2, - padding=1) + padding=1, + use_se=use_se) self.cur_layer_idx = 1 self.stage1 = self._make_stage( - int(64 * width_multiplier[0]), num_blocks[0], stride=2) + int(64 * width_multiplier[0]), + num_blocks[0], + stride=2, + use_se=use_se) self.stage2 = self._make_stage( - int(128 * width_multiplier[1]), num_blocks[1], stride=2) + int(128 * width_multiplier[1]), + num_blocks[1], + stride=2, + use_se=use_se) self.stage3 = self._make_stage( - int(256 * width_multiplier[2]), num_blocks[2], stride=2) + int(256 * width_multiplier[2]), + num_blocks[2], + stride=2, + use_se=use_se) self.stage4 = self._make_stage( - int(512 * width_multiplier[3]), num_blocks[3], stride=2) + int(512 * width_multiplier[3]), + num_blocks[3], + stride=2, + use_se=use_se) self.gap = nn.AdaptiveAvgPool2D(output_size=1) self.linear = nn.Linear(int(512 * width_multiplier[3]), class_num) - def _make_stage(self, planes, num_blocks, stride): + def _make_stage(self, planes, num_blocks, stride, use_se=False): strides = [stride] + [1] * (num_blocks - 1) blocks = [] for stride in strides: @@ -238,7 +287,8 @@ class RepVGG(nn.Layer): kernel_size=3, stride=stride, padding=1, - groups=cur_groups)) + groups=cur_groups, + use_se=use_se)) self.in_planes = planes self.cur_layer_idx += 1 return nn.Sequential(*blocks) @@ -367,6 +417,17 @@ def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs): return model +def RepVGG_B3(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[4, 6, 16, 1], + width_multiplier=[3, 3, 3, 5], + override_groups_map=None, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld) + return model + + def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs): model = RepVGG( num_blocks=[4, 6, 16, 1], @@ -376,3 +437,15 @@ def 
RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs): _load_pretrained( pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld) return model + + +def RepVGG_D2se(pretrained=False, use_ssld=False, **kwargs): + model = RepVGG( + num_blocks=[8, 14, 24, 1], + width_multiplier=[2.5, 2.5, 2.5, 5], + override_groups_map=None, + use_se=True, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["RepVGG_D2se"], use_ssld=use_ssld) + return model diff --git a/ppcls/arch/backbone/model_zoo/resnest.py b/ppcls/arch/backbone/model_zoo/resnest.py index c9f7850fbade0537704efd0cecadb7df07b28b8e..ab15d65dade8f871969d446467aca41ab3a79249 100644 --- a/ppcls/arch/backbone/model_zoo/resnest.py +++ b/ppcls/arch/backbone/model_zoo/resnest.py @@ -39,6 +39,10 @@ MODEL_URLS = { "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams", "ResNeSt101": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams", + "ResNeSt200": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt200_pretrained.pdparams", + "ResNeSt269": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt269_pretrained.pdparams" } __all__ = list(MODEL_URLS.keys()) @@ -160,8 +164,7 @@ class SplatConv(nn.Layer): padding=0, groups=groups, weight_attr=ParamAttr( - name=name + "_weights", initializer=KaimingNormal()), - bias_attr=False) + name=name + "_weights", initializer=KaimingNormal())) self.rsoftmax = rSoftmax(radix=radix, cardinality=groups) @@ -739,3 +742,39 @@ def ResNeSt101(pretrained=False, use_ssld=False, **kwargs): _load_pretrained( pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld) return model + + +def ResNeSt200(pretrained=False, use_ssld=False, **kwargs): + model = ResNeSt( + layers=[3, 24, 36, 3], + radix=2, + groups=1, + bottleneck_width=64, + deep_stem=True, + stem_width=64, + avg_down=True, + avd=True, + avd_first=False, + final_drop=0.0, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeSt200"], use_ssld=use_ssld) + return model + + +def ResNeSt269(pretrained=False, use_ssld=False, **kwargs): + model = ResNeSt( + layers=[3, 30, 48, 8], + radix=2, + groups=1, + bottleneck_width=64, + deep_stem=True, + stem_width=64, + avg_down=True, + avd=True, + avd_first=False, + final_drop=0.0, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["ResNeSt269"], use_ssld=use_ssld) + return model diff --git a/ppcls/arch/backbone/model_zoo/tnt.py b/ppcls/arch/backbone/model_zoo/tnt.py index b9d8327098db724d3514987ffac6cb4091753caa..8d650085a8c8ff54055b00ee27730321be0a0828 100644 --- a/ppcls/arch/backbone/model_zoo/tnt.py +++ b/ppcls/arch/backbone/model_zoo/tnt.py @@ -20,7 +20,6 @@ import numpy as np import paddle import paddle.nn as nn - from paddle.nn.initializer import TruncatedNormal, Constant from ..base.theseus_layer import Identity @@ -28,7 +27,9 @@ from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_fro MODEL_URLS = { "TNT_small": - "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams" + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams", + "TNT_base": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_base_pretrained.pdparams" } __all__ = MODEL_URLS.keys() @@ -38,6 +39,14 @@ zeros_ = Constant(value=0.) ones_ = Constant(value=1.) 
+class Identity(nn.Layer): + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + def drop_path(x, drop_prob=0., training=False): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... @@ -165,8 +174,10 @@ class Block(nn.Layer): act_layer=act_layer, drop=drop) - self.norm1_proj = norm_layer(in_dim) - self.proj = nn.Linear(in_dim * num_pixel, dim) + self.norm1_proj = norm_layer(in_dim * num_pixel) + self.proj = nn.Linear(in_dim * num_pixel, dim, bias_attr=False) + self.norm2_proj = norm_layer(in_dim * num_pixel) + # Outer transformer self.norm_out = norm_layer(dim) self.attn_out = Attention( @@ -196,11 +207,10 @@ class Block(nn.Layer): self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed)))) # outer B, N, C = patch_embed.shape - norm1_proj = self.norm1_proj(pixel_embed) - norm1_proj = norm1_proj.reshape( - (B, N - 1, norm1_proj.shape[1] * norm1_proj.shape[2])) - patch_embed[:, 1:] = paddle.add(patch_embed[:, 1:], - self.proj(norm1_proj)) + norm1_proj = pixel_embed.reshape(shape=[B, N - 1, C]) + norm1_proj = self.norm1_proj(norm1_proj) + patch_embed[:, 1:] = paddle.add( + patch_embed[:, 1:], self.norm2_proj(self.proj(norm1_proj))) patch_embed = paddle.add( patch_embed, self.drop_path(self.attn_out(self.norm_out(patch_embed)))) @@ -217,6 +227,7 @@ class PixelEmbed(nn.Layer): in_dim=48, stride=4): super().__init__() + self.patch_size = patch_size num_patches = (img_size // patch_size)**2 self.img_size = img_size self.num_patches = num_patches @@ -230,14 +241,12 @@ class PixelEmbed(nn.Layer): def forward(self, x, pixel_pos): B, C, H, W = x.shape assert H == self.img_size and W == self.img_size, f"Input image size ({H}*{W}) doesn't match model ({self.img_size}*{self.img_size})." 
- - x = self.proj(x) - x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size) + x = nn.functional.unfold(x, self.patch_size, self.patch_size) x = x.transpose((0, 2, 1)).reshape( - (-1, self.in_dim, self.new_patch_size, self.new_patch_size)) + (-1, C, self.patch_size, self.patch_size)) + x = self.proj(x) + x = x.reshape((-1, self.in_dim, self.patch_size)).transpose((0, 2, 1)) x = x + pixel_pos - x = x.reshape((-1, self.in_dim, self.new_patch_size * - self.new_patch_size)).transpose((0, 2, 1)) return x @@ -288,8 +297,7 @@ class TNT(nn.Layer): self.add_parameter("patch_pos", self.patch_pos) self.pixel_pos = self.create_parameter( - shape=(1, in_dim, new_patch_size, new_patch_size), - default_initializer=zeros_) + shape=(1, patch_size, in_dim), default_initializer=zeros_) self.add_parameter("pixel_pos", self.pixel_pos) self.pos_drop = nn.Dropout(p=drop_rate) @@ -345,7 +353,6 @@ class TNT(nn.Layer): (self.cls_token.expand((B, -1, -1)), patch_embed), axis=1) patch_embed = patch_embed + self.patch_pos patch_embed = self.pos_drop(patch_embed) - for blk in self.blocks: pixel_embed, patch_embed = blk(pixel_embed, patch_embed) @@ -385,3 +392,17 @@ def TNT_small(pretrained=False, use_ssld=False, **kwargs): _load_pretrained( pretrained, model, MODEL_URLS["TNT_small"], use_ssld=use_ssld) return model + + +def TNT_base(pretrained=False, use_ssld=False, **kwargs): + model = TNT(patch_size=16, + embed_dim=640, + in_dim=40, + depth=12, + num_heads=10, + in_num_head=4, + qkv_bias=False, + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["TNT_base"], use_ssld=use_ssld) + return model diff --git a/ppcls/arch/backbone/model_zoo/van.py b/ppcls/arch/backbone/model_zoo/van.py index ea7c88853d249534e4dd108188b3d96ad04ff708..887555c1017c2b11ab428fb45764a7785ddf1218 100644 --- a/ppcls/arch/backbone/model_zoo/van.py +++ b/ppcls/arch/backbone/model_zoo/van.py @@ -26,6 +26,12 @@ from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_fro MODEL_URLS = { "VAN_B0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B0_pretrained.pdparams", + "VAN_B1": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B1_pretrained.pdparams", + "VAN_B2": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B2_pretrained.pdparams", + "VAN_B3": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B3_pretrained.pdparams" } __all__ = list(MODEL_URLS.keys()) @@ -269,6 +275,7 @@ class VAN(nn.Layer): x, H, W = patch_embed(x) for blk in block: x = blk(x) + x = x.flatten(2) x = swapdim(x, 1, 2) x = norm(x) @@ -317,3 +324,39 @@ def VAN_B0(pretrained=False, use_ssld=False, **kwargs): _load_pretrained( pretrained, model, MODEL_URLS["VAN_B0"], use_ssld=use_ssld) return model + + +def VAN_B1(pretrained=False, use_ssld=False, **kwargs): + model = VAN(embed_dims=[64, 128, 320, 512], + mlp_ratios=[8, 8, 4, 4], + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[2, 2, 4, 2], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["VAN_B1"], use_ssld=use_ssld) + return model + + +def VAN_B2(pretrained=False, use_ssld=False, **kwargs): + model = VAN(embed_dims=[64, 128, 320, 512], + mlp_ratios=[8, 8, 4, 4], + norm_layer=partial( + nn.LayerNorm, epsilon=1e-6), + depths=[3, 3, 12, 3], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["VAN_B2"], use_ssld=use_ssld) + return model + + +def VAN_B3(pretrained=False, use_ssld=False, **kwargs): + model = VAN(embed_dims=[64, 128, 320, 512], + mlp_ratios=[8, 8, 4, 4], + norm_layer=partial( 
+ nn.LayerNorm, epsilon=1e-6), + depths=[3, 5, 27, 3], + **kwargs) + _load_pretrained( + pretrained, model, MODEL_URLS["VAN_B3"], use_ssld=use_ssld) + return model
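
The backbone factories added in this patch follow the existing `pretrained=False, use_ssld=False, **kwargs` convention and resolve their weights through `MODEL_URLS`. Below is a minimal usage sketch, assuming the `ppcls` package is importable and the default ImageNet-1k head (`class_num=1000`); the import path simply mirrors the file path in the diff and is not something the patch itself documents:

```python
import paddle

# factory added by this patch (path mirrors ppcls/arch/backbone/model_zoo/convnext.py)
from ppcls.arch.backbone.model_zoo.convnext import ConvNeXt_small

# pretrained=True would fetch the weights listed in MODEL_URLS; keep False in this sketch
model = ConvNeXt_small(pretrained=False)
model.eval()

x = paddle.randn([1, 3, 224, 224])  # 224x224 input, matching the table entry
with paddle.no_grad():
    logits = model(x)
print(logits.shape)  # expected [1, 1000] with the default ImageNet-1k head
```

Note that `ConvNeXt_base_224` and `ConvNeXt_base_384` are built with the same `depths`/`dims` and differ only in the intended input resolution, which is why the tables above list identical Params but different FLOPs.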
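
The new `RegNetX_*` constructors differ only in the `(w_a, w_0, w_m, d, group_w)` arguments, which parameterize the RegNet width schedule from "Designing Network Design Spaces". The following standalone sketch shows how those numbers turn into per-stage widths and depths; it is an illustration of the formula, not a copy of the in-repo `RegNet` class, which may handle the group-width and bottleneck adjustments slightly differently:

```python
import numpy as np

def regnet_widths(w_a, w_0, w_m, d, group_w, q=8):
    """Per-stage widths/depths implied by (w_a, w_0, w_m, d, group_w, q)."""
    u = w_0 + w_a * np.arange(d)                     # u_j = w_0 + w_a * j (linear widths)
    s = np.round(np.log(u / w_0) / np.log(w_m))      # quantization exponents
    w = (np.round(w_0 * np.power(w_m, s) / q) * q).astype(int)  # snap to multiples of q
    stage_w = sorted(set(w.tolist()))                # one width per stage
    stage_d = [int((w == sw).sum()) for sw in stage_w]
    # make each stage width divisible by its (capped) group width
    stage_w = [int(round(sw / min(group_w, sw)) * min(group_w, sw)) for sw in stage_w]
    return stage_w, stage_d

# RegNetX_400MF's parameters from this patch:
# -> ([32, 64, 160, 384], [1, 2, 7, 12]), matching the published RegNetX-400MF stages
print(regnet_widths(w_a=24.48, w_0=24, w_m=2.54, d=22, group_w=16))
```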
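
For the RepVGG changes, `use_se=True` wires the new `SEBlock` into every `RepVGGBlock`, and the pre-existing per-block `rep()` shown in the context folds the 3x3, 1x1 and identity branches into a single conv for deployment. A deployment sketch follows; iterating `sublayers()` and calling `rep()` on each block is an assumption based on the block-level API visible here, not a helper that this patch adds:

```python
import paddle
from ppcls.arch.backbone.model_zoo.repvgg import RepVGG_D2se, RepVGGBlock

model = RepVGG_D2se(pretrained=False)  # SE-augmented variant added in this patch
model.eval()

# fold each block's 3x3 + 1x1 + identity branches into one conv before export
for layer in model.sublayers():
    if isinstance(layer, RepVGGBlock):
        layer.rep()

x = paddle.randn([1, 3, 224, 224])
with paddle.no_grad():
    print(model(x).shape)  # expected [1, 1000]
```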