diff --git a/docs/zh_CN/models/ImageNet1k/ConvNeXt.md b/docs/zh_CN/models/ImageNet1k/ConvNeXt.md
index cfc7b701137676b6ebb654e748fd0d9d6989171d..4b976c1b932a2d7bde744ceb072a74a8298d926b 100644
--- a/docs/zh_CN/models/ImageNet1k/ConvNeXt.md
+++ b/docs/zh_CN/models/ImageNet1k/ConvNeXt.md
@@ -35,6 +35,11 @@ ConvNeXt 系列模型是 Meta 在 2022 年提出
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPs<br>(G) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
| ConvNeXt_tiny | 0.8203 | 0.9590 | 0.821 | - | 4.458 | 28.583 |
+| ConvNeXt_small | 0.8313 | 0.9643 | 0.831 | - | 8.688 | 50.210 |
+| ConvNeXt_base_224 | 0.8384 | 0.9676 | 0.838 | - | 15.360 | 88.573 |
+| ConvNeXt_base_384 | 0.8490 | 0.9727 | 0.851 | - | 45.138 | 88.573 |
+| ConvNeXt_large_224 | 0.8426 | 0.9690 | 0.843 | - | 34.340 | 197.740 |
+| ConvNeXt_large_384 | 0.8527 | 0.9749 | 0.855 | - | 101.001 | 197.740 |
### 1.3 Benchmark
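The `_224` and `_384` variants added above share the same architecture per size (hence identical Params(M)) and differ only in input resolution; for a conv-dominated backbone like ConvNeXt, FLOPs grow roughly with input area. A quick plain-Python check against the table values:

```python
# FLOPs scale roughly with input area: (384 / 224)^2 ≈ 2.94
print(15.360 * (384 / 224) ** 2)  # ≈ 45.1 G, matching the ConvNeXt_base_384 row (45.138)
print(34.340 * (384 / 224) ** 2)  # ≈ 100.9 G, close to the ConvNeXt_large_384 row (101.001)
```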
diff --git a/docs/zh_CN/models/ImageNet1k/README.md b/docs/zh_CN/models/ImageNet1k/README.md
index a9b2149014d696d8d4078328b92910afa92368c0..92a7e473ac6f11a1b1c771cfab3760cd0c37bed4 100644
--- a/docs/zh_CN/models/ImageNet1k/README.md
+++ b/docs/zh_CN/models/ImageNet1k/README.md
@@ -329,8 +329,11 @@ ResNeSt 系列模型的精度、速度指标如下表所示,更多关于该系
| 模型 | Top-1 Acc | Top-5 Acc | time(ms)<br>bs=1 | time(ms)<br>bs=4 | time(ms)<br>bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 |
|------------------------|-----------|-----------|------------------|------------------|----------|-----------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|
-| ResNeSt50_<br>fast_1s1x64d | 0.8035 | 0.9528 | 2.73 | 5.33 | 8.24 | 4.36 | 26.27 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt50_fast_1s1x64d_infer.tar) |
-| ResNeSt50 | 0.8083 | 0.9542 | 7.36 | 10.23 | 13.84 | 5.40 | 27.54 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt50_infer.tar) |
+| ResNeSt50_<br>fast_1s1x64d | 0.8061 | 0.9527 | 2.73 | 5.33 | 8.24 | 4.36 | 26.27 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt50_fast_1s1x64d_infer.tar) |
+| ResNeSt50 | 0.8102 | 0.9546 | 7.36 | 10.23 | 13.84 | 5.40 | 27.54 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt50_infer.tar) |
+| ResNeSt101 | 0.8279 | 0.9642 | | | | 10.25 | 48.40 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt101_infer.tar) |
+| ResNeSt200 | 0.8418 | 0.9698 | | | | 17.50 | 70.41 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt200_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt200_infer.tar) |
+| ResNeSt269 | 0.8444 |0.9698 | | | | 22.54 | 111.23 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt269_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt269_infer.tar) |
@@ -340,7 +343,19 @@ RegNet 系列模型的精度、速度指标如下表所示,更多关于该系
| 模型 | Top-1 Acc | Top-5 Acc | time(ms)<br>bs=1 | time(ms)<br>bs=4 | time(ms)<br>bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 |
|------------------------|-----------|-----------|------------------|------------------|----------|-----------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|
-| RegNetX_4GF | 0.785 | 0.9416 | 6.46 | 8.48 | 11.45 | 4.00 | 22.23 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_4GF_infer.tar) |
+| RegNetX_200MF | 0.680 | 0.8842 | | | | 0.20 | 2.74 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_200MF_infer.tar) |
+| RegNetX_400MF | 0.723 | 0.9078 | | | | 0.40 | 5.19 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_400MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_400MF_infer.tar) |
+| RegNetX_600MF | 0.737 | 0.9198 | | | | 0.61 | 6.23 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_600MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_600MF_infer.tar) |
+| RegNetX_800MF | 0.751 | 0.9250 | | | | 0.81 | 7.30 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_800MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_800MF_infer.tar) |
+| RegNetX_1600MF | 0.767 | 0.9329 | | | | 1.62 | 9.23 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_1600MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_1600MF_infer.tar) |
+| RegNetX_3200MF | 0.781 | 0.9413 | | | | 3.20 | 15.36 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_3200MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_3200MF_infer.tar) |
+| RegNetX_4GF | 0.785 | 0.9416 | 6.46 | 8.48 | 11.45 | 3.99 | 22.16 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_4GF_infer.tar) |
+| RegNetX_6400MF | 0.790 | 0.9461 | | | | 6.49 | 26.28 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_6400MF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_6400MF_infer.tar) |
+| RegNetX_8GF | 0.793 | 0.9464 | | | | 8.02 | 39.66 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_8GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_8GF_infer.tar) |
+| RegNetX_12GF | 0.797 | 0.9501 | | | | 12.13 | 46.20 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_12GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_12GF_infer.tar) |
+| RegNetX_16GF | 0.801 | 0.9505 | | | | 15.99 | 54.39 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_16GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_16GF_infer.tar) |
+| RegNetX_32GF | 0.803 | 0.9526 | | | | 32.33 | 130.67 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RegNetX_32GF_infer.tar) |
+
@@ -359,7 +374,9 @@ RegNet 系列模型的精度、速度指标如下表所示,更多关于该系
| RepVGG_B1g2 | 0.7732 | 0.9359 | | | | 8.82 | 41.36 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B1g2_infer.tar) |
| RepVGG_B1g4 | 0.7675 | 0.9335 | | | | 7.31 | 36.13 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B1g4_infer.tar) |
| RepVGG_B2g4 | 0.7881 | 0.9448 | | | | 11.34 | 55.78 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B2g4_infer.tar) |
-| RepVGG_B3g4 | 0.7965 | 0.9485 | | | | 16.07 | 75.63 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B3g4_infer.tar) |
+| RepVGG_B3 | 0.8031 | 0.9517 | | | | 29.16 | 123.19 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B3_infer.tar) |
+| RepVGG_B3g4 | 0.8005 | 0.9502 | | | | 17.89 | 83.93 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_B3g4_infer.tar) |
+| RepVGG_D2se | 0.8339 | 0.9665 | | | | 36.54 | 133.47 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_D2se_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_D2se_infer.tar) |
@@ -441,6 +458,11 @@ RegNet 系列模型的精度、速度指标如下表所示,更多关于该系
| 模型 | Top-1 Acc | Top-5 Acc | time(ms)<br>bs=1 | time(ms)<br>bs=4 | time(ms)<br>bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 |
| ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| ConvNeXt_tiny | 0.8203 | 0.9590 | - | - | - | 4.458 | 28.583 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_tiny_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_tiny_infer.tar) |
+| ConvNeXt_small | 0.8313 | 0.9643 | - | - | - | 8.688 | 50.210 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_small_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_small_infer.tar) |
+| ConvNeXt_base_224 | 0.8384 | 0.9676 | - | - | - | 15.360 | 88.573 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_base_224_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_base_224_infer.tar) |
+| ConvNeXt_base_384 | 0.8490 | 0.9727 | - | - | - | 45.138 | 88.573 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_base_384_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_base_384_infer.tar) |
+| ConvNeXt_large_224 | 0.8426 | 0.9690 | - | - | - | 34.340 | 197.740 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_large_224_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_large_224_infer.tar) |
+| ConvNeXt_large_384 | 0.8527 | 0.9749 | - | - | - | 101.001 | 197.740 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_large_384_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ConvNeXt_large_384_infer.tar) |
@@ -451,6 +473,9 @@ RegNet 系列模型的精度、速度指标如下表所示,更多关于该系
| 模型 | Top-1 Acc | Top-5 Acc | time(ms)<br>bs=1 | time(ms)<br>bs=4 | time(ms)<br>bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 |
| ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| VAN_B0 | 0.7535 | 0.9299 | - | - | - | 0.880 | 4.110 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B0_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VAN_B0_infer.tar) |
+| VAN_B1 | 0.8102 | 0.9562 | - | - | - | 2.518 | 13.869 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B1_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VAN_B1_infer.tar) |
+| VAN_B2 | 0.8280 | 0.9620 | - | - | - | 5.032 | 26.592 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B2_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VAN_B2_infer.tar) |
+| VAN_B3 | 0.8389 | 0.9668 | - | - | - | 8.987 | 44.790 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B3_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VAN_B3_infer.tar) |
@@ -699,7 +724,8 @@ DeiT(Data-efficient Image Transformers)系列模型的精度、速度指标
| 模型 | Top-1 Acc | Top-5 Acc | time(ms)<br>bs=1 | time(ms)<br>bs=4 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 |
| ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| TNT_small | 0.8121 |0.9563 | | | 4.83 | 23.68 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/TNT_small_infer.tar) |
+| TNT_small | 0.8148 |0.9580 | | | 4.83 | 23.69 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/TNT_small_infer.tar) |
+| TNT_base | 0.8276 |0.9617 | | | 13.40 | 65.30 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_base_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/TNT_base_infer.tar) |
**注**:TNT 模型的数据预处理部分 `NormalizeImage` 中的 `mean` 与 `std` 均为 0.5。
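For reference, that note means the TNT preprocessing maps pixel values from [0, 1] to [-1, 1]. A minimal sketch of that step; `normalize_for_tnt` is a hypothetical helper, and the authoritative settings live in the TNT config files:

```python
import numpy as np

def normalize_for_tnt(img_chw):
    """Sketch of NormalizeImage with mean = std = 0.5 on a CHW float image in [0, 1]."""
    mean = np.array([0.5, 0.5, 0.5], dtype=np.float32).reshape([3, 1, 1])
    std = np.array([0.5, 0.5, 0.5], dtype=np.float32).reshape([3, 1, 1])
    return (img_chw - mean) / std  # pixel value 0.0 -> -1.0, 1.0 -> +1.0
```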
diff --git a/docs/zh_CN/models/ImageNet1k/RegNet.md b/docs/zh_CN/models/ImageNet1k/RegNet.md
index db138dec3c17e141af4a11d9fb7768803f4a09c0..7bde331682a2070418a46ce3f033a181d70a6738 100644
--- a/docs/zh_CN/models/ImageNet1k/RegNet.md
+++ b/docs/zh_CN/models/ImageNet1k/RegNet.md
@@ -35,7 +35,18 @@ RegNet 是由 facebook 于 2020 年提出,旨在深化设计空间理念的概
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPs<br>(G) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
-| RegNetX_4GF | 0.7850 | 0.9416| 0.7860 | -| 8.0 | 22.1 |
+| RegNetX_200MF | 0.6804 | 0.8842| 0.6821 | -| 0.2 | 2.7 |
+| RegNetX_400MF | 0.7225 | 0.9078| 0.7228 | -| 0.4 | 5.2 |
+| RegNetX_600MF | 0.7366 | 0.9198| 0.7286 | -| 0.6 | 6.2 |
+| RegNetX_800MF | 0.7512 | 0.9250| 0.7494 | -| 0.8 | 7.3 |
+| RegNetX_1600MF | 0.7673 | 0.9329| 0.7671 | -| 1.6 | 9.2 |
+| RegNetX_3200MF | 0.7809 | 0.9413| 0.7819 | -| 3.2 | 15.3 |
+| RegNetX_4GF | 0.7850 | 0.9416| 0.7860 | -| 4.0 | 22.2 |
+| RegNetX_6400MF | 0.7897 | 0.9461| 0.7915 | -| 6.5 | 26.2 |
+| RegNetX_8GF | 0.7928 | 0.9464| 0.7938 | -| 8.0 | 39.7 |
+| RegNetX_12GF | 0.7972 | 0.9501| 0.8000 | -| 12.1 | 46.2 |
+| RegNetX_16GF | 0.8013 | 0.9505| 0.8012 | -| 16.0 | 54.4 |
+| RegNetX_32GF | 0.8032 | 0.9526| 0.8052 | -| 32.33 | 130.67 |
### 1.3 Benchmark
diff --git a/docs/zh_CN/models/ImageNet1k/RepVGG.md b/docs/zh_CN/models/ImageNet1k/RepVGG.md
index afdb9efd455c09a59d0e80a597e2ff5451ccad71..8542d7fe81d29f25d6843584e7daacdefd916592 100644
--- a/docs/zh_CN/models/ImageNet1k/RepVGG.md
+++ b/docs/zh_CN/models/ImageNet1k/RepVGG.md
@@ -41,7 +41,9 @@ RepVGG(Making VGG-style ConvNets Great Again)系列模型是由清华大学(丁
| RepVGG_B1g2 | 0.7732 | 0.9359 | 0.7778 | - | - | - |
| RepVGG_B1g4 | 0.7675 | 0.9335 | 0.7758 | - | - | - |
| RepVGG_B2g4 | 0.7881 | 0.9448 | 0.7938 | - | - | - |
-| RepVGG_B3g4 | 0.7965 | 0.9485 | 0.8021 | - | - | - |
+| RepVGG_B3 | 0.8031 | 0.9517 | 0.8052 | - | - | - |
+| RepVGG_B3g4 | 0.8005 | 0.9502 | 0.8021 | - | - | - |
+| RepVGG_D2se | 0.8339 | 0.9665 | 0.8355 | - | - | - |
关于 Params、FLOPs、Inference speed 等信息,敬请期待。
diff --git a/docs/zh_CN/models/ImageNet1k/ResNeSt.md b/docs/zh_CN/models/ImageNet1k/ResNeSt.md
index 8a7664474015337d41cce21bd7539f7f6bf400db..51c7255b6178bcf20bc28c43f3e43e2a96ef874e 100644
--- a/docs/zh_CN/models/ImageNet1k/ResNeSt.md
+++ b/docs/zh_CN/models/ImageNet1k/ResNeSt.md
@@ -35,8 +35,11 @@ ResNeSt 系列模型是在 2020 年提出的,在原有的 resnet 网络结构
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPs<br>(G) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
-| ResNeSt50_fast_1s1x64d | 0.8035 | 0.9528| 0.8035 | -| 8.68 | 26.3 |
-| ResNeSt50 | 0.8083 | 0.9542| 0.8113 | -| 10.78 | 27.5 |
+| ResNeSt50_fast_1s1x64d | 0.8061 | 0.9527| 0.8035 | -| 4.36 | 26.3 |
+| ResNeSt50 | 0.8102 | 0.9546| 0.8103 | -| 5.40 | 27.5 |
+| ResNeSt101 | 0.8279 | 0.9642| 0.8283 | -| 10.25 | 48.4 |
+| ResNeSt200 | 0.8418 | 0.9698| 0.8384 | -| 17.50 | 70.4 |
+| ResNeSt269 | 0.8444 | 0.9698| 0.8454 | -| 22.54 | 111.2 |
### 1.3 Benchmark
diff --git a/docs/zh_CN/models/ImageNet1k/TNT.md b/docs/zh_CN/models/ImageNet1k/TNT.md
index 1d0f7b211cdaf75e18c9c4c666c08ffc37dfcd8d..4705e1aeb381c4f3da3223f14581270bda9f58ab 100644
--- a/docs/zh_CN/models/ImageNet1k/TNT.md
+++ b/docs/zh_CN/models/ImageNet1k/TNT.md
@@ -34,7 +34,8 @@ PaddleClas 所提供的该系列模型的预训练模型权重,均是基于其
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPs<br>(G) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
-| TNT_small | 0.8121 | 0.9563 | - | - | 5.2 | 23.8 |
+| TNT_small | 0.8148 | 0.9580 | 0.815 | - | 4.8 | 23.7 |
+| TNT_base | 0.8276 | 0.9617 | 0.829 | - | 13.4 | 65.3 |
**备注:** PaddleClas 所提供的该系列模型的预训练模型权重,均是基于其官方提供的权重转得。
diff --git a/docs/zh_CN/models/ImageNet1k/VAN.md b/docs/zh_CN/models/ImageNet1k/VAN.md
index e85320e34bbf90680362c5cae8f354fe8446c76f..c3c7946514ba43ef1846097ce47991cfad39dadd 100644
--- a/docs/zh_CN/models/ImageNet1k/VAN.md
+++ b/docs/zh_CN/models/ImageNet1k/VAN.md
@@ -35,6 +35,9 @@ VAN(Visual Attention Network)系列模型是在 2022 年提出的 CNN 架构
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPs<br>(G) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
| VAN-B0 | 0.7535 | 0.9299 | 0.754 | - | 0.880 | 4.110 |
+| VAN-B1 | 0.8102 | 0.9562 | 0.811 | - | 2.518 | 13.869 |
+| VAN-B2 | 0.8280 | 0.9620 | 0.828 | - | 5.032 | 26.592 |
+| VAN-B3 | 0.8389 | 0.9668 | 0.839 | - | 8.987 | 44.790 |
### 1.3 Benchmark
diff --git a/ppcls/arch/backbone/model_zoo/convnext.py b/ppcls/arch/backbone/model_zoo/convnext.py
index d089d5e01cc4313a3cf61a1c4f88a6fedae21ce9..13e73de2290d449ce057ef6aee86a57220487e6b 100644
--- a/ppcls/arch/backbone/model_zoo/convnext.py
+++ b/ppcls/arch/backbone/model_zoo/convnext.py
@@ -23,6 +23,16 @@ from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_fro
MODEL_URLS = {
"ConvNeXt_tiny":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_tiny_pretrained.pdparams",
+ "ConvNeXt_small":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_small_pretrained.pdparams",
+ "ConvNeXt_base_224":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_base_224_pretrained.pdparams",
+ "ConvNeXt_base_384":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_base_384_pretrained.pdparams",
+ "ConvNeXt_large_224":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_large_224_pretrained.pdparams",
+ "ConvNeXt_large_384":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ConvNeXt_large_384_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
@@ -231,3 +241,42 @@ def ConvNeXt_tiny(pretrained=False, use_ssld=False, **kwargs):
_load_pretrained(
pretrained, model, MODEL_URLS["ConvNeXt_tiny"], use_ssld=use_ssld)
return model
+
+
+def ConvNeXt_small(pretrained=False, use_ssld=False, **kwargs):
+ model = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["ConvNeXt_small"], use_ssld=use_ssld)
+ return model
+
+
+def ConvNeXt_base_224(pretrained=False, use_ssld=False, **kwargs):
+ model = ConvNeXt(
+ depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["ConvNeXt_base_224"], use_ssld=use_ssld)
+ return model
+
+
+def ConvNeXt_base_384(pretrained=False, use_ssld=False, **kwargs):
+ model = ConvNeXt(
+ depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["ConvNeXt_base_384"], use_ssld=use_ssld)
+ return model
+
+
+def ConvNeXt_large_224(pretrained=False, use_ssld=False, **kwargs):
+ model = ConvNeXt(
+ depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["ConvNeXt_large_224"], use_ssld=use_ssld)
+ return model
+
+
+def ConvNeXt_large_384(pretrained=False, use_ssld=False, **kwargs):
+ model = ConvNeXt(
+ depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["ConvNeXt_large_384"], use_ssld=use_ssld)
+ return model
\ No newline at end of file
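A minimal usage sketch for the new ConvNeXt factories; the import path assumes the package layout implied by the file path above, and `pretrained=True` would fetch the corresponding `MODEL_URLS` entry:

```python
import paddle

from ppcls.arch.backbone.model_zoo.convnext import ConvNeXt_base_224, ConvNeXt_base_384

model = ConvNeXt_base_224(pretrained=False)
model.eval()
with paddle.no_grad():
    print(model(paddle.randn([1, 3, 224, 224])).shape)  # [1, 1000] with the default class_num

# the _384 variants use the same depths/dims and are intended for 384x384 inputs
model_384 = ConvNeXt_base_384(pretrained=False)
model_384.eval()
with paddle.no_grad():
    print(model_384(paddle.randn([1, 3, 384, 384])).shape)  # [1, 1000]
```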
diff --git a/ppcls/arch/backbone/model_zoo/regnet.py b/ppcls/arch/backbone/model_zoo/regnet.py
index d55163e92d59afde9baa92a937c58f3cccd71a91..151e39439a0c7560bd51cdbca1e72e6ac38bf702 100644
--- a/ppcls/arch/backbone/model_zoo/regnet.py
+++ b/ppcls/arch/backbone/model_zoo/regnet.py
@@ -34,8 +34,26 @@ from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_fro
MODEL_URLS = {
"RegNetX_200MF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_200MF_pretrained.pdparams",
+ "RegNetX_400MF":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_400MF_pretrained.pdparams",
+ "RegNetX_600MF":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_600MF_pretrained.pdparams",
+ "RegNetX_800MF":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_800MF_pretrained.pdparams",
+ "RegNetX_1600MF":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_1600MF_pretrained.pdparams",
+ "RegNetX_3200MF":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_3200MF_pretrained.pdparams",
"RegNetX_4GF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams",
+ "RegNetX_6400MF":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_6400MF_pretrained.pdparams",
+ "RegNetX_8GF":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_8GF_pretrained.pdparams",
+ "RegNetX_12GF":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_12GF_pretrained.pdparams",
+ "RegNetX_16GF":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_16GF_pretrained.pdparams",
"RegNetX_32GF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_32GF_pretrained.pdparams",
"RegNetY_200MF":
@@ -43,7 +61,7 @@ MODEL_URLS = {
"RegNetY_4GF":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_4GF_pretrained.pdparams",
"RegNetY_32GF":
- "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams",
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetY_32GF_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
@@ -106,7 +124,7 @@ class ConvBNLayer(nn.Layer):
padding=padding,
groups=groups,
weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"),
- bias_attr=ParamAttr(name=name + ".conv2d.output.1.b_0"))
+ bias_attr=False)
bn_name = name + "_bn"
self._batch_norm = BatchNorm(
num_filters,
@@ -354,6 +372,81 @@ def RegNetX_200MF(pretrained=False, use_ssld=False, **kwargs):
return model
+def RegNetX_400MF(pretrained=False, use_ssld=False, **kwargs):
+ model = RegNet(
+ w_a=24.48,
+ w_0=24,
+ w_m=2.54,
+ d=22,
+ group_w=16,
+ bot_mul=1.0,
+ q=8,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["RegNetX_400MF"], use_ssld=use_ssld)
+ return model
+
+
+def RegNetX_600MF(pretrained=False, use_ssld=False, **kwargs):
+ model = RegNet(
+ w_a=36.97,
+ w_0=48,
+ w_m=2.24,
+ d=16,
+ group_w=24,
+ bot_mul=1.0,
+ q=8,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["RegNetX_600MF"], use_ssld=use_ssld)
+ return model
+
+
+def RegNetX_800MF(pretrained=False, use_ssld=False, **kwargs):
+ model = RegNet(
+ w_a=35.73,
+ w_0=56,
+ w_m=2.28,
+ d=16,
+ group_w=16,
+ bot_mul=1.0,
+ q=8,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["RegNetX_800MF"], use_ssld=use_ssld)
+ return model
+
+
+def RegNetX_1600MF(pretrained=False, use_ssld=False, **kwargs):
+ model = RegNet(
+ w_a=34.01,
+ w_0=80,
+ w_m=2.25,
+ d=18,
+ group_w=24,
+ bot_mul=1.0,
+ q=8,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["RegNetX_1600MF"], use_ssld=use_ssld)
+ return model
+
+
+def RegNetX_3200MF(pretrained=False, use_ssld=False, **kwargs):
+ model = RegNet(
+ w_a=26.31,
+ w_0=88,
+ w_m=2.25,
+ d=25,
+ group_w=48,
+ bot_mul=1.0,
+ q=8,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["RegNetX_3200MF"], use_ssld=use_ssld)
+ return model
+
+
def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
w_a=38.65,
@@ -369,63 +462,75 @@ def RegNetX_4GF(pretrained=False, use_ssld=False, **kwargs):
return model
-def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs):
+def RegNetX_6400MF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
- w_a=69.86,
- w_0=320,
- w_m=2.0,
+ w_a=60.83,
+ w_0=184,
+ w_m=2.07,
+ d=17,
+ group_w=56,
+ bot_mul=1.0,
+ q=8,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["RegNetX_6400MF"], use_ssld=use_ssld)
+ return model
+
+
+def RegNetX_8GF(pretrained=False, use_ssld=False, **kwargs):
+ model = RegNet(
+ w_a=49.56,
+ w_0=80,
+ w_m=2.88,
d=23,
- group_w=168,
+ group_w=120,
bot_mul=1.0,
q=8,
**kwargs)
_load_pretrained(
- pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+ pretrained, model, MODEL_URLS["RegNetX_8GF"], use_ssld=use_ssld)
return model
-def RegNetY_200MF(pretrained=False, use_ssld=False, **kwargs):
+def RegNetX_12GF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
- w_a=36.44,
- w_0=24,
- w_m=2.49,
- d=13,
- group_w=8,
+ w_a=73.36,
+ w_0=168,
+ w_m=2.37,
+ d=19,
+ group_w=112,
bot_mul=1.0,
q=8,
- se_on=True,
**kwargs)
_load_pretrained(
- pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+ pretrained, model, MODEL_URLS["RegNetX_12GF"], use_ssld=use_ssld)
return model
-def RegNetY_4GF(pretrained=False, use_ssld=False, **kwargs):
+def RegNetX_16GF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
- w_a=31.41,
- w_0=96,
- w_m=2.24,
+ w_a=55.59,
+ w_0=216,
+ w_m=2.1,
d=22,
- group_w=64,
+ group_w=128,
bot_mul=1.0,
q=8,
- se_on=True,
**kwargs)
_load_pretrained(
- pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
+ pretrained, model, MODEL_URLS["RegNetX_16GF"], use_ssld=use_ssld)
return model
-def RegNetY_32GF(pretrained=False, use_ssld=False, **kwargs):
+def RegNetX_32GF(pretrained=False, use_ssld=False, **kwargs):
model = RegNet(
- w_a=115.89,
- w_0=232,
- w_m=2.53,
- d=20,
- group_w=232,
+ w_a=69.86,
+ w_0=320,
+ w_m=2.0,
+ d=23,
+ group_w=168,
bot_mul=1.0,
q=8,
- se_on=True,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["RegNetX_32GF"], use_ssld=use_ssld)
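The new RegNetX constructors are all instances of the RegNet design-space parameterization (`w_0`, `w_a`, `w_m`, depth `d`, quantization `q`, plus `group_w`/`bot_mul`). As a sketch of how those numbers turn into per-stage widths and depths, here is the quantized linear rule from the RegNet paper; `regnet_stage_plan` is an illustrative helper, not necessarily the exact routine inside `regnet.py`, and it omits the final snap of widths to the group width:

```python
import numpy as np

def regnet_stage_plan(w_a, w_0, w_m, d, q=8):
    """Quantized linear width parameterization from 'Designing Network Design Spaces'."""
    u = w_0 + w_a * np.arange(d)                        # u_j = w_0 + w_a * j, one entry per block
    k = np.round(np.log(u / w_0) / np.log(w_m))         # quantize each width to a power of w_m
    w = (q * np.round(w_0 * np.power(w_m, k) / q)).astype(int)  # snap to multiples of q
    widths, depths = np.unique(w, return_counts=True)   # blocks with equal width form a stage
    return widths.tolist(), depths.tolist()

# e.g. RegNetX_400MF above (w_a=24.48, w_0=24, w_m=2.54, d=22), before group-width adjustment
print(regnet_stage_plan(24.48, 24, 2.54, 22))
```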
diff --git a/ppcls/arch/backbone/model_zoo/repvgg.py b/ppcls/arch/backbone/model_zoo/repvgg.py
index b30098277b523e50da11108e77127b179345bd34..2dbf7191d617b0796d55c1d5e776477c53564d3f 100644
--- a/ppcls/arch/backbone/model_zoo/repvgg.py
+++ b/ppcls/arch/backbone/model_zoo/repvgg.py
@@ -17,6 +17,7 @@
import paddle.nn as nn
import paddle
+import paddle.nn.functional as F
import numpy as np
from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
@@ -40,8 +41,12 @@ MODEL_URLS = {
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams",
"RepVGG_B2g4":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams",
+ "RepVGG_B3":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams",
"RepVGG_B3g4":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams",
+ "RepVGG_D2se":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_D2se_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
@@ -76,6 +81,33 @@ class ConvBN(nn.Layer):
return y
+class SEBlock(nn.Layer):
+ def __init__(self, input_channels, internal_neurons):
+ super(SEBlock, self).__init__()
+ self.down = nn.Conv2D(
+ in_channels=input_channels,
+ out_channels=internal_neurons,
+ kernel_size=1,
+ stride=1,
+ bias_attr=True)
+ self.up = nn.Conv2D(
+ in_channels=internal_neurons,
+ out_channels=input_channels,
+ kernel_size=1,
+ stride=1,
+ bias_attr=True)
+ self.input_channels = input_channels
+
+ def forward(self, inputs):
+ x = F.avg_pool2d(inputs, kernel_size=inputs.shape[3])
+ x = self.down(x)
+ x = F.relu(x)
+ x = self.up(x)
+ x = F.sigmoid(x)
+ x = x.reshape([-1, self.input_channels, 1, 1])
+ return inputs * x
+
+
class RepVGGBlock(nn.Layer):
def __init__(self,
in_channels,
@@ -85,7 +117,8 @@ class RepVGGBlock(nn.Layer):
padding=0,
dilation=1,
groups=1,
- padding_mode='zeros'):
+ padding_mode='zeros',
+ use_se=False):
super(RepVGGBlock, self).__init__()
self.is_repped = False
@@ -105,6 +138,11 @@ class RepVGGBlock(nn.Layer):
self.nonlinearity = nn.ReLU()
+ if use_se:
+ self.se = SEBlock(
+ out_channels, internal_neurons=out_channels // 16)
+ else:
+ self.se = nn.Identity()
self.rbr_identity = nn.BatchNorm2D(
num_features=in_channels
) if out_channels == in_channels and stride == 1 else None
@@ -132,7 +170,7 @@ class RepVGGBlock(nn.Layer):
else:
id_out = self.rbr_identity(inputs)
return self.nonlinearity(
- self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)
+ self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out))
def rep(self):
if not hasattr(self, 'rbr_reparam'):
@@ -198,14 +236,12 @@ class RepVGG(nn.Layer):
num_blocks,
width_multiplier=None,
override_groups_map=None,
- class_num=1000):
+ class_num=1000,
+ use_se=False):
super(RepVGG, self).__init__()
-
assert len(width_multiplier) == 4
self.override_groups_map = override_groups_map or dict()
-
assert 0 not in self.override_groups_map
-
self.in_planes = min(64, int(64 * width_multiplier[0]))
self.stage0 = RepVGGBlock(
@@ -213,20 +249,33 @@ class RepVGG(nn.Layer):
out_channels=self.in_planes,
kernel_size=3,
stride=2,
- padding=1)
+ padding=1,
+ use_se=use_se)
self.cur_layer_idx = 1
self.stage1 = self._make_stage(
- int(64 * width_multiplier[0]), num_blocks[0], stride=2)
+ int(64 * width_multiplier[0]),
+ num_blocks[0],
+ stride=2,
+ use_se=use_se)
self.stage2 = self._make_stage(
- int(128 * width_multiplier[1]), num_blocks[1], stride=2)
+ int(128 * width_multiplier[1]),
+ num_blocks[1],
+ stride=2,
+ use_se=use_se)
self.stage3 = self._make_stage(
- int(256 * width_multiplier[2]), num_blocks[2], stride=2)
+ int(256 * width_multiplier[2]),
+ num_blocks[2],
+ stride=2,
+ use_se=use_se)
self.stage4 = self._make_stage(
- int(512 * width_multiplier[3]), num_blocks[3], stride=2)
+ int(512 * width_multiplier[3]),
+ num_blocks[3],
+ stride=2,
+ use_se=use_se)
self.gap = nn.AdaptiveAvgPool2D(output_size=1)
self.linear = nn.Linear(int(512 * width_multiplier[3]), class_num)
- def _make_stage(self, planes, num_blocks, stride):
+ def _make_stage(self, planes, num_blocks, stride, use_se=False):
strides = [stride] + [1] * (num_blocks - 1)
blocks = []
for stride in strides:
@@ -238,7 +287,8 @@ class RepVGG(nn.Layer):
kernel_size=3,
stride=stride,
padding=1,
- groups=cur_groups))
+ groups=cur_groups,
+ use_se=use_se))
self.in_planes = planes
self.cur_layer_idx += 1
return nn.Sequential(*blocks)
@@ -367,6 +417,17 @@ def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs):
return model
+def RepVGG_B3(pretrained=False, use_ssld=False, **kwargs):
+ model = RepVGG(
+ num_blocks=[4, 6, 16, 1],
+ width_multiplier=[3, 3, 3, 5],
+ override_groups_map=None,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld)
+ return model
+
+
def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs):
model = RepVGG(
num_blocks=[4, 6, 16, 1],
@@ -376,3 +437,15 @@ def RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs):
_load_pretrained(
pretrained, model, MODEL_URLS["RepVGG_B3g4"], use_ssld=use_ssld)
return model
+
+
+def RepVGG_D2se(pretrained=False, use_ssld=False, **kwargs):
+ model = RepVGG(
+ num_blocks=[8, 14, 24, 1],
+ width_multiplier=[2.5, 2.5, 2.5, 5],
+ override_groups_map=None,
+ use_se=True,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["RepVGG_D2se"], use_ssld=use_ssld)
+ return model
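The new `SEBlock` applies a standard squeeze-and-excitation gate to the summed RepVGG branches before the ReLU (see the change to `RepVGGBlock.forward`). A self-contained functional sketch of the same computation, using the adaptive-pooling form instead of the fixed-kernel `avg_pool2d` above; `se_gate` and the toy weights are illustrative only:

```python
import paddle
import paddle.nn.functional as F

def se_gate(x, w_down, b_down, w_up, b_up):
    """Squeeze-and-excitation: pool -> 1x1 reduce -> ReLU -> 1x1 expand -> sigmoid -> rescale."""
    s = F.adaptive_avg_pool2d(x, 1)           # (N, C, 1, 1) per-channel statistics ("squeeze")
    s = F.relu(F.conv2d(s, w_down, b_down))   # reduce to C // 16 channels
    s = F.sigmoid(F.conv2d(s, w_up, b_up))    # expand back to C channels, gate in (0, 1)
    return x * s                              # channel-wise rescaling ("excitation")

C, r = 32, 16
x = paddle.randn([1, C, 14, 14])
w_down, b_down = paddle.randn([C // r, C, 1, 1]), paddle.zeros([C // r])
w_up, b_up = paddle.randn([C, C // r, 1, 1]), paddle.zeros([C])
print(se_gate(x, w_down, b_down, w_up, b_up).shape)  # [1, 32, 14, 14]
```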
diff --git a/ppcls/arch/backbone/model_zoo/resnest.py b/ppcls/arch/backbone/model_zoo/resnest.py
index c9f7850fbade0537704efd0cecadb7df07b28b8e..ab15d65dade8f871969d446467aca41ab3a79249 100644
--- a/ppcls/arch/backbone/model_zoo/resnest.py
+++ b/ppcls/arch/backbone/model_zoo/resnest.py
@@ -39,6 +39,10 @@ MODEL_URLS = {
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams",
"ResNeSt101":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt101_pretrained.pdparams",
+ "ResNeSt200":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt200_pretrained.pdparams",
+ "ResNeSt269":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt269_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
@@ -160,8 +164,7 @@ class SplatConv(nn.Layer):
padding=0,
groups=groups,
weight_attr=ParamAttr(
- name=name + "_weights", initializer=KaimingNormal()),
- bias_attr=False)
+ name=name + "_weights", initializer=KaimingNormal()))
self.rsoftmax = rSoftmax(radix=radix, cardinality=groups)
@@ -739,3 +742,39 @@ def ResNeSt101(pretrained=False, use_ssld=False, **kwargs):
_load_pretrained(
pretrained, model, MODEL_URLS["ResNeSt101"], use_ssld=use_ssld)
return model
+
+
+def ResNeSt200(pretrained=False, use_ssld=False, **kwargs):
+ model = ResNeSt(
+ layers=[3, 24, 36, 3],
+ radix=2,
+ groups=1,
+ bottleneck_width=64,
+ deep_stem=True,
+ stem_width=64,
+ avg_down=True,
+ avd=True,
+ avd_first=False,
+ final_drop=0.0,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["ResNeSt200"], use_ssld=use_ssld)
+ return model
+
+
+def ResNeSt269(pretrained=False, use_ssld=False, **kwargs):
+ model = ResNeSt(
+ layers=[3, 30, 48, 8],
+ radix=2,
+ groups=1,
+ bottleneck_width=64,
+ deep_stem=True,
+ stem_width=64,
+ avg_down=True,
+ avd=True,
+ avd_first=False,
+ final_drop=0.0,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["ResNeSt269"], use_ssld=use_ssld)
+ return model
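The new `layers` configurations follow the usual ResNet depth-naming arithmetic (three convs per bottleneck block, plus the stem conv and the classifier; the deep three-conv stem used here is not reflected in the name), which gives a quick sanity check:

```python
for name, layers in [("ResNeSt200", [3, 24, 36, 3]), ("ResNeSt269", [3, 30, 48, 8])]:
    print(name, 3 * sum(layers) + 2)  # -> 200 and 269
```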
diff --git a/ppcls/arch/backbone/model_zoo/tnt.py b/ppcls/arch/backbone/model_zoo/tnt.py
index b9d8327098db724d3514987ffac6cb4091753caa..8d650085a8c8ff54055b00ee27730321be0a0828 100644
--- a/ppcls/arch/backbone/model_zoo/tnt.py
+++ b/ppcls/arch/backbone/model_zoo/tnt.py
@@ -20,7 +20,6 @@ import numpy as np
import paddle
import paddle.nn as nn
-
from paddle.nn.initializer import TruncatedNormal, Constant
from ..base.theseus_layer import Identity
@@ -28,7 +27,9 @@ from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_fro
MODEL_URLS = {
"TNT_small":
- "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams"
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams",
+ "TNT_base":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_base_pretrained.pdparams"
}
__all__ = MODEL_URLS.keys()
@@ -38,6 +39,14 @@ zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)
+class Identity(nn.Layer):
+ def __init__(self):
+ super(Identity, self).__init__()
+
+ def forward(self, inputs):
+ return inputs
+
+
def drop_path(x, drop_prob=0., training=False):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
@@ -165,8 +174,10 @@ class Block(nn.Layer):
act_layer=act_layer,
drop=drop)
- self.norm1_proj = norm_layer(in_dim)
- self.proj = nn.Linear(in_dim * num_pixel, dim)
+ self.norm1_proj = norm_layer(in_dim * num_pixel)
+ self.proj = nn.Linear(in_dim * num_pixel, dim, bias_attr=False)
+ self.norm2_proj = norm_layer(in_dim * num_pixel)
+
# Outer transformer
self.norm_out = norm_layer(dim)
self.attn_out = Attention(
@@ -196,11 +207,10 @@ class Block(nn.Layer):
self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed))))
# outer
B, N, C = patch_embed.shape
- norm1_proj = self.norm1_proj(pixel_embed)
- norm1_proj = norm1_proj.reshape(
- (B, N - 1, norm1_proj.shape[1] * norm1_proj.shape[2]))
- patch_embed[:, 1:] = paddle.add(patch_embed[:, 1:],
- self.proj(norm1_proj))
+ norm1_proj = pixel_embed.reshape(shape=[B, N - 1, C])
+ norm1_proj = self.norm1_proj(norm1_proj)
+ patch_embed[:, 1:] = paddle.add(
+ patch_embed[:, 1:], self.norm2_proj(self.proj(norm1_proj)))
patch_embed = paddle.add(
patch_embed,
self.drop_path(self.attn_out(self.norm_out(patch_embed))))
@@ -217,6 +227,7 @@ class PixelEmbed(nn.Layer):
in_dim=48,
stride=4):
super().__init__()
+ self.patch_size = patch_size
num_patches = (img_size // patch_size)**2
self.img_size = img_size
self.num_patches = num_patches
@@ -230,14 +241,12 @@ class PixelEmbed(nn.Layer):
def forward(self, x, pixel_pos):
B, C, H, W = x.shape
assert H == self.img_size and W == self.img_size, f"Input image size ({H}*{W}) doesn't match model ({self.img_size}*{self.img_size})."
-
- x = self.proj(x)
- x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size)
+ x = nn.functional.unfold(x, self.patch_size, self.patch_size)
x = x.transpose((0, 2, 1)).reshape(
- (-1, self.in_dim, self.new_patch_size, self.new_patch_size))
+ (-1, C, self.patch_size, self.patch_size))
+ x = self.proj(x)
+ x = x.reshape((-1, self.in_dim, self.patch_size)).transpose((0, 2, 1))
x = x + pixel_pos
- x = x.reshape((-1, self.in_dim, self.new_patch_size *
- self.new_patch_size)).transpose((0, 2, 1))
return x
@@ -288,8 +297,7 @@ class TNT(nn.Layer):
self.add_parameter("patch_pos", self.patch_pos)
self.pixel_pos = self.create_parameter(
- shape=(1, in_dim, new_patch_size, new_patch_size),
- default_initializer=zeros_)
+ shape=(1, patch_size, in_dim), default_initializer=zeros_)
self.add_parameter("pixel_pos", self.pixel_pos)
self.pos_drop = nn.Dropout(p=drop_rate)
@@ -345,7 +353,6 @@ class TNT(nn.Layer):
(self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
patch_embed = patch_embed + self.patch_pos
patch_embed = self.pos_drop(patch_embed)
-
for blk in self.blocks:
pixel_embed, patch_embed = blk(pixel_embed, patch_embed)
@@ -385,3 +392,17 @@ def TNT_small(pretrained=False, use_ssld=False, **kwargs):
_load_pretrained(
pretrained, model, MODEL_URLS["TNT_small"], use_ssld=use_ssld)
return model
+
+
+def TNT_base(pretrained=False, use_ssld=False, **kwargs):
+ model = TNT(patch_size=16,
+ embed_dim=640,
+ in_dim=40,
+ depth=12,
+ num_heads=10,
+ in_num_head=4,
+ qkv_bias=False,
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["TNT_base"], use_ssld=use_ssld)
+ return model
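The reworked `PixelEmbed.forward` now unfolds the raw image into non-overlapping patches and only then projects them, which is why the per-patch position embedding shrinks to shape `(1, patch_size, in_dim)`. A shape walk-through under the defaults used by `TNT_small`/`TNT_base` (224x224 input, `patch_size=16`); the tensors here are dummies:

```python
import paddle
import paddle.nn.functional as F

B, C, patch_size = 2, 3, 16
x = paddle.randn([B, C, 224, 224])

cols = F.unfold(x, patch_size, patch_size)   # one column per 16x16 patch
print(cols.shape)                            # [2, 768, 196]: C*16*16 values x 196 patches

patches = cols.transpose((0, 2, 1)).reshape([-1, C, patch_size, patch_size])
print(patches.shape)                         # [392, 3, 16, 16]: B*196 raw pixel patches
# self.proj then maps each patch to `patch_size` pixel tokens of width `in_dim`,
# which is what the (1, patch_size, in_dim) pixel_pos parameter is added to.
```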
diff --git a/ppcls/arch/backbone/model_zoo/van.py b/ppcls/arch/backbone/model_zoo/van.py
index ea7c88853d249534e4dd108188b3d96ad04ff708..887555c1017c2b11ab428fb45764a7785ddf1218 100644
--- a/ppcls/arch/backbone/model_zoo/van.py
+++ b/ppcls/arch/backbone/model_zoo/van.py
@@ -26,6 +26,12 @@ from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_fro
MODEL_URLS = {
"VAN_B0":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B0_pretrained.pdparams",
+ "VAN_B1":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B1_pretrained.pdparams",
+ "VAN_B2":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B2_pretrained.pdparams",
+ "VAN_B3":
+ "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VAN_B3_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
@@ -269,6 +275,7 @@ class VAN(nn.Layer):
x, H, W = patch_embed(x)
for blk in block:
x = blk(x)
+
x = x.flatten(2)
x = swapdim(x, 1, 2)
x = norm(x)
@@ -317,3 +324,39 @@ def VAN_B0(pretrained=False, use_ssld=False, **kwargs):
_load_pretrained(
pretrained, model, MODEL_URLS["VAN_B0"], use_ssld=use_ssld)
return model
+
+
+def VAN_B1(pretrained=False, use_ssld=False, **kwargs):
+ model = VAN(embed_dims=[64, 128, 320, 512],
+ mlp_ratios=[8, 8, 4, 4],
+ norm_layer=partial(
+ nn.LayerNorm, epsilon=1e-6),
+ depths=[2, 2, 4, 2],
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["VAN_B1"], use_ssld=use_ssld)
+ return model
+
+
+def VAN_B2(pretrained=False, use_ssld=False, **kwargs):
+ model = VAN(embed_dims=[64, 128, 320, 512],
+ mlp_ratios=[8, 8, 4, 4],
+ norm_layer=partial(
+ nn.LayerNorm, epsilon=1e-6),
+ depths=[3, 3, 12, 3],
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["VAN_B2"], use_ssld=use_ssld)
+ return model
+
+
+def VAN_B3(pretrained=False, use_ssld=False, **kwargs):
+ model = VAN(embed_dims=[64, 128, 320, 512],
+ mlp_ratios=[8, 8, 4, 4],
+ norm_layer=partial(
+ nn.LayerNorm, epsilon=1e-6),
+ depths=[3, 5, 27, 3],
+ **kwargs)
+ _load_pretrained(
+ pretrained, model, MODEL_URLS["VAN_B3"], use_ssld=use_ssld)
+ return model
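A small sanity-check sketch for the new VAN variants; the import assumes the package layout of the file above, and `pretrained=False` builds the architectures without downloading weights. The counts should land near the Params(M) values added to the docs:

```python
import numpy as np

from ppcls.arch.backbone.model_zoo.van import VAN_B1, VAN_B2, VAN_B3

for factory in (VAN_B1, VAN_B2, VAN_B3):
    model = factory(pretrained=False)
    n_params = sum(int(np.prod(p.shape)) for p in model.parameters())
    print(factory.__name__, f"{n_params / 1e6:.1f}M")  # roughly 13.9M / 26.6M / 44.8M
```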