Unverified · commit 31a55526 · authored by: W Walter · committed by: GitHub

Merge branch 'develop_reg' into develop_reg

Global:
rec_inference_model_dir: "./models/cartoon_rec_ResNet50_iCartoon_v1.0_infer/"
batch_size: 1
use_gpu: True
enable_mkldnn: True
cpu_num_threads: 100
enable_benchmark: True
use_fp16: False
ir_optim: True
use_tensorrt: False
gpu_mem: 8000
enable_profile: False
RecPreProcess:
transform_ops:
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./dataset/cartoon_demo_data_v1.0/index/"
image_root: "./dataset/cartoon_demo_data_v1.0/"
data_file: "./dataset/cartoon_demo_data_v1.0/data_file.txt"
delimiter: "\t"
dist_type: "IP"
pq_size: 100
embedding_size: 2048
Global:
rec_inference_model_dir: "./inshop/rec/"
rec_inference_model_dir: "./models/product_ResNet50_vd_Inshop_v1.0_infer"
batch_size: 1
use_gpu: True
enable_mkldnn: True
@@ -26,9 +26,9 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./inshop/inshop_index/"
image_root: "./inshop/dataset/"
data_file: "./inshop/inshop_gallery_demo.txt"
index_path: "./dataset/product_demo_data_v1.0/index"
image_root: "./dataset/product_demo_data_v1.0"
data_file: "./dataset/product_demo_data_v1.0/data_file.txt"
delimiter: " "
dist_type: "IP"
pq_size: 100
......
Global:
rec_inference_model_dir: "./logo/model/"
rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/"
batch_size: 1
use_gpu: True
enable_mkldnn: True
@@ -26,9 +26,9 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./logo/logo_index/"
image_root: "./logo/dataset/"
data_file: "./logo/logo_gallery_demo.txt"
index_path: "./dataset/logo_demo_data_v1.0/index/"
image_root: "./dataset/logo_demo_data_v1.0/"
data_file: "./dataset/logo_demo_data_v1.0/data_file.txt"
delimiter: "\t"
dist_type: "IP"
pq_size: 100
......
Global:
rec_inference_model_dir: "./vehicle/model/"
rec_inference_model_dir: "./models/vehicle_cls_ResNet50_CompCars_v1.0_infer/"
batch_size: 1
use_gpu: True
enable_mkldnn: True
@@ -26,9 +26,9 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./vehilce/vehicle_index/"
image_root: "./vehicle/dataset/"
data_file: "./vehilce/demo_gallery.txt"
index_path: "./dataset/vehicle_demo_data_v1.0/index/"
image_root: "./dataset/vehicle_demo_data_v1.0/"
data_file: "./dataset/vehicle_demo_data_v1.0/data_file.txt"
delimiter: " "
dist_type: "IP"
pq_size: 100
......
Global:
infer_imgs: "./dataset/iCartoonFace/val2/0000000.jpg"
det_inference_model_dir: "./output/det"
rec_inference_model_dir: "./output/"
infer_imgs: "./dataset/cartoon_demo_data_v1.0/query/"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/"
rec_inference_model_dir: "./models/cartoon_rec_ResNet50_iCartoon_v1.0_infer/"
batch_size: 1
image_shape: [3, 640, 640]
threshold: 0.5
@@ -9,7 +9,6 @@ Global:
labe_list:
- foreground
# inference engine config
use_gpu: True
enable_mkldnn: True
cpu_num_threads: 100
@@ -34,7 +33,6 @@ DetPreProcess:
DetPostProcess: {}
RecPreProcess:
transform_ops:
- ResizeImage:
@@ -50,18 +48,8 @@ RecPreProcess:
RecPostProcess: null
# indexing engine config
IndexProcess:
build:
enable: False
index_path: "./icartoon_index/"
image_root: "./dataset/iCartoonFace"
data_file: "./dataset/iCartoonFace/gallery_pesudo.txt"
spacer: "\t"
dist_type: "IP"
pq_size: 100
embedding_size: 2048
infer:
index_path: "./icartoon_index/"
search_budget: 100
return_k: 10
index_path: "./dataset/cartoon_demo_data_v1.0/index/"
search_budget: 100
return_k: 5
dist_type: "IP"
Global:
infer_imgs: "../docs/images/whl/demo.jpg"
inference_model_dir: "./MobileNetV1_infer/"
inference_model_dir: "../inference/"
batch_size: 1
use_gpu: True
enable_mkldnn: True
@@ -27,4 +27,4 @@ PreProcess:
PostProcess:
name: Topk
topk: 5
class_id_map_file: "ppcls/utils/imagenet1k_label_list.txt"
\ No newline at end of file
class_id_map_file: "../ppcls/utils/imagenet1k_label_list.txt"
\ No newline at end of file
Global:
infer_imgs: "./inshop/demo/01_3_back.jpg"
det_inference_model_dir: "./inshop/det/"
rec_inference_model_dir: "./inshop/rec/"
infer_imgs: "./dataset/product_demo_data_v1.0/query"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer"
rec_inference_model_dir: "./models/product_ResNet50_vd_Inshop_v1.0_infer"
batch_size: 1
image_shape: [3, 640, 640]
threshold: 0.0
max_det_results: 3
max_det_results: 1
labe_list:
- foreground
@@ -48,7 +48,7 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./inshop/inshop_index"
index_path: "./dataset/product_demo_data_v1.0/index"
search_budget: 100
return_k: 10
return_k: 5
dist_type: "IP"
Global:
infer_imgs: "./logo/demo/logo_APK.jpg"
det_inference_model_dir: "./logo/det/"
rec_inference_model_dir: "./logo/rec/"
infer_imgs: "./dataset/logo_demo_data_v1.0/query/logo_AKG.jpg"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/"
rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/"
batch_size: 1
image_shape: [3, 640, 640]
threshold: 0.5
@@ -48,7 +48,7 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./logo_index/"
index_path: "./dataset/logo_demo_data_v1.0/index/"
search_budget: 100
return_k: 10
return_k: 5
dist_type: "IP"
Global:
infer_imgs: "./vehicle/demo/2e3521935c280c.jpg"
det_inference_model_dir: "./det/"
rec_inference_model_dir: "./vehicle/rec/"
infer_imgs: "./dataset/vehicle_demo_data_v1.0/query/"
det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/"
rec_inference_model_dir: "./models/vehicle_cls_ResNet50_CompCars_v1.0_infer/"
batch_size: 1
image_shape: [3, 640, 640]
threshold: 0.5
@@ -50,7 +50,7 @@ RecPostProcess: null
# indexing engine config
IndexProcess:
index_path: "./vehicle_index/"
index_path: "./dataset/vehicle_demo_data_v1.0/index/"
search_budget: 100
return_k: 10
return_k: 5
dist_type: "IP"
# DLA series
## Overview
DLA stands for Deep Layer Aggregation. Visual recognition requires rich representations that span levels from low to high, scales from small to large, and resolutions from fine to coarse. Even with the depth of features in a convolutional network, a layer in isolation is not enough: compounding and aggregating these representations improves inference of what and where. Although skip connections have been incorporated to combine layers, these connections have themselves been "shallow", fusing only by simple one-step operations. The authors augment standard architectures with deeper aggregation to better fuse information across layers. Deep layer aggregation structures merge the feature hierarchy iteratively and hierarchically, yielding networks with better accuracy and fewer parameters. Experiments across architectures and tasks show that deep layer aggregation improves recognition and resolution compared with existing branching and merging schemes. [Paper](https://arxiv.org/abs/1707.06484)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:-----------------:|:----------:|:---------:|:---------:|:---------:|
| DLA34 | 15.8 | 3.1 | 76.03 | 92.98 |
| DLA46_c | 1.3 | 0.5 | 63.21 | 85.30 |
| DLA46x_c | 1.1 | 0.5 | 64.36 | 86.01 |
| DLA60 | 22.0 | 4.2 | 76.10 | 92.92 |
| DLA60x | 17.4 | 3.5 | 77.53 | 93.78 |
| DLA60x_c | 1.3 | 0.6 | 66.45 | 87.54 |
| DLA102 | 33.3 | 7.2 | 78.93 | 94.52 |
| DLA102x | 26.4 | 5.9 | 78.10 | 94.00 |
| DLA102x2 | 41.4 | 9.3 | 78.85 | 94.45 |
| DLA169 | 53.5 | 11.6 | 78.09 | 94.09 |
# HarDNet series
## Overview
HarDNet (Harmonic DenseNet) is a neural network proposed by National Tsing Hua University in 2019 that achieves high efficiency in terms of both low MACs and low memory traffic. It reduces inference time by 35%, 36%, 30%, 32%, and 45% compared with FC-DenseNet-103, DenseNet-264, ResNet-50, ResNet-152, and SSD-VGG, respectively. The authors used tools including the Nvidia profiler and ARM Scale-Sim to measure memory traffic, verifying that inference latency is indeed proportional to memory-traffic consumption and that the proposed network keeps it low. [Paper](https://arxiv.org/abs/1909.00948)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| HarDNet68 | 17.6 | 4.3 | 75.46 | 92.65 |
| HarDNet85 | 36.7 | 9.1 | 77.44 | 93.55 |
| HarDNet39_ds | 3.5 | 0.4 | 71.33 | 89.98 |
| HarDNet68_ds | 4.2 | 0.8 | 73.62 | 91.52 |
\ No newline at end of file
# RedNet series
## Overview
RedNet replaces the convolutions at all bottleneck positions in the ResNet backbone with involution, while retaining all convolutions used for channel mapping and fusion. These carefully redesigned components combine into a new, efficient backbone network called RedNet. [Paper](https://arxiv.org/abs/2103.06255)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| RedNet26 | 9.2 | 1.7 | 75.95 | 93.19 |
| RedNet38 | 12.4 | 2.2 | 77.47 | 93.56 |
| RedNet50 | 15.5 | 2.7 | 78.33 | 94.17 |
| RedNet101 | 25.7 | 4.7 | 78.94 | 94.36 |
| RedNet152 | 34.0 | 6.8 | 79.17 | 94.40 |
\ No newline at end of file
# TNT series
## Overview
TNT (Transformer-iN-Transformer) series models were proposed by Huawei-Noah in 2021 for modeling both patch-level and pixel-level representations. In each TNT block, an outer transformer block processes patch embeddings, and an inner transformer block extracts local features from pixel embeddings. Pixel-level features are projected into the patch-embedding space by a linear transformation layer and then added to the patches. Stacking TNT blocks builds the TNT model for image recognition. Experiments on the ImageNet benchmark and downstream tasks demonstrate the superiority and efficiency of the TNT architecture; for example, TNT achieves 81.3% top-1 accuracy on ImageNet, which is 1.5% higher than DeiT at similar computational cost. [Paper](https://arxiv.org/abs/2103.00112)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| TNT_small | 23.8 | 5.2 | 81.12 | 95.56 |
\ No newline at end of file
# Logo Recognition
Logo recognition is widely used in everyday applications, for example checking whether an Adidas or Nike logo appears in a photo, or whether a Starbucks or Coca-Cola logo appears on a cup. When there are many logo categories, a two-stage detection-plus-recognition pipeline is usually adopted: the detection module locates candidate logo regions, which are cropped out and fed into the recognition module. The recognition module typically works by retrieval, ranking gallery images by their similarity to the query image to obtain the predicted category. This document covers the feature-extraction part for logo images, including:
- the dataset and its preprocessing
- the backbone configuration
- the loss-function settings
All hyperparameters and the full configuration: [ResNet50_ReID.yaml](../../../ppcls/configs/Logo/ResNet50_ReID.yaml)
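A minimal sketch of the retrieval step just described (illustrative only, not the PaddleClas implementation): rank the gallery by inner-product similarity of L2-normalized features and return the labels of the top matches.

```python
import numpy as np

def retrieve(query_feat, gallery_feats, gallery_labels, return_k=5):
    # L2-normalize so that inner product ("IP" distance type) equals cosine similarity
    q = query_feat / np.linalg.norm(query_feat)
    g = gallery_feats / np.linalg.norm(gallery_feats, axis=1, keepdims=True)
    sims = g @ q
    topk = np.argsort(-sims)[:return_k]  # indices of the most similar gallery images
    return [gallery_labels[i] for i in topk], sims[topk]
```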
## Dataset and Preprocessing
### The LogoDet-3K Dataset
<img src="../../images/logo/logodet3k.jpg" style="zoom:50%;" />
LogoDet-3K is a fully annotated logo dataset with 3,000 logo categories, about 200,000 high-quality manually annotated logo objects, and 158,652 images. See the [original paper](https://arxiv.org/abs/2008.05359) for details.
## Data Preprocessing
In the original dataset the images carry annotated detection boxes, while the recognition stage only sees the logo regions cropped out by the detector. The training set is therefore built by cropping logo regions with the original annotation boxes, removing background influence at the recognition stage. The data is split into 155,427 training images covering all 3,000 logo classes (these also serve as the gallery at test time) and 3,225 test images used as the query set. The cropped training set can be [downloaded here](https://arxiv.org/abs/2008.05359); a cropping sketch is given below.
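A hedged illustration of the cropping step (the box format is assumed to be pixel coordinates `[x1, y1, x2, y2]`; the function name is made up):

```python
from PIL import Image

def crop_logo(image_path, box, out_path):
    # cut the annotated logo region out of the full image
    x1, y1, x2, y2 = box
    Image.open(image_path).crop((x1, y1, x2, y2)).save(out_path)
```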
The training augmentation pipeline is:
- `Resize` the image to 224
- random horizontal flip
- [AugMix](https://arxiv.org/abs/1912.02781v1)
- Normalize: scale pixel values to the 0-1 range
- [RandomErasing](https://arxiv.org/pdf/1708.04896v2.pdf)

It is set in the configuration file as follows; see the `transform_ops` section for details:
```yaml
DataLoader:
Train:
dataset:
      # name of the Dataset class to use
name: "LogoDataset"
      # parameters specific to this dataset
image_root: "dataset/LogoDet-3K-crop/train/"
cls_label_path: "dataset/LogoDet-3K-crop/LogoDet-3K+train.txt"
      # image augmentation ops: ResizeImage, RandFlipImage, etc.
transform_ops:
- ResizeImage:
size: 224
- RandFlipImage:
flip_code: 1
- AugMix:
prob: 0.5
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.5
sampler:
name: DistributedRandomIdentitySampler
batch_size: 128
num_instances: 2
drop_last: False
shuffle: True
loader:
num_workers: 6
use_shared_memory: False
```
## Backbone Configuration
`ResNet50` is used as the backbone, with the following modifications:
- ImageNet-pretrained weights are loaded
- the last stage uses stride=1, keeping the final feature map at 14x14
- an embedding convolution layer with feature dimension 512 is appended at the end

Code: [ResNet50_last_stage_stride1](../../../ppcls/arch/backbone/variant_models/resnet_variant.py)
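A rough sketch of the appended embedding layer (illustrative names and shapes; the real variant lives in `resnet_variant.py`, with the neck implemented as `VehicleNeck`):

```python
import paddle.nn as nn

class EmbeddingNeck(nn.Layer):
    """1x1 conv projecting the backbone's 2048-d pooled features to a 512-d embedding."""

    def __init__(self, in_channels=2048, out_channels=512):
        super().__init__()
        self.conv = nn.Conv2D(in_channels, out_channels, kernel_size=1)
        self.flatten = nn.Flatten()

    def forward(self, x):
        # x: [N, 2048, 1, 1] after global average pooling
        return self.flatten(self.conv(x))  # [N, 512]
```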
The Backbone section of the configuration file is:
```yaml
Arch:
  # train with the RecModel architecture; plain ImageNet models and RecModel are currently supported
name: "RecModel"
  # settings for exporting the inference model
infer_output_key: "features"
infer_add_softmax: False
  # backbone to use
Backbone:
name: "ResNet50_last_stage_stride1"
pretrained: True
  # use this layer as the backbone's feature output; name is the layer's full_name
BackboneStopLayer:
name: "adaptive_avg_pool2d_0"
  # extra layers added on top of the backbone; this model adds a 1x1 convolution (embedding) layer
Neck:
name: "VehicleNeck"
in_channels: 2048
out_channels: 512
  # add a CircleMargin head
Head:
name: "CircleMargin"
margin: 0.35
scale: 64
embedding_size: 512
```
## Loss Configuration
Logo recognition is trained jointly with [Pairwise Cosface + CircleMargin](https://arxiv.org/abs/2002.10857), weighted 1:1.
Code: [PairwiseCosface](../../../ppcls/loss/pairwisecosface.py), [CircleMargin](../../../ppcls/arch/gears/circlemargin.py)
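For reference, the unified Circle Loss objective from the linked paper takes the form below, where $s_p^i$ and $s_n^j$ are within-class and between-class similarity scores, $\gamma$ is the scale factor, and $m$ is the relaxation margin (matching `margin: 0.35` and `gamma: 64` in the configuration):

$$
\mathcal{L}_{circle}=\log\Big[1+\sum_{j}\exp\big(\gamma\,\alpha_n^j(s_n^j-\Delta_n)\big)\sum_{i}\exp\big(-\gamma\,\alpha_p^i(s_p^i-\Delta_p)\big)\Big]
$$

with $\alpha_p^i=[1+m-s_p^i]_+$, $\alpha_n^j=[s_n^j+m]_+$, $\Delta_p=1-m$, and $\Delta_n=m$.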
It is configured as follows:
```yaml
Loss:
Train:
- CELoss:
weight: 1.0
- PairwiseCosface:
margin: 0.35
gamma: 64
weight: 1.0
Eval:
- CELoss:
weight: 1.0
```
## Other Settings
### Optimizer
```yaml
Optimizer:
  # optimizer name
name: Momentum
  # optimizer parameters
momentum: 0.9
lr:
    # learning-rate schedule name
name: Cosine
    # schedule parameters
learning_rate: 0.01
regularizer:
name: 'L2'
coeff: 0.0001
```
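The `Cosine` schedule decays the learning rate from its initial value toward zero over the course of training. A minimal sketch, assuming no warmup:

```python
import math

def cosine_lr(step, total_steps, base_lr=0.01):
    # decays from base_lr at step 0 to ~0 at total_steps
    return 0.5 * base_lr * (1 + math.cos(math.pi * step / total_steps))
```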
### Eval Metric Configuration
```yaml
Metric:
Eval:
    # evaluate with both Recall@k and mAP
- Recallk:
topk: [1, 5]
- mAP: {}
```
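Recall@k counts a query as a hit if any of its top-k retrieved gallery images shares the query's label. A rough sketch (illustrative, not the PaddleClas implementation):

```python
import numpy as np

def recall_at_k(ranked_labels, query_labels, topk=(1, 5)):
    # ranked_labels: [num_query, num_gallery] gallery labels sorted by similarity
    return {k: float((ranked_labels[:, :k] == query_labels[:, None]).any(axis=1).mean())
            for k in topk}
```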
### Other Hyperparameters
```yaml
Global:
  # null trains from scratch; set a saved training-state path to resume
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
class_num: 3000
  # model-saving granularity: save once per epoch
save_interval: 1
eval_during_train: True
eval_interval: 1
  # number of training epochs
epochs: 120
  # logging frequency
print_batch_step: 10
  # whether to use the VisualDL library
use_visualdl: False
# used for static mode and model export
image_shape: [3, 224, 224]
save_inference_dir: "./inference"
  # evaluate in retrieval mode
eval_mode: "retrieval"
```
@@ -128,8 +128,8 @@ Among the ResNet series models, compared with the others, the ResNet_vd model's prediction speed
**A**
* For single-image augmentation, refer to the [single-image augmentation operators](../../../ppcls/data/imaug/operators.py); following data operators such as `ResizeImage` or `CropImage`, create a new class and implement the augmentation in its `__call__` method.
* For batch-level augmentation, refer to the [batch augmentation operators](../../../ppcls/data/imaug/batch_operators.py); following data operators such as `MixupOperator` or `CutmixOperator`, create a new class and implement the augmentation in its `__call__` method.
* For single-image augmentation, refer to the [single-image augmentation operators](../../../ppcls/data/preprocess/ops); following data operators such as `ResizeImage` or `CropImage`, create a new class and implement the augmentation in its `__call__` method.
* For batch-level augmentation, refer to the [batch augmentation operators](../../../ppcls/data/preprocess/batch_ops); following data operators such as `MixupOperator` or `CutmixOperator`, create a new class and implement the augmentation in its `__call__` method.
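A hedged sketch of such a single-image operator (the class name and luma weights are illustrative; it assumes HWC `numpy` images, like the other ops):

```python
import random
import numpy as np

class RandomGrayscale(object):
    """Convert an HWC image to 3-channel grayscale with probability `prob`."""

    def __init__(self, prob=0.1):
        self.prob = prob

    def __call__(self, img):
        if random.random() < self.prob:
            # ITU-R BT.601 luma weights; keep 3 channels so later ops still apply
            gray = img[..., 0] * 0.299 + img[..., 1] * 0.587 + img[..., 2] * 0.114
            img = np.stack([gray, gray, gray], axis=-1).astype(img.dtype)
        return img
```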
## Q3.5: How can I further speed up model training?
......
# DLA Series
## Overview
DLA stands for Deep Layer Aggregation. Visual recognition requires rich representations spanning levels from low to high, scales from small to large, and resolutions from fine to coarse. Even with deep features in a convolutional network, a layer in isolation is not enough: compounding and aggregating these representations improves inference of what and where. Although residual connections have been incorporated to combine layers, these connections are themselves "shallow" and fuse only by simple one-step operations. The authors augment standard architectures with deeper aggregation to better fuse information across layers. Deep Layer Aggregation structures merge the feature hierarchy iteratively and hierarchically, making networks more accurate with fewer parameters. Experiments across architectures and tasks show that, compared with existing branching and merging schemes, Deep Layer Aggregation improves recognition and resolution. [Paper](https://arxiv.org/abs/1707.06484)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:-----------------:|:----------:|:---------:|:---------:|:---------:|
| DLA34 | 15.8 | 3.1 | 76.03 | 92.98 |
| DLA46_c | 1.3 | 0.5 | 63.21 | 85.30 |
| DLA46x_c | 1.1 | 0.5 | 64.36 | 86.01 |
| DLA60 | 22.0 | 4.2 | 76.10 | 92.92 |
| DLA60x | 17.4 | 3.5 | 77.53 | 93.78 |
| DLA60x_c | 1.3 | 0.6 | 66.45 | 87.54 |
| DLA102 | 33.3 | 7.2 | 78.93 | 94.52 |
| DLA102x | 26.4 | 5.9 | 78.10 | 94.00 |
| DLA102x2 | 41.4 | 9.3 | 78.85 | 94.45 |
| DLA169 | 53.5 | 11.6 | 78.09 | 94.09 |
\ No newline at end of file
# HarDNet Series
## Overview
HarDNet (Harmonic DenseNet) is a neural network proposed by National Tsing Hua University in 2019 that achieves high efficiency under low MACs and low memory traffic. Compared with FC-DenseNet-103, DenseNet-264, ResNet-50, ResNet-152, and SSD-VGG, it reduces inference time by 35%, 36%, 30%, 32%, and 45%, respectively. The authors used tools including the Nvidia profiler and ARM Scale-Sim to measure memory traffic, verifying that inference latency is indeed proportional to memory-traffic consumption and that the proposed network keeps it low. [Paper](https://arxiv.org/abs/1909.00948)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| HarDNet68 | 17.6 | 4.3 | 75.46 | 92.65 |
| HarDNet85 | 36.7 | 9.1 | 77.44 | 93.55 |
| HarDNet39_ds | 3.5 | 0.4 | 71.33 | 89.98 |
| HarDNet68_ds | 4.2 | 0.8 | 73.62 | 91.52 |
# LeViT
## Overview
LeViT is a hybrid neural network for fast-inference image classification. Its design accounts for the model's performance across different hardware platforms, so it better reflects real-world deployment scenarios. Through extensive experiments, the authors found a better way to combine convolutional networks with the Transformer architecture, and proposed an attention-based method for integrating positional information into the Transformer. [Paper](https://arxiv.org/abs/2104.01136)
## Accuracy, FLOPs and Parameters
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPS<br>(M) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
| LeViT-128S | 0.7598 | 0.9269 | 0.766 | 0.929 | 305 | 7.8 |
| LeViT-128 | 0.7810 | 0.9371 | 0.786 | 0.940 | 406 | 9.2 |
| LeViT-192 | 0.7934 | 0.9446 | 0.800 | 0.947 | 658 | 11 |
| LeViT-256 | 0.8085 | 0.9497 | 0.816 | 0.954 | 1120 | 19 |
| LeViT-384 | 0.8191 | 0.9551 | 0.826 | 0.960 | 2353 | 39 |
**Note**: The accuracy gap with respect to the reference stems from different data preprocessing and from not using the distilled head as the output.
# RedNet Series
## Overview
RedNet replaces the convolutions at all bottleneck positions in the ResNet backbone with Involution, while keeping all convolutions used for channel mapping and fusion. These carefully redesigned components together form a new, efficient backbone network called RedNet. [Paper](https://arxiv.org/abs/2103.06255)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| RedNet26 | 9.2 | 1.7 | 75.95 | 93.19 |
| RedNet38 | 12.4 | 2.2 | 77.47 | 93.56 |
| RedNet50 | 15.5 | 2.7 | 78.33 | 94.17 |
| RedNet101 | 25.7 | 4.7 | 78.94 | 94.36 |
| RedNet152 | 34.0 | 6.8 | 79.17 | 94.40 |
\ No newline at end of file
# TNT Series
## Overview
The TNT (Transformer-iN-Transformer) series was proposed by Huawei-Noah in 2021 to model both patch-level and pixel-level representations. In each TNT block, an outer transformer block processes patch embeddings while an inner transformer block extracts local features from pixel embeddings. Pixel-level features are projected into the patch-embedding space through a linear transformation layer and then added to the patches. Stacking TNT blocks builds the TNT model for image recognition. Experiments on the ImageNet benchmark and downstream tasks demonstrate the superiority and effectiveness of the TNT architecture; for example, at comparable computational cost TNT reaches 81.3% top-1 accuracy on ImageNet, 1.5% higher than DeiT. [Paper](https://arxiv.org/abs/2103.00112)
## Accuracy, FLOPs and Parameters
| Model | Params (M) | FLOPs (G) | Top-1 (%) | Top-5 (%) |
|:---------------------:|:----------:|:---------:|:---------:|:---------:|
| TNT_small | 23.8 | 5.2 | 81.21 | 95.63 |
# Twins
## Overview
The Twins family comprises Twins-PCPVT and Twins-SVT. It focuses on a careful redesign of the spatial attention mechanism, yielding a simple yet more effective scheme. Since the architecture involves only matrix multiplications, which current deep-learning frameworks optimize heavily, it is both efficient and easy to implement. Moreover, it achieves excellent performance on a variety of downstream vision tasks such as image classification, object detection, and semantic segmentation. [Paper](https://arxiv.org/abs/2104.13840)
## Accuracy, FLOPs and Parameters
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPS<br>(G) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
| pcpvt_small | 0.8082 | 0.9552 | 0.812 | - | 3.7 | 24.1 |
| pcpvt_base | 0.8242 | 0.9619 | 0.827 | - | 6.4 | 43.8 |
| pcpvt_large | 0.8273 | 0.9650 | 0.831 | - | 9.5 | 60.9 |
| alt_gvt_small | 0.8140 | 0.9546 | 0.817 | - | 2.8 | 24 |
| alt_gvt_base | 0.8294 | 0.9621 | 0.832 | - | 8.3 | 56 |
| alt_gvt_large | 0.8331 | 0.9642 | 0.837 | - | 14.8 | 99.2 |
**Note**: The accuracy gap with respect to the reference stems from different data preprocessing.
# Image Recognition Quick Start
Image recognition consists of three parts: mainbody detection to obtain bounding boxes, recognition to extract features, and retrieval based on those features.
## 1. Environment Setup
* Please refer to [Quick Installation](./installation.md) to set up the PaddleClas environment first.
Note:
**This section must be run inside the `deploy` folder; from the root of the PaddleClas repository, enter it with:**
```shell
cd deploy
```
## 2. Downloading the Inference Models and Data
The detection model, the recognition inference models for the four directions (logo, cartoon character, vehicle, product), and the test data can be downloaded as follows.
| Model | Recommended scenario | Test data | Inference model |
| ------------ | ------------- | ------- | -------- |
| General mainbody detection model | general scenarios | - | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar) |
| Logo recognition model | logos | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/logo_demo_data_v1.0.tar) | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar) |
| Cartoon character recognition model | cartoon characters | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/cartoon_demo_data_v1.0.tar) | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/cartoon_rec_ResNet50_iCartoon_v1.0_infer.tar) |
| Vehicle fine-grained classification model | vehicles | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/vehicle_demo_data_v1.0.tar) | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/vehicle_cls_ResNet50_CompCars_v1.0_infer.tar) |
| Product recognition model | products | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/product_demo_data_v1.0.tar) | [download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_Inshop_v1.0_infer.tar) |
**Note**: On Windows, if wget is not installed, you can copy the links into a browser to download the files, then extract them into the corresponding directories.
* Download and extract the data and models
```shell
mkdir dataset
cd dataset
# download and extract the demo data
wget {url/of/data} && tar -xf {name/of/data/package}
cd ..
mkdir models
cd models
# download and extract the recognition inference model
wget {url/of/inference model} && tar -xf {name/of/inference model/package}
cd ..
```
### 2.1 Downloading the General Detection Model
```shell
mkdir models
cd models
# download and extract the general detection inference model
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar && tar -xf ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar
cd ..
```
### 2.2 Logo Recognition
Taking the logo recognition demo as an example, download the demo data and model with the following commands.
```shell
mkdir dataset
cd dataset
# download and extract the demo data
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/data/logo_demo_data_v1.0.tar && tar -xf logo_demo_data_v1.0.tar
cd ..
mkdir models
cd models
# download and extract the recognition inference model
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/logo_rec_ResNet50_Logo3K_v1.0_infer.tar && tar -xf logo_rec_ResNet50_Logo3K_v1.0_infer.tar
cd ..
```
After extraction, the `dataset` folder should contain the following structure:
```
├── logo_demo_data_v1.0
│ ├── data_file.txt
│ ├── gallery
│ ├── index
│ └── query
├── ...
```
The `models` folder should contain:
```
├── logo_rec_ResNet50_Logo3K_v1.0_infer
│ ├── inference.pdiparams
│ ├── inference.pdiparams.info
│ └── inference.pdmodel
├── ppyolov2_r50vd_dcn_mainbody_v1.0_infer
│ ├── inference.pdiparams
│ ├── inference.pdiparams.info
│ └── inference.pdmodel
```
Image retrieval can then be run with:
```shell
python3.7 python/predict_system.py -c configs/inference_logo.yaml
```
Some key fields of the configuration file are explained below:
```yaml
Global:
  infer_imgs: "./dataset/logo_demo_data_v1.0/query/" # image(s) to predict
  det_inference_model_dir: "./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer/" # detection inference model directory
  rec_inference_model_dir: "./models/logo_rec_ResNet50_Logo3K_v1.0_infer/" # recognition inference model directory
  batch_size: 1 # prediction batch size
  image_shape: [3, 640, 640] # input image size for detection
  threshold: 0.5 # detection threshold; only boxes scoring above it are kept
  max_det_results: 1 # number of detection boxes passed on to recognition; among boxes above the threshold, at most max_det_results of the highest-scoring ones are recognized
# indexing engine config
IndexProcess:
  index_path: "./dataset/logo_demo_data_v1.0/index/" # index directory holding the gallery features for retrieval
search_budget: 100
  return_k: 5 # return the return_k most similar gallery entries
dist_type: "IP"
```
The final output looks like:
```
[{'bbox': [25, 21, 483, 382], 'rec_docs': ['AKG', 'AKG', 'AKG', 'AKG', 'AKG'], 'rec_scores': array([2.32288337, 2.31903863, 2.28398442, 2.16804123, 2.10190272])}]
```
Here `bbox` gives the location of the detected object, `rec_docs` lists the labels of the gallery images most similar to it, and `rec_scores` gives the corresponding similarities.
To predict all images in a folder, either edit the configuration file directly or override the option with the `-o` flag as below.
```shell
python3.7 python/predict_system.py -c configs/inference_logo.yaml -o Global.infer_imgs="./dataset/logo_demo_data_v1.0/query"
```
To add images to the gallery, rebuild the index with the following command.
```shell
python3.7 python/build_gallery.py -c configs/build_logo.yaml
```
The index-related configuration is:
```yaml
# indexing engine config
IndexProcess:
  index_path: "./dataset/logo_demo_data_v1.0/index/" # where the built index is saved
  image_root: "./dataset/logo_demo_data_v1.0/" # root directory of the images
  data_file: "./dataset/logo_demo_data_v1.0/data_file.txt" # image list file; each line holds an image filename and its label
delimiter: "\t"
dist_type: "IP"
pq_size: 100
  embedding_size: 512 # feature dimension
```
What needs to change:
1. Add the new images under the image root directory (subfolders are fine, as long as joining the root directory with the filename recorded in the list file yields an existing image).
2. Append the new entries to the image list file, one line per image with its filename and label, as in the example below.
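A hedged illustration of the list-file format for the logo task (tab-delimited, matching `delimiter: "\t"` above; the paths and labels are made up):

```
gallery/AKG/5.jpg	AKG
gallery/Adidas/12.jpg	Adidas
```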
### 2.3 Recognition for Other Tasks
To try recognition and retrieval in the other directions, download and extract the corresponding demo data and models, then run prediction with the matching configuration files.
| Scenario | Prediction config | Gallery-building config |
| ---- | ----- | ----- |
| Cartoon characters | [inference_cartoon.yaml](../../../deploy/configs/inference_cartoon.yaml) | [build_cartoon.yaml](../../../deploy/configs/build_cartoon.yaml) |
| Vehicles | [inference_vehicle.yaml](../../../deploy/configs/inference_vehicle.yaml) | [build_vehicle.yaml](../../../deploy/configs/build_vehicle.yaml) |
| Products | [inference_inshop.yaml](../../../deploy/configs/) | [build_inshop.yaml](../../../deploy/configs/build_inshop.yaml) |
@@ -21,8 +21,9 @@ from . import backbone, gears
from .backbone import *
from .gears import build_gear
from .utils import *
from ppcls.utils.save_load import load_dygraph_pretrain
__all__ = ["build_model", "RecModel"]
__all__ = ["build_model", "RecModel", "DistillationModel"]
def build_model(config):
@@ -62,3 +63,48 @@ class RecModel(nn.Layer):
else:
y = None
return {"features": x, "logits": y}
class DistillationModel(nn.Layer):
def __init__(self,
models=None,
pretrained_list=None,
freeze_params_list=None,
**kargs):
super().__init__()
assert isinstance(models, list)
self.model_list = []
self.model_name_list = []
if pretrained_list is not None:
assert len(pretrained_list) == len(models)
if freeze_params_list is None:
freeze_params_list = [False] * len(models)
assert len(freeze_params_list) == len(models)
for idx, model_config in enumerate(models):
assert len(model_config) == 1
key = list(model_config.keys())[0]
model_config = model_config[key]
model_name = model_config.pop("name")
model = eval(model_name)(**model_config)
if freeze_params_list[idx]:
for param in model.parameters():
param.trainable = False
self.model_list.append(self.add_sublayer(key, model))
self.model_name_list.append(key)
if pretrained_list is not None:
for idx, pretrained in enumerate(pretrained_list):
if pretrained is not None:
                # load into the sublayer itself, not its name string
                load_dygraph_pretrain(
                    self.model_list[idx], path=pretrained)
def forward(self, x, label=None):
result_dict = dict()
for idx, model_name in enumerate(self.model_name_list):
if label is None:
result_dict[model_name] = self.model_list[idx](x)
else:
result_dict[model_name] = self.model_list[idx](x, label)
return result_dict
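A hedged usage sketch of `DistillationModel` (the sub-model names and backbone choices here are illustrative): each entry in `models` becomes a sublayer keyed by its name, optionally frozen, and `forward` returns a dict keyed the same way.

```python
import paddle

# a frozen teacher and a trainable student (backbone names assumed importable
# via `from .backbone import *` in this module)
arch_cfg = [
    {"Teacher": {"name": "ResNet50_vd", "pretrained": False}},
    {"Student": {"name": "MobileNetV3_large_x1_0", "pretrained": False}},
]
model = DistillationModel(models=arch_cfg, freeze_params_list=[True, False])
out = model(paddle.rand([1, 3, 224, 224]))  # {"Teacher": ..., "Student": ...}
```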
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -19,11 +19,12 @@ from ppcls.arch.backbone.legendary_models.vgg import VGG11, VGG13, VGG16, VGG19
from ppcls.arch.backbone.legendary_models.inception_v3 import InceptionV3
from ppcls.arch.backbone.legendary_models.hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W64_C
from ppcls.arch.backbone.model_zoo.resnet_vc import ResNet18_vc, ResNet34_vc, ResNet50_vc, ResNet101_vc, ResNet152_vc
from ppcls.arch.backbone.model_zoo.resnet_vc import ResNet50_vc
from ppcls.arch.backbone.model_zoo.resnext import ResNeXt50_32x4d, ResNeXt50_64x4d, ResNeXt101_32x4d, ResNeXt101_64x4d, ResNeXt152_32x4d, ResNeXt152_64x4d
from ppcls.arch.backbone.model_zoo.res2net import Res2Net50_48w_2s, Res2Net50_26w_4s, Res2Net50_14w_8s, Res2Net50_48w_2s, Res2Net50_26w_6s, Res2Net50_26w_8s, Res2Net101_26w_4s, Res2Net152_26w_4s, Res2Net200_26w_4s
from ppcls.arch.backbone.model_zoo.res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_14w_8s, Res2Net50_vd_48w_2s, Res2Net50_vd_26w_6s, Res2Net50_vd_26w_8s, Res2Net101_vd_26w_4s, Res2Net152_vd_26w_4s, Res2Net200_vd_26w_4s
from ppcls.arch.backbone.model_zoo.se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd, SE_ResNet101_vd, SE_ResNet152_vd, SE_ResNet200_vd
from ppcls.arch.backbone.model_zoo.resnext_vd import ResNeXt50_vd_32x4d, ResNeXt50_vd_64x4d, ResNeXt101_vd_32x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_32x4d, ResNeXt152_vd_64x4d
from ppcls.arch.backbone.model_zoo.res2net import Res2Net50_26w_4s, Res2Net50_14w_8s
from ppcls.arch.backbone.model_zoo.res2net_vd import Res2Net50_vd_26w_4s, Res2Net101_vd_26w_4s, Res2Net200_vd_26w_4s
from ppcls.arch.backbone.model_zoo.se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd
from ppcls.arch.backbone.model_zoo.se_resnext_vd import SE_ResNeXt50_vd_32x4d, SE_ResNeXt50_vd_32x4d, SENet154_vd
from ppcls.arch.backbone.model_zoo.se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_64x4d
from ppcls.arch.backbone.model_zoo.dpn import DPN68, DPN92, DPN98, DPN107, DPN131
@@ -33,10 +34,11 @@ from ppcls.arch.backbone.model_zoo.resnest import ResNeSt50_fast_1s1x64d, ResNeS
from ppcls.arch.backbone.model_zoo.googlenet import GoogLeNet
from ppcls.arch.backbone.model_zoo.mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2, MobileNetV2_x1_5, MobileNetV2_x2_0
from ppcls.arch.backbone.model_zoo.shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2_swish
from ppcls.arch.backbone.model_zoo.ghostnet import GhostNet_x0_5, GhostNet_x1_0, GhostNet_x1_3
from ppcls.arch.backbone.model_zoo.alexnet import AlexNet
from ppcls.arch.backbone.model_zoo.inception_v4 import InceptionV4
from ppcls.arch.backbone.model_zoo.xception import Xception41, Xception65, Xception71
from ppcls.arch.backbone.model_zoo.xception_deeplab import Xception41_deeplab, Xception65_deeplab, Xception71_deeplab
from ppcls.arch.backbone.model_zoo.xception_deeplab import Xception41_deeplab, Xception65_deeplab
from ppcls.arch.backbone.model_zoo.resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl
from ppcls.arch.backbone.model_zoo.squeezenet import SqueezeNet1_0, SqueezeNet1_1
from ppcls.arch.backbone.model_zoo.darknet import DarkNet53
@@ -47,4 +49,10 @@ from ppcls.arch.backbone.model_zoo.distillation_models import ResNet50_vd_distil
from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0
from ppcls.arch.backbone.model_zoo.gvt import pcpvt_small, pcpvt_base, pcpvt_large, alt_gvt_small, alt_gvt_base, alt_gvt_large
from ppcls.arch.backbone.model_zoo.levit import LeViT_128S, LeViT_128, LeViT_192, LeViT_256, LeViT_384
from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169
from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152
from ppcls.arch.backbone.model_zoo.tnt import TNT_small
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds
from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
@@ -7,8 +21,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
__all__ = ["AlexNet"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"AlexNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvPoolLayer(nn.Layer):
def __init__(self,
@@ -126,7 +143,19 @@ class AlexNetDY(nn.Layer):
x = self._fc8(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def AlexNet(**args):
model = AlexNetDY(**args)
def AlexNet(pretrained=False, use_ssld=False, **kwargs):
model = AlexNetDY(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["AlexNet"], use_ssld=use_ssld)
return model
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import ParamAttr
import paddle.nn as nn
@@ -7,8 +21,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
__all__ = ["DarkNet53"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"DarkNet53": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
def __init__(self,
@@ -155,7 +172,19 @@ class DarkNet(nn.Layer):
x = self._out(x)
return x
def DarkNet53(**args):
model = DarkNet(**args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DarkNet53(pretrained=False, use_ssld=False, **kwargs):
model = DarkNet(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DarkNet53"], use_ssld=use_ssld)
return model
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -26,9 +26,16 @@ from paddle.nn.initializer import Uniform
import math
__all__ = [
"DenseNet121", "DenseNet161", "DenseNet169", "DenseNet201", "DenseNet264"
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"DenseNet121": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams",
"DenseNet161": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams",
"DenseNet169": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams",
"DenseNet201": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams",
"DenseNet264": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class BNACConvLayer(nn.Layer):
@@ -282,27 +289,43 @@ class DenseNet(nn.Layer):
y = self.out(y)
return y
def DenseNet121(**args):
model = DenseNet(layers=121, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DenseNet121(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=121, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet121"], use_ssld=use_ssld)
return model
def DenseNet161(**args):
model = DenseNet(layers=161, **args)
def DenseNet161(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=161, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet161"], use_ssld=use_ssld)
return model
def DenseNet169(**args):
model = DenseNet(layers=169, **args)
def DenseNet169(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=169, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet169"], use_ssld=use_ssld)
return model
def DenseNet201(**args):
model = DenseNet(layers=201, **args)
def DenseNet201(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=201, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet201"], use_ssld=use_ssld)
return model
def DenseNet264(**args):
model = DenseNet(layers=264, **args)
def DenseNet264(pretrained=False, use_ssld=False, **kwargs):
model = DenseNet(layers=264, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DenseNet264"], use_ssld=use_ssld)
return model
@@ -16,12 +16,20 @@ import paddle
import paddle.nn as nn
from .vision_transformer import VisionTransformer, Identity, trunc_normal_, zeros_
__all__ = [
'DeiT_tiny_patch16_224', 'DeiT_small_patch16_224', 'DeiT_base_patch16_224',
'DeiT_tiny_distilled_patch16_224', 'DeiT_small_distilled_patch16_224',
'DeiT_base_distilled_patch16_224', 'DeiT_base_patch16_384',
'DeiT_base_distilled_patch16_384'
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DeiT_tiny_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams",
"DeiT_small_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams",
"DeiT_base_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams",
"DeiT_tiny_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams",
"DeiT_small_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams",
"DeiT_base_distilled_patch16_224": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams",
"DeiT_base_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams",
"DeiT_base_distilled_patch16_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class DistilledVisionTransformer(VisionTransformer):
@@ -90,7 +98,20 @@ class DistilledVisionTransformer(VisionTransformer):
return (x + x_dist) / 2
def DeiT_tiny_patch16_224(**kwargs):
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DeiT_tiny_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=192,
@@ -100,10 +121,11 @@ def DeiT_tiny_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_small_patch16_224(**kwargs):
def DeiT_small_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=384,
@@ -113,10 +135,11 @@ def DeiT_small_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_base_patch16_224(**kwargs):
def DeiT_base_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=768,
@@ -126,10 +149,11 @@ def DeiT_base_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_tiny_distilled_patch16_224(**kwargs):
def DeiT_tiny_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = DistilledVisionTransformer(
patch_size=16,
embed_dim=192,
@@ -139,10 +163,11 @@ def DeiT_tiny_distilled_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_tiny_distilled_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_small_distilled_patch16_224(**kwargs):
def DeiT_small_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = DistilledVisionTransformer(
patch_size=16,
embed_dim=384,
@@ -152,10 +177,11 @@ def DeiT_small_distilled_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_small_distilled_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_base_distilled_patch16_224(**kwargs):
def DeiT_base_distilled_patch16_224(pretrained=False, use_ssld=False, **kwargs):
model = DistilledVisionTransformer(
patch_size=16,
embed_dim=768,
@@ -165,10 +191,11 @@ def DeiT_base_distilled_patch16_224(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_224"], use_ssld=use_ssld)
return model
def DeiT_base_patch16_384(**kwargs):
def DeiT_base_patch16_384(pretrained=False, use_ssld=False, **kwargs):
model = VisionTransformer(
img_size=384,
patch_size=16,
@@ -179,10 +206,11 @@ def DeiT_base_patch16_384(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_patch16_384"], use_ssld=use_ssld)
return model
def DeiT_base_distilled_patch16_384(**kwargs):
def DeiT_base_distilled_patch16_384(pretrained=False, use_ssld=False, **kwargs):
model = DistilledVisionTransformer(
img_size=384,
patch_size=16,
@@ -193,4 +221,5 @@ def DeiT_base_distilled_patch16_384(**kwargs):
qkv_bias=True,
epsilon=1e-6,
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DeiT_base_distilled_patch16_384"], use_ssld=use_ssld)
return model
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"DLA34":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams",
"DLA46_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46_c_pretrained.pdparams",
"DLA46x_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46x_c_pretrained.pdparams",
"DLA60":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60_pretrained.pdparams",
"DLA60x":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_pretrained.pdparams",
"DLA60x_c":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_c_pretrained.pdparams",
"DLA102":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102_pretrained.pdparams",
"DLA102x":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x_pretrained.pdparams",
"DLA102x2":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x2_pretrained.pdparams",
"DLA169":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams"
}
__all__ = list(MODEL_URLS.keys())
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)
class DlaBasic(nn.Layer):
def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
super(DlaBasic, self).__init__()
self.conv1 = nn.Conv2D(
inplanes, planes, kernel_size=3, stride=stride,
padding=dilation, bias_attr=False, dilation=dilation
)
self.bn1 = nn.BatchNorm2D(planes)
self.relu = nn.ReLU()
self.conv2 = nn.Conv2D(
planes, planes, kernel_size=3, stride=1,
padding=dilation, bias_attr=False, dilation=dilation
)
self.bn2 = nn.BatchNorm2D(planes)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class DlaBottleneck(nn.Layer):
expansion = 2
def __init__(self, inplanes, outplanes, stride=1,
dilation=1, cardinality=1, base_width=64):
super(DlaBottleneck, self).__init__()
self.stride = stride
mid_planes = int(math.floor(
outplanes * (base_width / 64)) * cardinality)
mid_planes = mid_planes // self.expansion
self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False)
self.bn1 = nn.BatchNorm2D(mid_planes)
self.conv2 = nn.Conv2D(
mid_planes, mid_planes, kernel_size=3,
stride=stride, padding=dilation, bias_attr=False,
dilation=dilation, groups=cardinality
)
self.bn2 = nn.BatchNorm2D(mid_planes)
self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False)
self.bn3 = nn.BatchNorm2D(outplanes)
self.relu = nn.ReLU()
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class DlaRoot(nn.Layer):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(DlaRoot, self).__init__()
self.conv = nn.Conv2D(
in_channels, out_channels, 1, stride=1,
bias_attr=False, padding=(kernel_size - 1) // 2
)
self.bn = nn.BatchNorm2D(out_channels)
self.relu = nn.ReLU()
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(paddle.concat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
class DlaTree(nn.Layer):
def __init__(self, levels, block, in_channels, out_channels,
                 stride=1, dilation=1, cardinality=1, base_width=64,
level_root=False, root_dim=0, root_kernel_size=1,
root_residual=False):
super(DlaTree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
self.downsample = nn.MaxPool2D(
stride, stride=stride) if stride > 1 else Identity()
self.project = Identity()
cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width)
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride, **cargs)
self.tree2 = block(out_channels, out_channels, 1, **cargs)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False),
nn.BatchNorm2D(out_channels))
else:
cargs.update(dict(root_kernel_size=root_kernel_size, root_residual=root_residual))
self.tree1 = DlaTree(
levels - 1, block, in_channels,
out_channels, stride, root_dim=0, **cargs
)
self.tree2 = DlaTree(
levels - 1, block, out_channels,
out_channels, root_dim=root_dim + out_channels, **cargs
)
if levels == 1:
self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.levels = levels
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x)
residual = self.project(bottom)
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
class DLA(nn.Layer):
def __init__(self, levels, channels, in_chans=3, cardinality=1,
base_width=64, block=DlaBottleneck, residual_root=False,
drop_rate=0.0, class_dim=1000, with_pool=True):
super(DLA, self).__init__()
self.channels = channels
self.class_dim = class_dim
self.with_pool = with_pool
self.cardinality = cardinality
self.base_width = base_width
self.drop_rate = drop_rate
self.base_layer = nn.Sequential(
nn.Conv2D(
in_chans, channels[0], kernel_size=7,
stride=1, padding=3, bias_attr=False
),
nn.BatchNorm2D(channels[0]),
nn.ReLU())
self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2)
cargs = dict(
cardinality=cardinality,
base_width=base_width,
root_residual=residual_root
)
self.level2 = DlaTree(
levels[2], block, channels[1],
channels[2], 2, level_root=False, **cargs
)
self.level3 = DlaTree(
levels[3], block, channels[2],
channels[3], 2, level_root=True, **cargs
)
self.level4 = DlaTree(
levels[4], block, channels[3],
channels[4], 2, level_root=True, **cargs
)
self.level5 = DlaTree(
levels[5], block, channels[4],
channels[5], 2, level_root=True, **cargs
)
self.feature_info = [
# rare to have a meaningful stride 1 level
dict(num_chs=channels[0], reduction=1, module='level0'),
dict(num_chs=channels[1], reduction=2, module='level1'),
dict(num_chs=channels[2], reduction=4, module='level2'),
dict(num_chs=channels[3], reduction=8, module='level3'),
dict(num_chs=channels[4], reduction=16, module='level4'),
dict(num_chs=channels[5], reduction=32, module='level5'),
]
self.num_features = channels[-1]
if with_pool:
self.global_pool = nn.AdaptiveAvgPool2D(1)
if class_dim > 0:
self.fc = nn.Conv2D(self.num_features, class_dim, 1)
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
normal_(m.weight)
elif isinstance(m, nn.BatchNorm2D):
ones_(m.weight)
zeros_(m.bias)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2D(
inplanes, planes, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias_attr=False, dilation=dilation
),
nn.BatchNorm2D(planes),
nn.ReLU()])
inplanes = planes
return nn.Sequential(*modules)
def forward_features(self, x):
x = self.base_layer(x)
x = self.level0(x)
x = self.level1(x)
x = self.level2(x)
x = self.level3(x)
x = self.level4(x)
x = self.level5(x)
return x
def forward(self, x):
x = self.forward_features(x)
if self.with_pool:
x = self.global_pool(x)
if self.drop_rate > 0.:
x = F.dropout(x, p=self.drop_rate, training=self.training)
if self.class_dim > 0:
x = self.fc(x)
x = x.flatten(1)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DLA34(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 128, 256, 512),
block=DlaBasic,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
return model
def DLA46_c(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
return model
def DLA46x_c(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 2, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
return model
def DLA60(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
return model
def DLA60x(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
return model
def DLA60x_c(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 2, 3, 1),
channels=(16, 32, 64, 64, 128, 256),
block=DlaBottleneck,
cardinality=32,
base_width=4,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
return model
def DLA102(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
return model
def DLA102x(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=32,
base_width=4,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
return model
def DLA102x2(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 1, 3, 4, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
cardinality=64,
base_width=4,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
return model
def DLA169(pretrained=False, **kwargs):
model = DLA(
levels=(1, 1, 2, 3, 5, 1),
channels=(16, 32, 128, 256, 512, 1024),
block=DlaBottleneck,
residual_root=True,
**kwargs
)
_load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
return model
@@ -27,14 +27,16 @@ from paddle.nn.initializer import Uniform
import math
__all__ = [
"DPN",
"DPN68",
"DPN92",
"DPN98",
"DPN107",
"DPN131",
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"DPN68": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams",
"DPN92": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams",
"DPN98": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams",
"DPN107": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams",
"DPN131": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
@@ -398,28 +400,45 @@ class DPN(nn.Layer):
net_arg['init_padding'] = init_padding
return net_arg
def DPN68(**args):
model = DPN(layers=68, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def DPN68(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=68, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN68"])
return model
def DPN92(**args):
model = DPN(layers=92, **args)
def DPN92(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=92, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN92"])
return model
def DPN98(**args):
model = DPN(layers=98, **args)
def DPN98(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=98, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN98"])
return model
def DPN107(**args):
model = DPN(layers=107, **args)
def DPN107(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=107, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN107"])
return model
def DPN131(**args):
model = DPN(layers=131, **args)
return model
def DPN131(pretrained=False, use_ssld=False, **kwargs):
model = DPN(layers=131, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["DPN131"])
return model
\ No newline at end of file
@@ -9,11 +9,20 @@ import collections
import re
import copy
__all__ = [
'EfficientNet', 'EfficientNetB0_small', 'EfficientNetB0', 'EfficientNetB1',
'EfficientNetB2', 'EfficientNetB3', 'EfficientNetB4', 'EfficientNetB5',
'EfficientNetB6', 'EfficientNetB7'
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"EfficientNetB0_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams",
"EfficientNetB0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams",
"EfficientNetB1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams",
"EfficientNetB2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams",
"EfficientNetB3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams",
"EfficientNetB4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams",
"EfficientNetB5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams",
"EfficientNetB6": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams",
"EfficientNetB7": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
GlobalParams = collections.namedtuple('GlobalParams', [
'batch_norm_momentum',
......@@ -783,119 +792,159 @@ class EfficientNet(nn.Layer):
x = self._fc(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def EfficientNetB0_small(padding_type='DYNAMIC',
override_params=None,
use_se=False,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b0',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0_small"])
return model
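# Editorial note: the "_small" variant differs from EfficientNetB0 only in
# its construction flags -- 'DYNAMIC' padding instead of TensorFlow-style
# 'SAME', and use_se=False, which drops the squeeze-and-excitation blocks
# for a lighter, more deployment-friendly model:
#
#     model = EfficientNetB0_small(pretrained=False)  # SE-free, dynamic padding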
def EfficientNetB0(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b0',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0"])
return model
def EfficientNetB1(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b1',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB1"])
return model
def EfficientNetB2(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b2',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB2"])
return model
def EfficientNetB3(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b3',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB3"])
return model
def EfficientNetB4(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b4',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB4"])
return model
def EfficientNetB5(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b5',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB5"])
return model
def EfficientNetB6(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b6',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB6"])
return model
def EfficientNetB7(padding_type='SAME',
override_params=None,
use_se=True,
**args):
pretrained=False,
use_ssld=False,
**kwargs):
model = EfficientNet(
name='b7',
padding_type=padding_type,
override_params=override_params,
use_se=use_se,
**args)
return model
**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB7"])
return model
\ No newline at end of file
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -21,7 +21,14 @@ from paddle.nn import Conv2D, BatchNorm, AdaptiveAvgPool2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Uniform, KaimingNormal
__all__ = ["GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"GhostNet_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams",
"GhostNet_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams",
"GhostNet_x1_3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -315,17 +322,33 @@ class GhostNet(nn.Layer):
new_v += divisor
return new_v
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def GhostNet_x0_5(**args):
model = GhostNet(scale=0.5)
def GhostNet_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = GhostNet(scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x0_5"], use_ssld=use_ssld)
return model
def GhostNet_x1_0(**args):
model = GhostNet(scale=1.0)
def GhostNet_x1_0(pretrained=False, use_ssld=False, **kwargs):
model = GhostNet(scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_0"], use_ssld=use_ssld)
return model
def GhostNet_x1_3(**args):
model = GhostNet(scale=1.3)
def GhostNet_x1_3(pretrained=False, use_ssld=False, **kwargs):
model = GhostNet(scale=1.3, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["GhostNet_x1_3"], use_ssld=use_ssld)
return model
......@@ -8,7 +8,12 @@ from paddle.nn.initializer import Uniform
import math
__all__ = ['GoogLeNet']
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"GoogLeNet": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
def xavier(channels, filter_size, name):
......@@ -200,8 +205,22 @@ class GoogLeNetDY(nn.Layer):
x = self._drop_o2(x)
out2 = self._out2(x)
return [out, out1, out2]
def GoogLeNet(**args):
model = GoogLeNetDY(**args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def GoogLeNet(pretrained=False, use_ssld=False, **kwargs):
model = GoogLeNetDY(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["GoogLeNet"], use_ssld=use_ssld)
return model
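# Usage sketch (editorial): unlike the other backbones here, GoogLeNetDY
# returns a list of three logit tensors -- the main head plus two auxiliary
# heads used for deep supervision -- so callers unpack accordingly:
if __name__ == "__main__":
    import paddle
    model = GoogLeNet(pretrained=False)
    out, aux1, aux2 = model(paddle.rand([1, 3, 224, 224]))
    print(out.shape)  # main-classifier logits, e.g. [1, 1000]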
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
'HarDNet39_ds':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams',
'HarDNet68_ds':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_ds_pretrained.pdparams',
'HarDNet68':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_pretrained.pdparams',
'HarDNet85':
'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams'
}
__all__ = list(MODEL_URLS.keys())
def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
layer = nn.Sequential(
('conv', nn.Conv2D(
in_channels, out_channels, kernel_size=kernel_size,
stride=stride, padding=kernel_size//2, groups=1, bias_attr=bias_attr
)),
('norm', nn.BatchNorm2D(out_channels)),
('relu', nn.ReLU6())
)
return layer
def DWConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
layer = nn.Sequential(
('dwconv', nn.Conv2D(
in_channels, out_channels, kernel_size=kernel_size,
stride=stride, padding=1, groups=out_channels, bias_attr=bias_attr
)),
('norm', nn.BatchNorm2D(out_channels))
)
return layer
def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
layer = nn.Sequential(
('layer1', ConvLayer(in_channels, out_channels, kernel_size=kernel_size)),
('layer2', DWConvLayer(out_channels, out_channels, stride=stride))
)
return layer
class HarDBlock(nn.Layer):
def __init__(self, in_channels, growth_rate, grmul, n_layers,
keepBase=False, residual_out=False, dwconv=False):
super().__init__()
self.keepBase = keepBase
self.links = []
layers_ = []
self.out_channels = 0 # if upsample else in_channels
for i in range(n_layers):
outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul)
self.links.append(link)
if dwconv:
layers_.append(CombConvLayer(inch, outch))
else:
layers_.append(ConvLayer(inch, outch))
if (i % 2 == 0) or (i == n_layers - 1):
self.out_channels += outch
# print("Blk out =",self.out_channels)
self.layers = nn.LayerList(layers_)
def get_link(self, layer, base_ch, growth_rate, grmul):
if layer == 0:
return base_ch, 0, []
out_channels = growth_rate
link = []
for i in range(10):
dv = 2 ** i
if layer % dv == 0:
k = layer - dv
link.append(k)
if i > 0:
out_channels *= grmul
out_channels = int(int(out_channels + 1) / 2) * 2
in_channels = 0
for i in link:
ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
in_channels += ch
return out_channels, in_channels, link
def forward(self, x):
layers_ = [x]
for layer in range(len(self.layers)):
link = self.links[layer]
tin = []
for i in link:
tin.append(layers_[i])
if len(tin) > 1:
x = paddle.concat(tin, 1)
else:
x = tin[0]
out = self.layers[layer](x)
layers_.append(out)
t = len(layers_)
out_ = []
for i in range(t):
if (i == 0 and self.keepBase) or (i == t-1) or (i % 2 == 1):
out_.append(layers_[i])
out = paddle.concat(out_, 1)
return out
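# Editorial note: get_link wires layer L back to layers L - 2**k for every
# power of two dividing L, widening the layer by grmul per extra link, and
# the block output concatenates the odd-indexed layers, the final layer and
# (optionally) the base. For a 4-layer block with growth_rate=16, grmul=1.7
# the recurrence above gives (values worked out by hand, worth re-checking):
#
#     blk = HarDBlock(in_channels=64, growth_rate=16, grmul=1.7, n_layers=4)
#     blk.links         # [[0], [1, 0], [2], [3, 2, 0]]
#     blk.out_channels  # 78 = 16 + 16 + 46 (layers 1, 3 and 4)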
class HarDNet(nn.Layer):
def __init__(self, depth_wise=False, arch=85,
class_dim=1000, with_pool=True):
super().__init__()
first_ch = [32, 64]
second_kernel = 3
max_pool = True
grmul = 1.7
drop_rate = 0.1
# HarDNet68
ch_list = [128, 256, 320, 640, 1024]
gr = [14, 16, 20, 40, 160]
n_layers = [8, 16, 16, 16, 4]
downSamp = [1, 0, 1, 1, 0]
if arch == 85:
# HarDNet85
first_ch = [48, 96]
ch_list = [192, 256, 320, 480, 720, 1280]
gr = [24, 24, 28, 36, 48, 256]
n_layers = [8, 16, 16, 16, 16, 4]
downSamp = [1, 0, 1, 0, 1, 0]
drop_rate = 0.2
elif arch == 39:
# HarDNet39
first_ch = [24, 48]
ch_list = [96, 320, 640, 1024]
grmul = 1.6
gr = [16, 20, 64, 160]
n_layers = [4, 16, 8, 4]
downSamp = [1, 1, 1, 0]
if depth_wise:
second_kernel = 1
max_pool = False
drop_rate = 0.05
blks = len(n_layers)
self.base = nn.LayerList([])
# First Layer: Standard Conv3x3, Stride=2
self.base.append(
ConvLayer(in_channels=3, out_channels=first_ch[0], kernel_size=3,
stride=2, bias_attr=False))
# Second Layer
self.base.append(
ConvLayer(first_ch[0], first_ch[1], kernel_size=second_kernel))
# Maxpooling or DWConv3x3 downsampling
if max_pool:
self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
else:
self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))
# Build all HarDNet blocks
ch = first_ch[1]
for i in range(blks):
blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
ch = blk.out_channels
self.base.append(blk)
if i == blks-1 and arch == 85:
self.base.append(nn.Dropout(0.1))
self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
ch = ch_list[i]
if downSamp[i] == 1:
if max_pool:
self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
else:
self.base.append(DWConvLayer(ch, ch, stride=2))
ch = ch_list[blks-1]
layers = []
if with_pool:
layers.append(nn.AdaptiveAvgPool2D((1, 1)))
if class_dim > 0:
layers.append(nn.Flatten())
layers.append(nn.Dropout(drop_rate))
layers.append(nn.Linear(ch, class_dim))
self.base.append(nn.Sequential(*layers))
def forward(self, x):
for layer in self.base:
x = layer(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def HarDNet39_ds(pretrained=False, **kwargs):
model = HarDNet(arch=39, depth_wise=True, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"])
return model
def HarDNet68_ds(pretrained=False, **kwargs):
model = HarDNet(arch=68, depth_wise=True, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"])
return model
def HarDNet68(pretrained=False, **kwargs):
model = HarDNet(arch=68, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"])
return model
def HarDNet85(pretrained=False, **kwargs):
model = HarDNet(arch=85, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"])
return model
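# Usage sketch (editorial): all four wrappers share the same trunk; the
# "_ds" variants pass depth_wise=True, which swaps the second 3x3 conv and
# the max-pool downsampling for depthwise variants to cut FLOPs.
if __name__ == "__main__":
    model = HarDNet68(pretrained=False)
    x = paddle.rand([1, 3, 224, 224])
    print(model(x).shape)  # [1, 1000] with the default class_dim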
......@@ -27,24 +27,18 @@ from paddle.nn.initializer import Uniform
import math
__all__ = [
"HRNet_W18_C",
"HRNet_W30_C",
"HRNet_W32_C",
"HRNet_W40_C",
"HRNet_W44_C",
"HRNet_W48_C",
"HRNet_W60_C",
"HRNet_W64_C",
"SE_HRNet_W18_C",
"SE_HRNet_W30_C",
"SE_HRNet_W32_C",
"SE_HRNet_W40_C",
"SE_HRNet_W44_C",
"SE_HRNet_W48_C",
"SE_HRNet_W60_C",
"SE_HRNet_W64_C",
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"HRNet_W18_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W18_C_pretrained.pdparams",
"HRNet_W30_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W30_C_pretrained.pdparams",
"HRNet_W32_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W32_C_pretrained.pdparams",
"HRNet_W40_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W40_C_pretrained.pdparams",
"HRNet_W44_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W44_C_pretrained.pdparams",
"HRNet_W48_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W48_C_pretrained.pdparams",
"HRNet_W64_C": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W64_C_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -661,82 +655,62 @@ class HRNet(nn.Layer):
y = self.out(y)
return y
def HRNet_W18_C(**args):
model = HRNet(width=18, **args)
return model
def HRNet_W30_C(**args):
model = HRNet(width=30, **args)
return model
def HRNet_W32_C(**args):
model = HRNet(width=32, **args)
return model
def HRNet_W40_C(**args):
model = HRNet(width=40, **args)
return model
def HRNet_W44_C(**args):
model = HRNet(width=44, **args)
return model
def HRNet_W48_C(**args):
model = HRNet(width=48, **args)
return model
def HRNet_W60_C(**args):
model = HRNet(width=60, **args)
return model
def HRNet_W64_C(**args):
model = HRNet(width=64, **args)
return model
def SE_HRNet_W18_C(**args):
model = HRNet(width=18, has_se=True, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def HRNet_W18_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=18, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W18_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W30_C(**args):
model = HRNet(width=30, has_se=True, **args)
def HRNet_W30_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=30, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W30_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W32_C(**args):
model = HRNet(width=32, has_se=True, **args)
def HRNet_W32_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=32, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W32_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W40_C(**args):
model = HRNet(width=40, has_se=True, **args)
def HRNet_W40_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=40, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W40_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W44_C(**args):
model = HRNet(width=44, has_se=True, **args)
def HRNet_W44_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=44, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W44_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W48_C(**args):
model = HRNet(width=48, has_se=True, **args)
def HRNet_W48_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=48, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W48_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W60_C(**args):
model = HRNet(width=60, has_se=True, **args)
def HRNet_W64_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=64, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["HRNet_W64_C"], use_ssld=use_ssld)
return model
def SE_HRNet_W64_C(**args):
model = HRNet(width=64, has_se=True, **args)
def SE_HRNet_W64_C(pretrained=False, use_ssld=False, **kwarg):
model = HRNet(width=64, has_se=True, **kwarg)
_load_pretrained(pretrained, model, MODEL_URLS["SE_HRNet_W64_C"], use_ssld=use_ssld)
return model
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -26,7 +26,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
__all__ = ["InceptionV3"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"InceptionV3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV3_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -425,9 +429,9 @@ class InceptionE(nn.Layer):
return outputs
class InceptionV3(nn.Layer):
class Inception_V3(nn.Layer):
def __init__(self, class_dim=1000):
super(InceptionV3, self).__init__()
super(Inception_V3, self).__init__()
self.inception_a_list = [[192, 256, 288], [32, 64, 64]]
self.inception_c_list = [[768, 768, 768, 768], [128, 160, 160, 192]]
......@@ -472,10 +476,28 @@ class InceptionV3(nn.Layer):
def forward(self, x):
y = self.inception_stem(x)
for inception_block in self.inception_block_list:
y = inception_block(y)
y = self.gap(y)
y = paddle.reshape(y, shape=[-1, 2048])
y = self.drop(y)
y = self.out(y)
return y
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def InceptionV3(pretrained=False, use_ssld=False, **kwargs):
model = Inception_V3(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["InceptionV3"], use_ssld=use_ssld)
return model
......@@ -21,7 +21,11 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import Uniform
import math
__all__ = ["InceptionV4"]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"InceptionV4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -450,6 +454,19 @@ class InceptionV4DY(nn.Layer):
return x
def InceptionV4(**args):
model = InceptionV4DY(**args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def InceptionV4(pretrained=False, use_ssld=False, **kwargs):
model = InceptionV4DY(**kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["InceptionV4"], use_ssld=use_ssld)
return model
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import math
import warnings
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import TruncatedNormal, Constant
from paddle.regularizer import L2Decay
from .vision_transformer import trunc_normal_, zeros_, ones_, Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"LeViT_128S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams",
"LeViT_128": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams",
"LeViT_192": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams",
"LeViT_256": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams",
"LeViT_384": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams",
}
__all__ = list(MODEL_URLS.keys())
def cal_attention_biases(attention_biases, attention_bias_idxs):
gather_list = []
attention_bias_t = paddle.transpose(attention_biases, (1, 0))
for idx in attention_bias_idxs:
gather = paddle.gather(attention_bias_t, idx)
gather_list.append(gather)
shape0, shape1 = attention_bias_idxs.shape
return paddle.transpose(paddle.concat(gather_list), (1, 0)).reshape(
(0, shape0, shape1))
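# Editorial note: attention_biases is a learned table of shape
# [num_heads, n_unique_offsets] and attention_bias_idxs maps each
# (query, key) position pair to its relative-offset slot, so the gather
# above expands the table into a dense per-head bias. Shape sketch:
#
#     biases = paddle.zeros([4, 25])                # 4 heads, 25 offsets
#     idxs = paddle.zeros([49, 49], dtype='int64')  # 7x7 tokens per side
#     cal_attention_biases(biases, idxs).shape      # [4, 49, 49]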
class Conv2d_BN(nn.Sequential):
def __init__(self,
a,
b,
ks=1,
stride=1,
pad=0,
dilation=1,
groups=1,
bn_weight_init=1,
resolution=-10000):
super().__init__()
self.add_sublayer(
'c',
nn.Conv2D(
a, b, ks, stride, pad, dilation, groups, bias_attr=False))
bn = nn.BatchNorm2D(b)
ones_(bn.weight)
zeros_(bn.bias)
self.add_sublayer('bn', bn)
class Linear_BN(nn.Sequential):
def __init__(self, a, b, bn_weight_init=1):
super().__init__()
self.add_sublayer('c', nn.Linear(a, b, bias_attr=False))
bn = nn.BatchNorm1D(b)
ones_(bn.weight)
zeros_(bn.bias)
self.add_sublayer('bn', bn)
def forward(self, x):
l, bn = self._sub_layers.values()
x = l(x)
return paddle.reshape(bn(x.flatten(0, 1)), x.shape)
class BN_Linear(nn.Sequential):
def __init__(self, a, b, bias=True, std=0.02):
super().__init__()
self.add_sublayer('bn', nn.BatchNorm1D(a))
l = nn.Linear(a, b, bias_attr=bias)
trunc_normal_(l.weight)
if bias:
zeros_(l.bias)
self.add_sublayer('l', l)
def b16(n, activation, resolution=224):
return nn.Sequential(
Conv2d_BN(
3, n // 8, 3, 2, 1, resolution=resolution),
activation(),
Conv2d_BN(
n // 8, n // 4, 3, 2, 1, resolution=resolution // 2),
activation(),
Conv2d_BN(
n // 4, n // 2, 3, 2, 1, resolution=resolution // 4),
activation(),
Conv2d_BN(
n // 2, n, 3, 2, 1, resolution=resolution // 8))
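# Editorial note: b16 is the convolutional patch stem -- four stride-2
# Conv2d_BN stages halve the resolution each time (16x overall) while the
# width ramps n//8 -> n//4 -> n//2 -> n. Shape sketch:
#
#     stem = b16(128, activation=nn.Hardswish)
#     stem(paddle.rand([1, 3, 224, 224])).shape  # [1, 128, 14, 14]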
class Residual(nn.Layer):
def __init__(self, m, drop):
super().__init__()
self.m = m
self.drop = drop
def forward(self, x):
if self.training and self.drop > 0:
# Stochastic depth, ported to Paddle ops (the original used torch-style
# x.size(0)/.ge_()/device= calls that Paddle tensors do not provide):
# drop the residual branch per sample and rescale the survivors so the
# expected output is unchanged.
keep_mask = (paddle.rand([x.shape[0], 1, 1]) >= self.drop).astype(
x.dtype) / (1 - self.drop)
return x + self.m(x) * keep_mask.detach()
else:
return x + self.m(x)
class Attention(nn.Layer):
def __init__(self,
dim,
key_dim,
num_heads=8,
attn_ratio=4,
activation=None,
resolution=14):
super().__init__()
self.num_heads = num_heads
self.scale = key_dim**-0.5
self.key_dim = key_dim
self.nh_kd = nh_kd = key_dim * num_heads
self.d = int(attn_ratio * key_dim)
self.dh = int(attn_ratio * key_dim) * num_heads
self.attn_ratio = attn_ratio
self.h = self.dh + nh_kd * 2
self.qkv = Linear_BN(dim, self.h)
self.proj = nn.Sequential(
activation(), Linear_BN(
self.dh, dim, bn_weight_init=0))
points = list(itertools.product(range(resolution), range(resolution)))
N = len(points)
attention_offsets = {}
idxs = []
for p1 in points:
for p2 in points:
offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1]))
if offset not in attention_offsets:
attention_offsets[offset] = len(attention_offsets)
idxs.append(attention_offsets[offset])
self.attention_biases = self.create_parameter(
shape=(num_heads, len(attention_offsets)),
default_initializer=zeros_,
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
tensor_idxs = paddle.to_tensor(idxs, dtype='int64')
self.register_buffer('attention_bias_idxs',
paddle.reshape(tensor_idxs, [N, N]))
@paddle.no_grad()
def train(self, mode=True):
if mode:
super().train()
else:
super().eval()
if mode and hasattr(self, 'ab'):
del self.ab
else:
self.ab = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
def forward(self, x):
self.training = True
B, N, C = x.shape
qkv = self.qkv(x)
qkv = paddle.reshape(qkv,
[B, N, self.num_heads, self.h // self.num_heads])
q, k, v = paddle.split(
qkv, [self.key_dim, self.key_dim, self.d], axis=3)
q = paddle.transpose(q, perm=[0, 2, 1, 3])
k = paddle.transpose(k, perm=[0, 2, 1, 3])
v = paddle.transpose(v, perm=[0, 2, 1, 3])
k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2])
if self.training:
attention_biases = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
else:
attention_biases = self.ab
attn = ((q @k_transpose) * self.scale + attention_biases)
attn = F.softmax(attn)
x = paddle.transpose(attn @v, perm=[0, 2, 1, 3])
x = paddle.reshape(x, [B, N, self.dh])
x = self.proj(x)
return x
class Subsample(nn.Layer):
def __init__(self, stride, resolution):
super().__init__()
self.stride = stride
self.resolution = resolution
def forward(self, x):
B, N, C = x.shape
x = paddle.reshape(x, [B, self.resolution, self.resolution,
C])[:, ::self.stride, ::self.stride]
x = paddle.reshape(x, [B, -1, C])
return x
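# Editorial note: Subsample thins the token grid on a stride pattern, e.g.
# a 14x14 map (196 tokens) with stride 2 keeps every other row and column:
#
#     sub = Subsample(stride=2, resolution=14)
#     sub(paddle.rand([1, 196, 64])).shape  # [1, 49, 64]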
class AttentionSubsample(nn.Layer):
def __init__(self,
in_dim,
out_dim,
key_dim,
num_heads=8,
attn_ratio=2,
activation=None,
stride=2,
resolution=14,
resolution_=7):
super().__init__()
self.num_heads = num_heads
self.scale = key_dim**-0.5
self.key_dim = key_dim
self.nh_kd = nh_kd = key_dim * num_heads
self.d = int(attn_ratio * key_dim)
self.dh = int(attn_ratio * key_dim) * self.num_heads
self.attn_ratio = attn_ratio
self.resolution_ = resolution_
self.resolution_2 = resolution_**2
self.training = True
h = self.dh + nh_kd
self.kv = Linear_BN(in_dim, h)
self.q = nn.Sequential(
Subsample(stride, resolution), Linear_BN(in_dim, nh_kd))
self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim))
self.stride = stride
self.resolution = resolution
points = list(itertools.product(range(resolution), range(resolution)))
points_ = list(
itertools.product(range(resolution_), range(resolution_)))
N = len(points)
N_ = len(points_)
attention_offsets = {}
idxs = []
i = 0
j = 0
for p1 in points_:
i += 1
for p2 in points:
j += 1
size = 1
offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2),
abs(p1[1] * stride - p2[1] + (size - 1) / 2))
if offset not in attention_offsets:
attention_offsets[offset] = len(attention_offsets)
idxs.append(attention_offsets[offset])
self.attention_biases = self.create_parameter(
shape=(num_heads, len(attention_offsets)),
default_initializer=zeros_,
attr=paddle.ParamAttr(regularizer=L2Decay(0.0)))
tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64')
self.register_buffer('attention_bias_idxs',
paddle.reshape(tensor_idxs_, [N_, N]))
@paddle.no_grad()
def train(self, mode=True):
if mode:
super().train()
else:
super().eval()
if mode and hasattr(self, 'ab'):
del self.ab
else:
self.ab = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
def forward(self, x):
self.training = True
B, N, C = x.shape
kv = self.kv(x)
kv = paddle.reshape(kv, [B, N, self.num_heads, -1])
k, v = paddle.split(kv, [self.key_dim, self.d], axis=3)
k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC
v = paddle.transpose(v, perm=[0, 2, 1, 3])
q = paddle.reshape(
self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim])
q = paddle.transpose(q, perm=[0, 2, 1, 3])
if self.training:
attention_biases = cal_attention_biases(self.attention_biases,
self.attention_bias_idxs)
else:
attention_biases = self.ab
attn = (q @paddle.transpose(
k, perm=[0, 1, 3, 2])) * self.scale + attention_biases
attn = F.softmax(attn)
x = paddle.reshape(
paddle.transpose(
(attn @v), perm=[0, 2, 1, 3]), [B, -1, self.dh])
x = self.proj(x)
return x
class LeViT(nn.Layer):
""" Vision Transformer with support for patch or hybrid CNN input stage
"""
def __init__(self,
img_size=224,
patch_size=16,
in_chans=3,
class_dim=1000,
embed_dim=[192],
key_dim=[64],
depth=[12],
num_heads=[3],
attn_ratio=[2],
mlp_ratio=[2],
hybrid_backbone=None,
down_ops=[],
attention_activation=nn.Hardswish,
mlp_activation=nn.Hardswish,
distillation=True,
drop_path=0):
super().__init__()
self.class_dim = class_dim
self.num_features = embed_dim[-1]
self.embed_dim = embed_dim
self.distillation = distillation
self.patch_embed = hybrid_backbone
self.blocks = []
down_ops.append([''])
resolution = img_size // patch_size
for i, (ed, kd, dpth, nh, ar, mr, do) in enumerate(
zip(embed_dim, key_dim, depth, num_heads, attn_ratio,
mlp_ratio, down_ops)):
for _ in range(dpth):
self.blocks.append(
Residual(
Attention(
ed,
kd,
nh,
attn_ratio=ar,
activation=attention_activation,
resolution=resolution, ),
drop_path))
if mr > 0:
h = int(ed * mr)
self.blocks.append(
Residual(
nn.Sequential(
Linear_BN(ed, h),
mlp_activation(),
Linear_BN(
h, ed, bn_weight_init=0), ),
drop_path))
if do[0] == 'Subsample':
#('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
resolution_ = (resolution - 1) // do[5] + 1
self.blocks.append(
AttentionSubsample(
*embed_dim[i:i + 2],
key_dim=do[1],
num_heads=do[2],
attn_ratio=do[3],
activation=attention_activation,
stride=do[5],
resolution=resolution,
resolution_=resolution_))
resolution = resolution_
if do[4] > 0: # mlp_ratio
h = int(embed_dim[i + 1] * do[4])
self.blocks.append(
Residual(
nn.Sequential(
Linear_BN(embed_dim[i + 1], h),
mlp_activation(),
Linear_BN(
h, embed_dim[i + 1], bn_weight_init=0), ),
drop_path))
self.blocks = nn.Sequential(*self.blocks)
# Classifier head
self.head = BN_Linear(embed_dim[-1],
class_dim) if class_dim > 0 else Identity()
if distillation:
self.head_dist = BN_Linear(
embed_dim[-1], class_dim) if class_dim > 0 else Identity()
def forward(self, x):
x = self.patch_embed(x)
x = x.flatten(2)
x = paddle.transpose(x, perm=[0, 2, 1])
x = self.blocks(x)
x = x.mean(1)
if self.distillation:
x = self.head(x), self.head_dist(x)
if not self.training:
x = (x[0] + x[1]) / 2
else:
x = self.head(x)
return x
def model_factory(C, D, X, N, drop_path, class_dim, distillation):
embed_dim = [int(x) for x in C.split('_')]
num_heads = [int(x) for x in N.split('_')]
depth = [int(x) for x in X.split('_')]
act = nn.Hardswish
model = LeViT(
patch_size=16,
embed_dim=embed_dim,
num_heads=num_heads,
key_dim=[D] * 3,
depth=depth,
attn_ratio=[2, 2, 2],
mlp_ratio=[2, 2, 2],
down_ops=[
#('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride)
['Subsample', D, embed_dim[0] // D, 4, 2, 2],
['Subsample', D, embed_dim[1] // D, 4, 2, 2],
],
attention_activation=act,
mlp_activation=act,
hybrid_backbone=b16(embed_dim[0], activation=act),
class_dim=class_dim,
drop_path=drop_path,
distillation=distillation)
return model
specification = {
'LeViT_128S': {
'C': '128_256_384',
'D': 16,
'N': '4_6_8',
'X': '2_3_4',
'drop_path': 0
},
'LeViT_128': {
'C': '128_256_384',
'D': 16,
'N': '4_8_12',
'X': '4_4_4',
'drop_path': 0
},
'LeViT_192': {
'C': '192_288_384',
'D': 32,
'N': '3_5_6',
'X': '4_4_4',
'drop_path': 0
},
'LeViT_256': {
'C': '256_384_512',
'D': 32,
'N': '4_6_8',
'X': '4_4_4',
'drop_path': 0
},
'LeViT_384': {
'C': '384_512_768',
'D': 32,
'N': '6_9_12',
'X': '4_4_4',
'drop_path': 0.1
},
}
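# Editorial note: model_factory splits the underscore-separated spec strings
# into per-stage lists; for LeViT_128S, C='128_256_384' gives embed_dim
# [128, 256, 384], N='4_6_8' gives num_heads [4, 6, 8] and X='2_3_4' gives
# depth [2, 3, 4], with key_dim D=16 repeated for all three stages:
#
#     model = model_factory(**specification['LeViT_128S'],
#                           class_dim=1000, distillation=False)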
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def LeViT_128S(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_128S'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_128S"], use_ssld=use_ssld)
return model
def LeViT_128(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_128'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_128"], use_ssld=use_ssld)
return model
def LeViT_192(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_192'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_192"], use_ssld=use_ssld)
return model
def LeViT_256(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_256'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_256"], use_ssld=use_ssld)
return model
def LeViT_384(pretrained=False, use_ssld=False, class_dim=1000, distillation=False, **kwargs):
model = model_factory(
**specification['LeViT_384'],
class_dim=class_dim,
distillation=distillation)
_load_pretrained(pretrained, model, MODEL_URLS["LeViT_384"], use_ssld=use_ssld)
return model
......@@ -17,14 +17,20 @@
https://arxiv.org/abs/1907.09595.
"""
__all__ = ['MixNet_S', 'MixNet_M', 'MixNet_L']
import os
from inspect import isfunction
from functools import reduce
import paddle
import paddle.nn as nn
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"MixNet_S": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams",
"MixNet_M": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams",
"MixNet_L": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class Identity(nn.Layer):
"""
......@@ -755,13 +761,33 @@ def get_mixnet(version, width_scale, model_name=None, **kwargs):
return net
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MixNet_S(pretrained=False, use_ssld=False, **kwargs):
    """
    MixNet-S model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
    https://arxiv.org/abs/1907.09595.
    """
    model = get_mixnet(
        version="s", width_scale=1.0, model_name="MixNet_S", **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_S"], use_ssld=use_ssld)
    return model
def MixNet_M(pretrained=False, use_ssld=False, **kwargs):
......@@ -769,14 +795,19 @@ def MixNet_M(**kwargs):
MixNet-M model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
https://arxiv.org/abs/1907.09595.
"""
return get_mixnet(
model = get_mixnet(
version="m", width_scale=1.0, model_name="MixNet_M", **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MixNet_M"], use_ssld=use_ssld)
return model
def MixNet_L(pretrained=False, use_ssld=False, **kwargs):
    """
    MixNet-L model from 'MixConv: Mixed Depthwise Convolutional Kernels,'
    https://arxiv.org/abs/1907.09595.
    """
    model = get_mixnet(
        version="m", width_scale=1.3, model_name="MixNet_L", **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["MixNet_L"], use_ssld=use_ssld)
    return model
......@@ -26,9 +26,14 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import KaimingNormal
import math
__all__ = [
"MobileNetV1_x0_25", "MobileNetV1_x0_5", "MobileNetV1_x0_75", "MobileNetV1"
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"MobileNetV1_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_25_pretrained.pdparams",
"MobileNetV1_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_5_pretrained.pdparams",
"MobileNetV1_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_75_pretrained.pdparams",
"MobileNetV1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -245,22 +250,39 @@ class MobileNet(nn.Layer):
y = self.out(y)
return y
def MobileNetV1_x0_25(**args):
model = MobileNet(scale=0.25, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MobileNetV1_x0_25(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.25, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_25"], use_ssld=use_ssld)
return model
def MobileNetV1_x0_5(**args):
model = MobileNet(scale=0.5, **args)
def MobileNetV1_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_5"], use_ssld=use_ssld)
return model
def MobileNetV1_x0_75(**args):
model = MobileNet(scale=0.75, **args)
def MobileNetV1_x0_75(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.75, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1_x0_75"], use_ssld=use_ssld)
return model
def MobileNetV1(**args):
model = MobileNet(scale=1.0, **args)
return model
def MobileNetV1(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV1"], use_ssld=use_ssld)
return model
\ No newline at end of file
......@@ -26,10 +26,16 @@ from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
import math
__all__ = [
"MobileNetV2_x0_25", "MobileNetV2_x0_5", "MobileNetV2_x0_75",
"MobileNetV2", "MobileNetV2_x1_5", "MobileNetV2_x2_0"
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"MobileNetV2_x0_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams",
"MobileNetV2_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams",
"MobileNetV2_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
"MobileNetV2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
"MobileNetV2_x1_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
"MobileNetV2_x2_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
class ConvBNLayer(nn.Layer):
......@@ -149,7 +155,7 @@ class InvresiBlocks(nn.Layer):
class MobileNet(nn.Layer):
def __init__(self, class_dim=1000, scale=1.0, prefix_name="", **args):
def __init__(self, class_dim=1000, scale=1.0, prefix_name=""):
super(MobileNet, self).__init__()
self.scale = scale
self.class_dim = class_dim
......@@ -216,33 +222,52 @@ class MobileNet(nn.Layer):
y = paddle.flatten(y, start_axis=1, stop_axis=-1)
y = self.out(y)
return y
def MobileNetV2_x0_25(**args):
model = MobileNet(scale=0.25, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MobileNetV2_x0_25(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.25, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_25"], use_ssld=use_ssld)
return model
def MobileNetV2_x0_5(**args):
model = MobileNet(scale=0.5, **args)
def MobileNetV2_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_5"], use_ssld=use_ssld)
return model
def MobileNetV2_x0_75(**args):
model = MobileNet(scale=0.75, **args)
def MobileNetV2_x0_75(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=0.75, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x0_75"], use_ssld=use_ssld)
return model
def MobileNetV2(**args):
model = MobileNet(scale=1.0, **args)
def MobileNetV2(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2"], use_ssld=use_ssld)
return model
def MobileNetV2_x1_5(**args):
model = MobileNet(scale=1.5, **args)
def MobileNetV2_x1_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=1.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x1_5"], use_ssld=use_ssld)
return model
def MobileNetV2_x2_0(**args):
model = MobileNet(scale=2.0, **args)
def MobileNetV2_x2_0(pretrained=False, use_ssld=False, **kwargs):
model = MobileNet(scale=2.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV2_x2_0"], use_ssld=use_ssld)
return model
......@@ -28,13 +28,20 @@ from paddle.regularizer import L2Decay
import math
__all__ = [
"MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
"MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0",
"MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35",
"MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75",
"MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
]
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {"MobileNetV3_small_x0_35": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_35_pretrained.pdparams",
"MobileNetV3_small_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_5_pretrained.pdparams",
"MobileNetV3_small_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_75_pretrained.pdparams",
"MobileNetV3_small_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_0_pretrained.pdparams",
"MobileNetV3_small_x1_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_25_pretrained.pdparams",
"MobileNetV3_large_x0_35": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_35_pretrained.pdparams",
"MobileNetV3_large_x0_5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams",
"MobileNetV3_large_x0_75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_75_pretrained.pdparams",
"MobileNetV3_large_x1_0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_pretrained.pdparams",
"MobileNetV3_large_x1_25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_25_pretrained.pdparams"}
__all__ = list(MODEL_URLS.keys())
def make_divisible(v, divisor=8, min_value=None):
......@@ -308,52 +315,75 @@ class SEModule(nn.Layer):
outputs = hardsigmoid(outputs, slope=0.2, offset=0.5)
return paddle.multiply(x=inputs, y=outputs)
def MobileNetV3_small_x0_35(**args):
model = MobileNetV3(model_name="small", scale=0.35, **args)
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MobileNetV3_small_x0_35(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=0.35, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_35"], use_ssld=use_ssld)
return model
def MobileNetV3_small_x0_5(**args):
model = MobileNetV3(model_name="small", scale=0.5, **args)
def MobileNetV3_small_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_5"], use_ssld=use_ssld)
return model
def MobileNetV3_small_x0_75(**args):
model = MobileNetV3(model_name="small", scale=0.75, **args)
def MobileNetV3_small_x0_75(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=0.75, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x0_75"], use_ssld=use_ssld)
return model
def MobileNetV3_small_x1_0(**args):
model = MobileNetV3(model_name="small", scale=1.0, **args)
def MobileNetV3_small_x1_0(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_0"], use_ssld=use_ssld)
return model
def MobileNetV3_small_x1_25(**args):
model = MobileNetV3(model_name="small", scale=1.25, **args)
def MobileNetV3_small_x1_25(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="small", scale=1.25, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_small_x1_25"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x0_35(**args):
model = MobileNetV3(model_name="large", scale=0.35, **args)
def MobileNetV3_large_x0_35(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=0.35, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_35"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x0_5(**args):
model = MobileNetV3(model_name="large", scale=0.5, **args)
def MobileNetV3_large_x0_5(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=0.5, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_5"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x0_75(**args):
model = MobileNetV3(model_name="large", scale=0.75, **args)
def MobileNetV3_large_x0_75(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=0.75, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x0_75"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x1_0(**args):
model = MobileNetV3(model_name="large", scale=1.0, **args)
def MobileNetV3_large_x1_0(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=1.0, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_0"], use_ssld=use_ssld)
return model
def MobileNetV3_large_x1_25(**args):
model = MobileNetV3(model_name="large", scale=1.25, **args)
def MobileNetV3_large_x1_25(pretrained=False, use_ssld=False, **kwargs):
model = MobileNetV3(model_name="large", scale=1.25, **kwargs)
_load_pretrained(pretrained, model, MODEL_URLS["MobileNetV3_large_x1_25"], use_ssld=use_ssld)
return model
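# Usage sketch (editorial): every wrapper above follows the same contract --
# pick a model_name/scale pair, then optionally load weights via `pretrained`.
if __name__ == "__main__":
    import paddle
    model = MobileNetV3_large_x1_0(pretrained=False)
    print(model(paddle.rand([1, 3, 224, 224])).shape)  # [1, 1000] by default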