未验证 提交 db8b6e80 编写于 作者: W Wei Shengyu 提交者: GitHub

Merge pull request #1023 from PaddlePaddle/develop

Develop
......@@ -4,5 +4,4 @@ include docs/en/whl_en.md
recursive-include deploy/python predict_cls.py preprocess.py postprocess.py det_preprocess.py
recursive-include deploy/utils get_image_list.py config.py logger.py predictor.py
include ppcls/arch/backbone/__init__.py
recursive-include ppcls/arch/backbone/ *.py
\ No newline at end of file
recursive-include ppcls/ *.py *.txt
\ No newline at end of file
......@@ -24,6 +24,9 @@ PreProcess:
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: "../ppcls/utils/imagenet1k_label_list.txt"
\ No newline at end of file
main_indicator: Topk
Topk:
topk: 5
class_id_map_file: "../ppcls/utils/imagenet1k_label_list.txt"
SavePreLabel:
save_dir: ./pre_label/
\ No newline at end of file
......@@ -14,6 +14,8 @@
import os
import copy
import shutil
from functools import partial
import importlib
import numpy as np
import paddle
......@@ -23,11 +25,32 @@ import paddle.nn.functional as F
def build_postprocess(config):
    """Build the post-processing pipeline described by ``config``.

    Every key of ``config`` other than ``main_indicator`` is looked up as a
    class in this module (for example ``Topk`` or ``SavePreLabel``) and
    instantiated with the mapped value as keyword arguments.

    Args:
        config (dict | None): The ``PostProcess`` configuration section.
            The optional ``main_indicator`` entry names the operator whose
            output the pipeline returns.

    Returns:
        PostProcesser | None: The assembled pipeline, or ``None`` when
        ``config`` is ``None``.
    """
    if config is None:
        return None

    mod = importlib.import_module(__name__)
    # Work on a copy so popping keys does not alter the caller's config.
    config = copy.deepcopy(config)

    # A missing or empty ``main_indicator`` is normalised to "".
    main_indicator = config.pop("main_indicator", None) or ""

    # An operator declared without parameters maps to None; treat that as
    # "no keyword arguments" instead of failing on ``**None``.
    func_list = [
        getattr(mod, name)(**(params or {}))
        for name, params in config.items()
    ]
    return PostProcesser(func_list, main_indicator)
class PostProcesser(object):
    """Run several post-process operators and return one operator's output.

    Every operator in ``func_list`` is called for each input so that
    operators used only for their side effects still run. The value returned
    is the one produced by the operator whose class name matches
    ``main_indicator``.

    Args:
        func_list (list): Callables invoked as ``func(x, image_file)``.
        main_indicator (str | list | tuple | set | None): Class name of the
            operator whose result is returned, or a collection of such
            names. An empty value selects no operator.
    """

    def __init__(self, func_list, main_indicator="Topk"):
        self.func_list = func_list
        self.main_indicator = main_indicator

    def _is_main(self, func):
        """Return True when ``func`` is the operator whose output is kept."""
        name = type(func).__name__
        indicator = self.main_indicator
        if indicator is None:
            return False
        if isinstance(indicator, str):
            # Exact comparison: ``name in indicator`` on a string is a
            # substring test, so "Top" would wrongly match "Topk".
            return name == indicator
        return name in indicator

    def __call__(self, x, image_file=None):
        """Apply every operator to ``x`` and return the main one's result.

        Returns ``None`` when no operator matches ``main_indicator``. If
        several operators match, the last one in ``func_list`` wins.
        """
        rtn = None
        for func in self.func_list:
            tmp = func(x, image_file)
            if self._is_main(func):
                rtn = tmp
        return rtn
class Topk(object):
......@@ -82,3 +105,24 @@ class Topk(object):
result["label_names"] = label_name_list
y.append(result)
return y
class SavePreLabel(object):
    """Copy each input image into a sub-directory named after its top-1 class.

    Args:
        save_dir (str): Root directory under which one folder per predicted
            class id is created.

    Raises:
        Exception: If ``save_dir`` is ``None``.
    """

    def __init__(self, save_dir):
        if save_dir is None:
            raise Exception(
                "Please specify save_dir if SavePreLabel specified.")
        # Calling ``self.save_dir(name)`` yields ``<save_dir>/<name>``.
        self.save_dir = partial(os.path.join, save_dir)

    def __call__(self, x, file_names=None):
        """Save every image of the batch under its highest-scoring class id.

        Args:
            x: Batch of per-class scores; the first axis indexes the images.
            file_names (list | None): Image paths, one per row of ``x``.
                Nothing is done when this is ``None``.
        """
        if file_names is None:
            return
        assert x.shape[0] == len(file_names)
        for position, scores in enumerate(x):
            # The last entry of the ascending argsort is the top-1 class.
            best_class = scores.argsort(axis=0)[-1].astype("int32")
            self.save(best_class, file_names[position])

    def save(self, id, image_file):
        """Copy ``image_file`` into the folder for class ``id``, creating it if needed."""
        target_dir = self.save_dir(str(id))
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(image_file, target_dir)
......@@ -70,7 +70,7 @@ def main(config):
for idx, image_file in enumerate(image_list):
img = cv2.imread(image_file)[:, :, ::-1]
output = cls_predictor.predict(img)
output = cls_predictor.postprocess(output)
output = cls_predictor.postprocess(output, [image_file])
print(output)
return
......
# paddleclas package
# PaddleClas wheel package
## Get started quickly
## 1. Installation
### install package
* installing from pypi
install by pypi
```bash
pip install paddleclas==2.0.3
pip3 install paddleclas==2.2.0
```
build own whl package and install
* build own whl package and install
```bash
python3 setup.py bdist_wheel
pip3 install dist/paddleclas-x.x.x-py3-none-any.whl
pip3 install dist/*
```
### 1. Quick Start
* Assign `image_file='docs/images/whl/demo.jpg'`, Use inference model that Paddle provides `model_name='ResNet50'`
**Here is demo.jpg**
## 2. Quick Start
* Using the `ResNet50` model provided by PaddleClas, the following image(`'docs/images/whl/demo.jpg'`) as an example.
<div align="center">
<img src="../images/whl/demo.jpg" width = "400" />
</div>
* Python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50', top_k=5)
image_file='docs/images/whl/demo.jpg'
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ResNet50')
infer_imgs='docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
**Note**: `PaddleClas.predict()` is a `generator`, so you need to call `next()` on it or iterate over it with a `for` loop. Each iteration runs prediction on one batch of `batch_size` images and returns the prediction result(s). Examples of returned results are as follows:
```
>>> result
[{'class_ids': array([ 8, 7, 86, 82, 80]), 'scores': array([9.7967714e-01, 2.0280687e-02, 2.7053760e-05, 6.1860351e-06,
2.6378802e-06], dtype=float32), 'label_names': ['hen', 'cock', 'partridge', 'ruffed grouse, partridge, Bonasa umbellus', 'black grouse'], 'filename': 'docs/images/whl/demo.jpg'}
>>> result
[{'class_ids': [8, 7, 136, 80, 84], 'scores': [0.79368, 0.16329, 0.01853, 0.00959, 0.00239], 'label_names': ['hen', 'cock', 'European gallinule, Porphyrio porphyrio', 'black grouse', 'peacock']}]
```
* Using command line interactive programming
* CLI
```bash
paddleclas --model_name=ResNet50 --top_k=5 --image_file='docs/images/whl/demo.jpg'
```
```
>>> result
**********docs/images/whl/demo.jpg**********
filename: docs/images/whl/demo.jpg; class id: 8, 7, 86, 82, 80; scores: 0.9797, 0.0203, 0.0000, 0.0000, 0.0000; label: hen, cock, partridge, ruffed grouse, partridge, Bonasa umbellus, black grouse
Predict complete!
```
### 2. Definition of Parameters
* model_name(str): model's name. If not assigning `model_file`and`params_file`, you can assign this param. If using inference model based on ImageNet1k provided by Paddle, set as default='ResNet50'.
* image_file(str or numpy.ndarray): image's path. Support assigning single local image, internet image and folder containing series of images. Also Support numpy.ndarray, the channel order is [B, G, R].
* use_gpu(bool): Whether to use GPU or not, defalut=False。
* use_tensorrt(bool): whether to open tensorrt or not. Using it can greatly promote predict preformance, default=False.
* is_preprocessed(bool): Assign the image data has been preprocessed or not when the image_file is numpy.ndarray.
* resize_short(int): resize the minima between height and width into resize_short(int), default=256
* resize(int): resize image into resize(int), default=224.
* normalize(bool): whether normalize image or not, default=True.
* batch_size(int): batch number, default=1.
* model_file(str): path of inference.pdmodel. If not assign this param,you need assign `model_name` for downloading.
* params_file(str): path of inference.pdiparams. If not assign this param,you need assign `model_name` for downloading.
* ir_optim(bool): whether enable IR optimization or not, default=True.
* gpu_mem(int): GPU memory usages,default=8000。
* enable_profile(bool): whether enable profile or not,default=False.
* top_k(int): Assign top_k, default=1.
* enable_mkldnn(bool): whether enable MKLDNN or not, default=False.
* cpu_num_threads(int): Assign number of cpu threads, default=10.
* label_name_path(str): Assign path of label_name_dict you use. If using your own training model, you can assign this param. If using inference model based on ImageNet1k provided by Paddle, you may not assign this param.Defaults take ImageNet1k's label name.
paddleclas --model_name=ResNet50 --infer_imgs="docs/images/whl/demo.jpg"
```
```
>>> result
filename: docs/images/whl/demo.jpg, top-5, class_ids: [8, 7, 136, 80, 84], scores: [0.79368, 0.16329, 0.01853, 0.00959, 0.00239], label_names: ['hen', 'cock', 'European gallinule, Porphyrio porphyrio', 'black grouse', 'peacock']
Predict complete!
```
## 3. Definition of Parameters
The following parameters can be specified in Command Line or used as parameters of the constructor when instantiating the PaddleClas object in Python.
* model_name(str): If using inference model based on ImageNet1k provided by Paddle, please specify the model's name by the parameter.
* inference_model_dir(str): Local model files directory, which is valid when `model_name` is not specified. The directory should contain `inference.pdmodel` and `inference.pdiparams`.
* infer_imgs(str): The path of image to be predicted, or the directory containing the image files, or the URL of the image from Internet.
* use_gpu(bool): Whether to use GPU or not, default by `True`.
* gpu_mem(int): GPU memory usage, default by `8000`.
* use_tensorrt(bool): Whether to enable TensorRT or not. Using it can greatly improve prediction performance, default by `False`.
* enable_mkldnn(bool): Whether enable MKLDNN or not, default `False`.
* cpu_num_threads(int): Assign number of cpu threads, valid when `--use_gpu` is `False` and `--enable_mkldnn` is `True`, default by `10`.
* batch_size(int): Batch size, default by `1`.
* resize_short(int): Resize the minima between height and width into `resize_short`, default by `256`.
* crop_size(int): Center crop image to `crop_size`, default by `224`.
* topk(int): Print (return) the `topk` prediction results, default by `5`.
* class_id_map_file(str): The mapping file between class ID and label, default by `ImageNet1K` dataset's mapping.
* pre_label_image(bool): whether prelabel or not, default=False.
* pre_label_out_idr(str): If prelabeling, the path of output.
* save_dir(str): The directory to save the prediction results that can be used as pre-label, default by `None`, that is, not to save.
**Note**: If you want to use `Transformer series models`, such as `DeiT_***_384`, `ViT_***_384`, etc., please pay attention to the input size of model, and need to set `resize_short=384`, `resize=384` when building a `PaddleClas` object. The following is a demo.
**Note**: If you want to use `Transformer series models`, such as `DeiT_***_384`, `ViT_***_384`, etc., please pay attention to the input size of the model; you need to set `resize_short=384` and `crop_size=384`. The following is a demo.
* Bash:
```bash
paddleclas --model_name=ViT_base_patch16_384 --image_file='docs/images/whl/demo.jpg' --resize_short=384 --resize=384
```
* CLI:
```bash
from paddleclas import PaddleClas, get_default_confg
paddleclas --model_name=ViT_base_patch16_384 --infer_imgs='docs/images/whl/demo.jpg' --resize_short=384 --crop_size=384
```
* Python:
```python
clas = PaddleClas(model_name='ViT_base_patch16_384', top_k=5, resize_short=384, resize=384)
```
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ViT_base_patch16_384', resize_short=384, crop_size=384)
```
## 4. Usage
### 3. Different Usages of Codes
PaddleClas provides two ways to use:
1. Python interactive programming;
2. Bash command line programming.
**We provide two ways to use: 1. Python interative programming 2. Bash command line programming**
### 4.1 View help information
* check `help` information
* CLI
```bash
paddleclas -h
```
* Use user-specified model, you need to assign model's path `model_file` and parameters's path`params_file`
### 4.2 Prediction using inference model provided by PaddleClas
You can use the inference model provided by PaddleClas to predict, and only need to specify `model_name`. In this case, PaddleClas will automatically download files of specified model and save them in the directory `~/.paddleclas/`.
###### python
* Python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_file='the path of model file',
params_file='the path of params file')
image_file = 'docs/images/whl/demo.jpg'
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ResNet50')
infer_imgs = 'docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
###### bash
* CLI
```bash
paddleclas --model_file='user-specified model path' --params_file='parmas path' --image_file='docs/images/whl/demo.jpg'
paddleclas --model_name='ResNet50' --infer_imgs='docs/images/whl/demo.jpg'
```
* Use inference model which PaddlePaddle provides to predict, you need to choose one of model proviede by PaddleClas to assign `model_name`. So there's no need to assign `model_file`. And the model you chosen will be download in `~/.paddleclas/`, which will be saved in folder named by `model_name`.
###### python
### 4.3 Prediction using local model files
You can use the local model files trained by yourself to predict, and only need to specify `inference_model_dir`. Note that the directory must contain `inference.pdmodel` and `inference.pdiparams`.
* Python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50')
image_file = 'docs/images/whl/demo.jpg'
result=clas.predict(image_file)
print(result)
clas = PaddleClas(inference_model_dir='./inference/')
infer_imgs = 'docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
###### bash
* CLI
```bash
paddleclas --model_name='ResNet50' --image_file='docs/images/whl/demo.jpg'
paddleclas --inference_model_dir='./inference/' --infer_imgs='docs/images/whl/demo.jpg'
```
* You can assign input as format `numpy.ndarray` which has been preprocessed `image_file=np.ndarray`. Note that the image data must be three channel. If need To preprocess the image, the image channels order must be [B, G, R].
### 4.4 Prediction by batch
You can predict by batch: you only need to specify `batch_size` when `infer_imgs` is a directory containing image files.
###### python
* Python
```python
import cv2
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50')
image_file = cv2.imread("docs/images/whl/demo.jpg")
result=clas.predict(image_file)
clas = PaddleClas(model_name='ResNet50', batch_size=2)
infer_imgs = 'docs/images/'
result=clas.predict(infer_imgs)
for r in result:
print(r)
```
* You can assign `image_file` as a folder path containing series of images.
* CLI
```bash
paddleclas --model_name='ResNet50' --infer_imgs='docs/images/' --batch_size 2
```
###### python
### 4.5 Prediction of Internet image
You can predict the Internet image, only need to specify URL of Internet image by `infer_imgs`. In this case, the image file will be downloaded and saved in the directory `~/.paddleclas/images/`.
* Python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50')
image_file = 'docs/images/whl/' # it can be image_file folder path which contains all of images you want to predict.
result=clas.predict(image_file)
print(result)
infer_imgs = 'https://raw.githubusercontent.com/paddlepaddle/paddleclas/release/2.2/docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
###### bash
* CLI
```bash
paddleclas --model_name='ResNet50' --image_file='docs/images/whl/'
paddleclas --model_name='ResNet50' --infer_imgs='https://raw.githubusercontent.com/paddlepaddle/paddleclas/release/2.2/docs/images/whl/demo.jpg'
```
* You can assign `--pre_label_image=True`, `--pre_label_out_idr= './output_pre_label/'`. Then images will be copied into folder named by top-1 class_id.
###### python
### 4.6 Prediction of NumPy.array format image
In Python code, you can predict the NumPy.array format image, only need to use the `infer_imgs` to transfer variable of image data. Note that the image data must be 3 channels.
* python
```python
import cv2
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50', pre_label_image=True, pre_label_out_idr='./output_pre_label/')
image_file = 'docs/images/whl/' # it can be image_file folder path which contains all of images you want to predict.
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ResNet50')
infer_imgs = cv2.imread("docs/images/whl/demo.jpg")
result=clas.predict(infer_imgs)
print(next(result))
```
###### bash
### 4.7 Save the prediction result(s)
You can save the prediction result(s) as pre-labels: you only need to use `save_dir` to specify the directory to save them in.
* python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50', save_dir='./output_pre_label/')
infer_imgs = 'docs/images/whl/' # it can be infer_imgs folder path which contains all of images you want to predict.
result=clas.predict(infer_imgs)
print(next(result))
```
* CLI
```bash
paddleclas --model_name='ResNet50' --image_file='docs/images/whl/' --pre_label_image=True --pre_label_out_idr='./output_pre_label/'
paddleclas --model_name='ResNet50' --infer_imgs='docs/images/whl/' --save_dir='./output_pre_label/'
```
* You can assign `--label_name_path` as your own label_dict_file, format should be as(class_id<space>class_name<\n>).
### 4.8 Specify the mapping between class id and label name
You can specify the mapping between class id and label name, only need to use `class_id_map_file` to specify the mapping file. PaddleClas uses ImageNet1K's mapping by default.
The content format of mapping file shall be:
```
0 tench, Tinca tinca
1 goldfish, Carassius auratus
2 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
......
class_id<space>class_name<\n>
```
* If you use inference model that PaddleClas provides, you do not need assign `label_name_path`. Program will take `ppcls/utils/imagenet1k_label_list.txt` as defaults. If you hope using your own training model, you can provide `label_name_path` outputing 'label_name' and scores, otherwise no 'label_name' in output information.
For example:
###### python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_file= 'the path of model file', params_file = 'the path of params file', label_name_path='./ppcls/utils/imagenet1k_label_list.txt')
image_file = 'docs/images/whl/demo.jpg' # it can be image_file folder path which contains all of images you want to predict.
result=clas.predict(image_file)
print(result)
```
###### bash
```bash
paddleclas --model_file='the path of model file' --params_file='the path of params file' --image_file='docs/images/whl/demo.jpg' --label_name_path='./ppcls/utils/imagenet1k_label_list.txt'
0 tench, Tinca tinca
1 goldfish, Carassius auratus
2 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
......
```
###### python
* Python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50')
image_file = 'docs/images/whl/' # it can be directory which contains all of images you want to predict.
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ResNet50', class_id_map_file='./ppcls/utils/imagenet1k_label_list.txt')
infer_imgs = 'docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
###### bash
* CLI
```bash
paddleclas --model_name='ResNet50' --image_file='docs/images/whl/'
paddleclas --model_name='ResNet50' --infer_imgs='docs/images/whl/demo.jpg' --class_id_map_file='./ppcls/utils/imagenet1k_label_list.txt'
```
# paddleclas package使用说明
# PaddleClas wheel package使用说明
## 快速上手
## 1. 安装
### 安装whl包
* pip安装
pip安装
```bash
pip install paddleclas==2.0.3
pip3 install paddleclas==2.2.0
```
本地构建并安装
* 本地构建并安装
```bash
python3 setup.py bdist_wheel
pip3 install dist/paddleclas-x.x.x-py3-none-any.whl # x.x.x是paddleclas的版本号,默认为0.0.0
pip3 install dist/*
```
### 1. 快速开始
* 指定`image_file='docs/images/whl/demo.jpg'`,使用Paddle提供的inference model,`model_name='ResNet50'`, 使用图片`docs/images/whl/demo.jpg`
**下图是使用的demo图片**
## 2. 快速开始
* 使用`ResNet50`模型,以下图(`'docs/images/whl/demo.jpg'`)为例进行说明。
<div align="center">
<img src="../images/whl/demo.jpg" width = "400" />
</div>
* 在Python代码中使用
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50', top_k=5)
image_file='docs/images/whl/demo.jpg'
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ResNet50')
infer_imgs='docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
**注意**: `PaddleClas.predict()` 为可迭代对象(`generator`),因此需要使用 `next()` 函数或 `for` 循环对其迭代调用。每次调用将以 `batch_size` 为单位进行一次预测,并返回预测结果。返回结果示例如下:
```
>>> result
[{'class_ids': array([ 8, 7, 86, 82, 80]), 'scores': array([9.7967714e-01, 2.0280687e-02, 2.7053760e-05, 6.1860351e-06,
2.6378802e-06], dtype=float32), 'label_names': ['hen', 'cock', 'partridge', 'ruffed grouse, partridge, Bonasa umbellus', 'black grouse'], 'filename': 'docs/images/whl/demo.jpg'}]
>>> result
[{'class_ids': [8, 7, 136, 80, 84], 'scores': [0.79368, 0.16329, 0.01853, 0.00959, 0.00239], 'label_names': ['hen', 'cock', 'European gallinule, Porphyrio porphyrio', 'black grouse', 'peacock']}]
```
* 使用命令行式交互方法。直接获得结果。
```bash
paddleclas --model_name=ResNet50 --top_k=5 --image_file='docs/images/whl/demo.jpg'
```
```
>>> result
**********docs/images/whl/demo.jpg**********
filename: docs/images/whl/demo.jpg; class id: 8, 7, 86, 82, 80; scores: 0.9797, 0.0203, 0.0000, 0.0000, 0.0000; label: hen, cock, partridge, ruffed grouse, partridge, Bonasa umbellus, black grouse
Predict complete!
```
### 2. 参数解释
以下参数可在命令行交互使用时通过参数指定,或在Python代码中实例化PaddleClas对象时作为构造函数的参数使用。
* model_name(str): 模型名称,没有指定自定义的model_file和params_file时,可以指定该参数,使用PaddleClas提供的基于ImageNet1k的inference model,默认值为ResNet50。
* image_file(str or numpy.ndarray): 图像地址,支持指定单一图像的路径或图像的网址进行预测,支持指定包含图像的文件夹路径,支持numpy.ndarray格式的三通道图像数据,且通道顺序为[B, G, R]。
* use_gpu(bool): 是否使用GPU,如果使用,指定为True。默认为False。
* use_tensorrt(bool): 是否开启TensorRT预测,可提升GPU预测性能,需要使用带TensorRT的预测库。当使用TensorRT推理加速,指定为True。默认为False。
* is_preprocessed(bool): 当image_file为numpy.ndarray格式的图像数据时,图像数据是否已经过预处理。如果该参数为True,则不再对image_file数据进行预处理,否则将转换通道顺序后,按照resize_short,resize,normalize参数对图像进行预处理。默认值为False。
* resize_short(int): 将图像的高宽二者中小的值,调整到指定的resize_short值,大的值按比例放大。默认为256。
* resize(int): 将图像裁剪到指定的resize值大小,默认224。
* normalize(bool): 是否对图像数据归一化,默认True。
* batch_size(int): 预测时每个batch的样本数量,默认为1。
* model_file(str): 模型.pdmodel的路径,若不指定该参数,需要指定model_name,获得下载的模型。
* params_file(str): 模型参数.pdiparams的路径,若不指定,则需要指定model_name,以获得下载的模型。
* ir_optim(bool): 是否开启IR优化,默认为True。
* gpu_mem(int): 使用的GPU显存大小,默认为8000。
* enable_profile(bool): 是否开启profile功能,默认False。
* top_k(int): 指定的topk,打印(返回)预测结果的前k个类别和对应的分类概率,默认为1。
* enable_mkldnn(bool): 是否开启MKLDNN,默认False。
* cpu_num_threads(int): 指定cpu线程数,默认设置为10。
* label_name_path(str): 指定一个表示所有的label name的文件路径。当用户使用自己训练的模型,可指定这一参数,打印结果时可以显示图像对应的类名称。若用户使用Paddle提供的inference model,则可不指定该参数,使用imagenet1k的label_name,默认为空字符串。
* pre_label_image(bool): 是否需要进行预标注。
* pre_label_out_idr(str): 进行预标注后,输出结果的文件路径,默认为None。
**注意**: 如果使用`Transformer`系列模型,如`DeiT_***_384`, `ViT_***_384`等,请注意模型的输入数据尺寸,需要设置参数`resize_short=384`, `resize=384`,如下所示:
* 在命令行中使用:
```bash
paddleclas --model_name=ViT_base_patch16_384 --image_file='docs/images/whl/demo.jpg' --resize_short=384 --resize=384
```
* 在python代码中:
```python
clas = PaddleClas(model_name='ViT_base_patch16_384', top_k=5, resize_short=384, resize=384)
```
### 3. 代码使用方法
**提供两种使用方式:1、python交互式编程。2、bash命令行式编程**
* 查看帮助信息
###### bash
* 在命令行中使用
```bash
paddleclas -h
paddleclas --model_name=ResNet50 --infer_imgs="docs/images/whl/demo.jpg"
```
* 用户使用自己指定的模型,需要指定模型路径参数`model_file`和参数`params_file`
```
>>> result
filename: docs/images/whl/demo.jpg, top-5, class_ids: [8, 7, 136, 80, 84], scores: [0.79368, 0.16329, 0.01853, 0.00959, 0.00239], label_names: ['hen', 'cock', 'European gallinule, Porphyrio porphyrio', 'black grouse', 'peacock']
Predict complete!
```
## 3. 参数解释
以下参数可在命令行方式使用中通过参数指定,或在Python代码中实例化PaddleClas对象时作为构造函数的参数使用。
* model_name(str): 模型名称,使用PaddleClas提供的基于ImageNet1k的预训练模型。
* inference_model_dir(str): 本地模型文件目录,当未指定 `model_name` 时该参数有效。该目录下需包含 `inference.pdmodel` 和 `inference.pdiparams` 两个模型文件。
* infer_imgs(str): 待预测图片文件路径,或包含图片文件的目录,或网络图片的URL。
* use_gpu(bool): 是否使用GPU,默认为 `True`
* gpu_mem(int): 使用的GPU显存大小,当 `use_gpu` 为 `True` 时有效,默认为8000。
* use_tensorrt(bool): 是否开启TensorRT预测,可提升GPU预测性能,需要使用带TensorRT的预测库,默认为 `False`
* enable_mkldnn(bool): 是否开启MKLDNN,当 `use_gpu` 为 `False` 时有效,默认 `False`
* cpu_num_threads(int): cpu预测时的线程数,当 `use_gpu` 为 `False` 且 `enable_mkldnn` 为 `True` 时有效,默认值为 `10`
* batch_size(int): 预测时每个batch的样本数量,默认为 `1`
* resize_short(int): 按图像较短边进行等比例缩放,默认为 `256`
* crop_size(int): 将图像裁剪到指定大小,默认为 `224`
* topk(int): 打印(返回)预测结果的前 `topk` 个类别和对应的分类概率,默认为 `5`
* class_id_map_file(str): `class id` 与 `label` 的映射关系文件。默认使用 `ImageNet1K` 数据集的映射关系。
* save_dir(str): 将预测结果作为预标注数据保存的路径,默认为 `None`,即不保存。
**注意**: 如果使用`Transformer`系列模型,如`DeiT_***_384`, `ViT_***_384`等,请注意模型的输入数据尺寸,需要设置参数`resize_short=384`, `crop_size=384`,如下所示。
* 命令行中
```bash
from paddleclas import PaddleClas, get_default_confg
paddleclas --model_name=ViT_base_patch16_384 --infer_imgs='docs/images/whl/demo.jpg' --resize_short=384 --crop_size=384
```
###### python
* Python代码中
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_file='the path of model file',
params_file='the path of params file')
image_file = 'docs/images/whl/demo.jpg' # image_file 可指定为前缀是https的网络图片,也可指定为本地图片
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ViT_base_patch16_384', resize_short=384, crop_size=384)
```
###### bash
## 4. 使用示例
PaddleClas提供两种使用方式:
1. Python代码中使用;
2. 命令行中使用。
### 4.1 查看帮助信息
* CLI
```bash
paddleclas --model_file='user-specified model path' --params_file='parmas path' --image_file='docs/images/whl/demo.jpg'
paddleclas -h
```
* 用户使用PaddlePaddle训练好的inference model来预测,并通过参数`model_name`指定。
此时无需指定`model_file`,模型会根据`model_name`自动下载指定模型到当前目录,并保存在目录`~/.paddleclas/`下以`model_name`命名的文件夹中。
###### python
### 4.2 使用PaddleClas提供的预训练模型进行预测
可以使用PaddleClas提供的预训练模型来预测,并通过参数`model_name`指定。此时PaddleClas会根据`model_name`自动下载指定模型,并保存在目录`~/.paddleclas/`下。
* Python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50')
image_file = 'docs/images/whl/demo.jpg' # image_file 可指定为前缀是https的网络图片,也可指定为本地图片
result=clas.predict(image_file)
print(result)
infer_imgs = 'docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
###### bash
* CLI
```bash
paddleclas --model_name='ResNet50' --image_file='docs/images/whl/demo.jpg'
paddleclas --model_name='ResNet50' --infer_imgs='docs/images/whl/demo.jpg'
```
* 用户可以使用numpy.ndarray格式的图像数据,并通过参数`image_file`指定。注意该图像数据必须为三通道图像数据。如需对图像进行预处理,则图像通道顺序必须为[B, G, R]。
###### python
### 4.3 使用本地模型文件预测
可以使用本地的模型文件进行预测,通过参数`inference_model_dir`指定模型文件目录即可。需要注意,模型文件目录下必须包含`inference.pdmodel`和`inference.pdiparams`两个文件。
* Python
```python
import cv2
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50')
image_file = cv2.imread("docs/images/whl/demo.jpg")
result=clas.predict(image_file)
clas = PaddleClas(inference_model_dir='./inference/')
infer_imgs = 'docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
* 用户可以将`image_file`指定为包含图片的文件夹路径。
* CLI
```bash
paddleclas --inference_model_dir='./inference/' --infer_imgs='docs/images/whl/demo.jpg'
```
### 4.4 批量预测
当参数 `infer_imgs` 为包含图片文件的目录时,可以对图片进行批量预测,只需通过参数 `batch_size` 指定batch大小。
###### python
* Python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50')
image_file = 'docs/images/whl/' # it can be image_file folder path which contains all of images you want to predict.
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ResNet50', batch_size=2)
infer_imgs = 'docs/images/'
result=clas.predict(infer_imgs)
for r in result:
print(r)
```
###### bash
* CLI
```bash
paddleclas --model_name='ResNet50' --image_file='docs/images/whl/'
paddleclas --model_name='ResNet50' --infer_imgs='docs/images/' --batch_size 2
```
* 用户可以指定`pre_label_image=True`, `pre_label_out_idr='./output_pre_label/'`,将图片按其top1预测结果保存到`pre_label_out_dir`目录下对应类别的文件夹中。
###### python
### 4.5 对网络图片进行预测
可以对网络图片进行预测,只需通过参数`infer_imgs`指定图片`url`。此时图片会下载并保存在`~/.paddleclas/images/`目录下。
* Python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50', pre_label_image=True,pre_label_out_idr='./output_pre_label/')
image_file = 'docs/images/whl/' # it can be image_file folder path which contains all of images you want to predict.
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ResNet50')
infer_imgs = 'https://raw.githubusercontent.com/paddlepaddle/paddleclas/release/2.2/docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
###### bash
* CLI
```bash
paddleclas --model_name='ResNet50' --image_file='docs/images/whl/' --pre_label_image=True --pre_label_out_idr='./output_pre_label/'
paddleclas --model_name='ResNet50' --infer_imgs='https://raw.githubusercontent.com/paddlepaddle/paddleclas/release/2.2/docs/images/whl/demo.jpg'
```
* 用户可以通过参数`label_name_path`指定模型的`label_dict_file`文件路径,文件内容格式应为(class_id<space>class_name<\n>),例如:
```
0 tench, Tinca tinca
1 goldfish, Carassius auratus
2 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
......
### 4.6 对`NumPy.ndarray`格式数据进行预测
在Python中,可以对`Numpy.ndarray`格式的图像数据进行预测,只需通过参数`infer_imgs`指定即可。注意该图像数据必须为三通道图像数据。
* python
```python
import cv2
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50')
infer_imgs = cv2.imread("docs/images/whl/demo.jpg")
result=clas.predict(infer_imgs)
print(next(result))
```
* 用户如果使用Paddle提供的inference model,则不需要提供`label_name_path`,会默认使用`ppcls/utils/imagenet1k_label_list.txt`
如果用户希望使用自己的模型,则可以提供`label_name_path`,将label_name与结果一并输出。如果不提供将不会输出label_name信息。
### 4.7 保存预测结果
可以指定参数`save_dir='./output_pre_label/'`,将图片按其top1预测结果保存到`save_dir`目录下对应类别的文件夹中。
###### python
* python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_file='the path of model file', params_file ='the path of params file', label_name_path='./ppcls/utils/imagenet1k_label_list.txt')
image_file = 'docs/images/whl/demo.jpg' # it can be image_file folder path which contains all of images you want to predict.
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ResNet50', save_dir='./output_pre_label/')
infer_imgs = 'docs/images/whl/' # it can be infer_imgs folder path which contains all of images you want to predict.
result=clas.predict(infer_imgs)
print(next(result))
```
###### bash
* CLI
```bash
paddleclas --model_file='the path of model file' --params_file='the path of params file' --image_file='docs/images/whl/demo.jpg' --label_name_path='./ppcls/utils/imagenet1k_label_list.txt'
paddleclas --model_name='ResNet50' --infer_imgs='docs/images/whl/' --save_dir='./output_pre_label/'
```
### 4.8 指定label name
可以通过参数`class_id_map_file`指定`class id`与`label`的对应关系。PaddleClas默认使用ImageNet1K的label_name(`ppcls/utils/imagenet1k_label_list.txt`)。
`class_id_map_file`文件内容格式应为:
```
class_id<space>class_name<\n>
```
例如:
```
0 tench, Tinca tinca
1 goldfish, Carassius auratus
2 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
......
```
###### python
* Python
```python
from paddleclas import PaddleClas
clas = PaddleClas(model_name='ResNet50')
image_file = 'docs/images/whl/' # it can be image_file folder path which contains all of images you want to predict.
result=clas.predict(image_file)
print(result)
clas = PaddleClas(model_name='ResNet50', class_id_map_file='./ppcls/utils/imagenet1k_label_list.txt')
infer_imgs = 'docs/images/whl/demo.jpg'
result=clas.predict(infer_imgs)
print(next(result))
```
###### bash
* CLI
```bash
paddleclas --model_name='ResNet50' --image_file='docs/images/whl/'
paddleclas --model_name='ResNet50' --infer_imgs='docs/images/whl/demo.jpg' --class_id_map_file='./ppcls/utils/imagenet1k_label_list.txt'
```
......@@ -162,73 +162,128 @@ class InputModelError(Exception):
super().__init__(message)
def _str2bool(value):
    """Convert a command-line string such as "False" or "0" to a bool."""
    if isinstance(value, bool):
        return value
    return str(value).lower() in ("true", "t", "yes", "y", "1")


def args_cfg():
    """Parse the command line and build the run configuration.

    Registers the PaddleClas-specific options on top of the parser returned
    by ``config.parser()``. Each option whose parsed value differs from its
    default is forwarded as a ``Global.<name>=<value>`` override.

    Returns:
        The configuration produced by ``config.get_config``.
    """
    parser = config.parser()
    other_options = [
        ("infer_imgs", str, None, "The image(s) to be predicted."),
        ("model_name", str, None, "The model name to be used."),
        ("inference_model_dir", str, None, "The directory of model files."),
        ("use_gpu", bool, True, "Whether use GPU. Default by True."),
        ("enable_mkldnn", bool, False,
         "Whether use MKLDNN. Default by False."),
        ("batch_size", int, 1, "Batch size. Default by 1."),
    ]
    for name, opt_type, default, description in other_options:
        # argparse's ``type=bool`` maps every non-empty string, including
        # "False", to True, so boolean options get an explicit converter.
        arg_type = _str2bool if opt_type is bool else opt_type
        parser.add_argument(
            "--" + name, type=arg_type, default=default, help=description)

    args = parser.parse_args()

    for name, _opt_type, default, _description in other_options:
        # Plain attribute lookup; no need to eval a built expression.
        val = getattr(args, name)
        # Identity comparison is kept from the original: only values that
        # are not the default object are turned into overrides.
        if val is not default:
            args.override.append(f"Global.{name}={val}")

    cfg = config.get_config(
        args.config, overrides=args.override, show=args.verbose)
    return cfg
def get_default_confg():
return {
def init_config(model_name,
inference_model_dir,
use_gpu=True,
batch_size=1,
topk=5,
**kwargs):
imagenet1k_map_path = os.path.join(
os.path.abspath(__dir__), "ppcls/utils/imagenet1k_label_list.txt")
cfg = {
"Global": {
"model_name": "MobileNetV3_small_x0_35",
"use_gpu": False,
"use_fp16": False,
"enable_mkldnn": False,
"cpu_num_threads": 1,
"use_tensorrt": False,
"ir_optim": False,
"infer_imgs": kwargs["infer_imgs"]
if "infer_imgs" in kwargs else False,
"model_name": model_name,
"inference_model_dir": inference_model_dir,
"batch_size": batch_size,
"use_gpu": use_gpu,
"enable_mkldnn": kwargs["enable_mkldnn"]
if "enable_mkldnn" in kwargs else False,
"cpu_num_threads": kwargs["cpu_num_threads"]
if "cpu_num_threads" in kwargs else 1,
"enable_benchmark": False,
"use_fp16": kwargs["use_fp16"] if "use_fp16" in kwargs else False,
"ir_optim": True,
"use_tensorrt": kwargs["use_tensorrt"]
if "use_tensorrt" in kwargs else False,
"gpu_mem": kwargs["gpu_mem"] if "gpu_mem" in kwargs else 8000,
"enable_profile": False
},
"PreProcess": {
"transform_ops": [{
"ResizeImage": {
"resize_short": 256
"resize_short": kwargs["resize_short"]
if "resize_short" in kwargs else 256
}
}, {
"CropImage": {
"size": 224
"size": kwargs["crop_size"]
if "crop_size" in kwargs else 224
}
}, {
"NormalizeImage": {
"scale": 0.00392157,
"mean": [0.485, 0.456, 0.406],
"std": [0.229, 0.224, 0.225],
"order": ""
"order": ''
}
}, {
"ToCHWImage": None
}]
},
"PostProcess": {
"name": "Topk",
"topk": 5,
"class_id_map_file": "./ppcls/utils/imagenet1k_label_list.txt"
"main_indicator": "Topk",
"Topk": {
"topk": topk,
"class_id_map_file": imagenet1k_map_path
}
}
}
if "save_dir" in kwargs:
if kwargs["save_dir"] is not None:
cfg["PostProcess"]["SavePreLabel"] = {
"save_dir": kwargs["save_dir"]
}
if "class_id_map_file" in kwargs:
if kwargs["class_id_map_file"] is not None:
cfg["PostProcess"]["Topk"]["class_id_map_file"] = kwargs[
"class_id_map_file"]
cfg = config.AttrDict(cfg)
config.create_attr_dict(cfg)
return cfg
def args_cfg():
    """Parse the command-line arguments of the ``paddleclas`` CLI.

    Returns:
        dict: Mapping from argument name to parsed value (``vars`` of the
        argparse namespace).
    """

    def str2bool(v):
        return v.lower() in ("true", "t", "1")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--infer_imgs",
        type=str,
        required=True,
        help="The image(s) to be predicted.")
    parser.add_argument(
        "--model_name", type=str, help="The model name to be used.")
    parser.add_argument(
        "--inference_model_dir",
        type=str,
        help="The directory of model files. Valid when model_name not specifed."
    )
    # Parsed with str2bool like the other boolean flags: with ``type=str`` the
    # value "False" stayed a non-empty (truthy) string and could never
    # disable the GPU.
    parser.add_argument(
        "--use_gpu", type=str2bool, default=True, help="Whether use GPU.")
    parser.add_argument("--gpu_mem", type=int, default=8000, help="")
    parser.add_argument(
        "--enable_mkldnn",
        type=str2bool,
        default=False,
        help="Whether use MKLDNN. Valid when use_gpu is False")
    parser.add_argument("--cpu_num_threads", type=int, default=1, help="")
    parser.add_argument(
        "--use_tensorrt", type=str2bool, default=False, help="")
    parser.add_argument("--use_fp16", type=str2bool, default=False, help="")
    parser.add_argument(
        "--batch_size", type=int, default=1, help="Batch size. Default by 1.")
    parser.add_argument(
        "--topk",
        type=int,
        default=5,
        help="Return topk score(s) and corresponding results. Default by 5.")
    parser.add_argument(
        "--class_id_map_file",
        type=str,
        help="The path of file that map class_id and label.")
    parser.add_argument(
        "--save_dir",
        type=str,
        help="The directory to save prediction results as pre-label.")
    parser.add_argument("--resize_short", type=int, default=256, help="")
    parser.add_argument("--crop_size", type=int, default=224, help="")

    args = parser.parse_args()
    return vars(args)
def print_info():
......@@ -292,7 +347,7 @@ def download_with_progressbar(url, save_path):
if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes or not os.path.isfile(
save_path):
raise Exception(
f"Something went wrong while downloading model/image from {url}")
f"Something went wrong while downloading file from {url}")
def check_model_file(model_name):
......@@ -334,15 +389,6 @@ def check_model_file(model_name):
return storage_directory()
def save_prelabel_results(class_id, input_file_path, output_dir):
    """Copy an input file into a sub-directory named after its class id.

    Args:
        class_id: Class id; ``str(class_id)`` becomes the sub-directory name.
        input_file_path: Path of the file to copy.
        output_dir: Directory under which the class sub-directory is created.
    """
    output_dir = os.path.join(output_dir, str(class_id))
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(output_dir, exist_ok=True)
    shutil.copy(input_file_path, output_dir)
class PaddleClas(object):
"""PaddleClas.
"""
......@@ -350,24 +396,24 @@ class PaddleClas(object):
print_info()
def __init__(self,
config: dict=None,
model_name: str=None,
inference_model_dir: str=None,
use_gpu: bool=None,
batch_size: int=None):
use_gpu: bool=True,
batch_size: int=1,
topk: int=5,
**kwargs):
"""Init PaddleClas with config.
Args:
config: The config of PaddleClas's predictor, default by None. If default, the default configuration is used. Please refer doc for more information.
model_name: The model name supported by PaddleClas, default by None. If specified, override config.
inference_model_dir: The directory that contained model file and params file to be used, default by None. If specified, override config.
use_gpu: Wheather use GPU, default by None. If specified, override config.
batch_size: The batch size to pridict, default by None. If specified, override config.
topk: Return the top k prediction results with the highest score.
"""
super().__init__()
self._config = config
self._check_config(model_name, inference_model_dir, use_gpu,
batch_size)
self._config = init_config(model_name, inference_model_dir, use_gpu,
batch_size, topk, **kwargs)
self._check_input_model()
self.cls_predictor = ClsPredictor(self._config)
......@@ -376,26 +422,6 @@ class PaddleClas(object):
"""
return self._config
def _check_config(self,
                  model_name=None,
                  inference_model_dir=None,
                  use_gpu=None,
                  batch_size=None):
    """Prepare ``self._config`` and apply explicitly passed overrides.

    When no config was supplied, the default one is used and a warning is
    emitted. The config is then wrapped with ``config.AttrDict`` and each
    argument that is not ``None`` replaces the matching ``Global`` entry.
    """
    if self._config is None:
        self._config = get_default_confg()
        warnings.warn("config is not provided, use default!")

    wrapped = config.AttrDict(self._config)
    self._config = wrapped
    config.create_attr_dict(wrapped)

    explicit = (
        ("model_name", model_name),
        ("inference_model_dir", inference_model_dir),
        ("use_gpu", use_gpu),
        ("batch_size", batch_size),
    )
    for key, value in explicit:
        if value is None:
            continue
        self._config.Global[key] = value
def _check_input_model(self):
"""Check input model name or model files.
"""
......@@ -407,11 +433,8 @@ class PaddleClas(object):
similar_names = similar_architectures(input_model_name,
candidate_model_names)
similar_names_str = ", ".join(similar_names)
if input_model_name not in similar_names_str:
err = f"{input_model_name} is not exist! Maybe you want: [{similar_names_str}]"
raise InputModelError(err)
if input_model_name not in candidate_model_names:
err = f"{input_model_name} is not provided by PaddleClas. If you want to use your own model, please input model_file as model path!"
err = f"{input_model_name} is not provided by PaddleClas. \nMaybe you want: [{similar_names_str}]. \nIf you want to use your own model, please specify inference_model_dir!"
raise InputModelError(err)
self._config.Global.inference_model_dir = check_model_file(
input_model_name)
......@@ -427,22 +450,29 @@ class PaddleClas(object):
raise InputModelError(err)
return
else:
err = f"Please specify the model name supported by PaddleClas or directory contained model file and params file."
err = f"Please specify the model name supported by PaddleClas or directory contained model files(inference.pdmodel, inference.pdiparams)."
raise InputModelError(err)
return
def predict(self, input_data, print_pred=True):
"""Predict label of img with paddleclas.
def predict(self, input_data, print_pred=False):
"""Predict input_data.
Args:
input_data(str, NumPy.ndarray):
image to be classified, support: str(local path of image file, internet URL, directory containing series of images) and NumPy.ndarray(preprocessed image data that has 3 channels and accords with [C, H, W], or raw image data that has 3 channels and accords with [H, W, C]).
Returns:
dict: {image_name: "", class_id: [], scores: [], label_names: []},if label name path == None,label_names will be empty.
input_data (str | NumPy.array): The path of image, or the directory containing images, or the URL of image from Internet.
print_pred (bool, optional): Wheather print the prediction result. Defaults to False.
Raises:
ImageTypeError: Illegal input_data.
Yields:
list: The prediction result(s) of input_data by batch_size. For every one image, prediction result(s) is zipped as a dict, that includs topk "class_ids", "scores" and "label_names". The format is as follow:
[{"class_ids": [...], "scores": [...], "label_names": [...]}, ...]
"""
if isinstance(input_data, np.ndarray):
return self.cls_predictor.predict(input_data)
outputs = self.cls_predictor.predict(input_data)
yield self.cls_predictor.postprocess(outputs)
elif isinstance(input_data, str):
if input_data.startswith("http"):
if input_data.startswith("http") or input_data.startswith("https"):
image_storage_dir = partial(os.path.join, BASE_IMAGES_DIR)
if not os.path.exists(image_storage_dir()):
os.makedirs(image_storage_dir())
......@@ -455,12 +485,10 @@ class PaddleClas(object):
image_list = get_image_list(input_data)
batch_size = self._config.Global.get("batch_size", 1)
pre_label_out_idr = self._config.Global.get("pre_label_out_idr",
False)
topk = self._config.PostProcess.get('topk', 1)
img_list = []
img_path_list = []
output_list = []
cnt = 0
for idx, img_path in enumerate(image_list):
img = cv2.imread(img_path)
......@@ -475,24 +503,19 @@ class PaddleClas(object):
if cnt % batch_size == 0 or (idx + 1) == len(image_list):
outputs = self.cls_predictor.predict(img_list)
output_list.append(outputs[0])
preds = self.cls_predictor.postprocess(outputs)
for nu, pred in enumerate(preds):
if pre_label_out_idr:
save_prelabel_results(pred["class_ids"][0],
img_path_list[nu],
pre_label_out_idr)
if print_pred:
pred_str_list = [
f"filename: {img_path_list[nu]}",
f"top-{self._config.PostProcess.get('topk', 1)}"
]
for k in pred:
pred_str_list.append(f"{k}: {pred[k]}")
print(", ".join(pred_str_list))
preds = self.cls_predictor.postprocess(outputs,
img_path_list)
if print_pred and preds:
for nu, pred in enumerate(preds):
pred_str = ", ".join(
[f"{k}: {pred[k]}" for k in pred])
print(
f"filename: {img_path_list[nu]}, top-{topk}, {pred_str}"
)
img_list = []
img_path_list = []
return output_list
yield preds
else:
err = "Please input legal image! The type of image supported by PaddleClas are: NumPy.ndarray and string of local path or Ineternet URL"
raise ImageTypeError(err)
......@@ -504,8 +527,11 @@ def main():
"""Function API used for commad line.
"""
cfg = args_cfg()
clas_engine = PaddleClas(cfg)
clas_engine.predict(cfg["Global"]["infer_imgs"], print_pred=True)
clas_engine = PaddleClas(**cfg)
res = clas_engine.predict(cfg["infer_imgs"], print_pred=True)
for _ in res:
pass
print("Predict complete!")
return
......
......@@ -45,12 +45,13 @@ __all__ = list(MODEL_URLS.keys())
def cal_attention_biases(attention_biases, attention_bias_idxs):
gather_list = []
attention_bias_t = paddle.transpose(attention_biases, (1, 0))
for idx in attention_bias_idxs:
gather = paddle.gather(attention_bias_t, idx)
nums = attention_bias_idxs.shape[0]
for idx in range(nums):
gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx])
gather_list.append(gather)
shape0, shape1 = attention_bias_idxs.shape
return paddle.transpose(paddle.concat(gather_list), (1, 0)).reshape(
(0, shape0, shape1))
gather = paddle.concat(gather_list)
return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1))
class Conv2d_BN(nn.Sequential):
......@@ -127,11 +128,12 @@ class Residual(nn.Layer):
def forward(self, x):
    """Add the wrapped module's output to ``x``.

    While training with ``self.drop > 0`` the residual branch is multiplied
    by a random per-sample keep mask rescaled by ``1 / (1 - self.drop)``;
    otherwise the branch is added unchanged.
    """
    if self.training and self.drop > 0:
        # One random keep/drop decision per sample in the batch.
        y = paddle.rand(
            shape=[x.shape[0], 1, 1]).__ge__(self.drop).astype("float32")
        y = y.divide(paddle.full_like(y, 1 - self.drop))
        # The mask scales the residual branch; returning paddle.add(x, y)
        # would skip self.m entirely and add the raw mask to the input.
        return paddle.add(x, self.m(x) * y)
    else:
        return paddle.add(x, self.m(x))
class Attention(nn.Layer):
......@@ -203,9 +205,9 @@ class Attention(nn.Layer):
self.attention_bias_idxs)
else:
attention_biases = self.ab
attn = ((q @k_transpose) * self.scale + attention_biases)
attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases)
attn = F.softmax(attn)
x = paddle.transpose(attn @v, perm=[0, 2, 1, 3])
x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3])
x = paddle.reshape(x, [B, N, self.dh])
x = self.proj(x)
return x
......@@ -219,8 +221,9 @@ class Subsample(nn.Layer):
def forward(self, x):
B, N, C = x.shape
x = paddle.reshape(x, [B, self.resolution, self.resolution,
C])[:, ::self.stride, ::self.stride]
x = paddle.reshape(x, [B, self.resolution, self.resolution, C])
end1, end2 = x.shape[1], x.shape[2]
x = x[:, 0:end1:self.stride, 0:end2:self.stride]
x = paddle.reshape(x, [B, -1, C])
return x
......@@ -315,13 +318,14 @@ class AttentionSubsample(nn.Layer):
else:
attention_biases = self.ab
attn = (q @paddle.transpose(
k, perm=[0, 1, 3, 2])) * self.scale + attention_biases
attn = (paddle.matmul(
q, paddle.transpose(
k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases
attn = F.softmax(attn)
x = paddle.reshape(
paddle.transpose(
(attn @v), perm=[0, 2, 1, 3]), [B, -1, self.dh])
paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh])
x = self.proj(x)
return x
......@@ -422,6 +426,7 @@ class LeViT(nn.Layer):
x = paddle.transpose(x, perm=[0, 2, 1])
x = self.blocks(x)
x = x.mean(1)
x = paddle.reshape(x, [-1, x.shape[-1]])
if self.distillation:
x = self.head(x), self.head_dist(x)
if not self.training:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册