From 8a469799af008c185bbe59d7b67372f6b07ae938 Mon Sep 17 00:00:00 2001 From: Tingquan Gao Date: Fri, 26 Mar 2021 18:52:50 +0800 Subject: [PATCH] support bs>1 (#651) * support bs>1 --- MANIFEST.in | 4 +- deploy/hubserving/clas/module.py | 71 +++-------- deploy/hubserving/clas/test.py | 23 +++- deploy/lite/readme.md | 6 +- deploy/lite/readme_en.md | 2 +- docs/en/tutorials/quick_start_en.md | 7 +- docs/en/whl_en.md | 76 ++++++------ docs/zh_CN/tutorials/quick_start.md | 10 +- docs/zh_CN/whl.md | 87 +++++++------- paddleclas.py | 177 +++++++++++++++------------- tools/infer/infer.py | 90 +++++++------- tools/infer/predict.py | 132 +++++++++++---------- tools/infer/utils.py | 50 +++++--- tools/test_hubserving.py | 142 +++++++++++----------- 14 files changed, 445 insertions(+), 432 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 625d665c..a1e4b146 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,7 @@ include LICENSE.txt include README.md +include docs/en/whl_en.md recursive-include tools/infer utils.py predict.py -recursive-include ppcls/utils imagenet1k_label_list.txt \ No newline at end of file +recursive-include ppcls/utils logger.py +recursive-include ppcls/utils imagenet1k_label_list.txt diff --git a/deploy/hubserving/clas/module.py b/deploy/hubserving/clas/module.py index 96dc2352..113ab14a 100644 --- a/deploy/hubserving/clas/module.py +++ b/deploy/hubserving/clas/module.py @@ -24,8 +24,8 @@ import cv2 import numpy as np import paddle.nn as nn -import tools.infer.predict as paddle_predict -from tools.infer.utils import Base64ToCV2, create_paddle_predictor +from tools.infer.predict import Predictor +from tools.infer.utils import b64_to_np, postprocess from deploy.hubserving.clas.params import read_params @@ -62,65 +62,24 @@ class ClasSystem(nn.Layer): else: print("Use CPU") print("Enable MKL-DNN") if enable_mkldnn else None - self.predictor = create_paddle_predictor(self.args) + self.predictor = Predictor(self.args) - def read_images(self, paths=[]): - images = [] - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file.".format(img_path) - img = cv2.imread(img_path) - if img is None: - logger.info("error in loading image:{}".format(img_path)) - continue - img = img[:, :, ::-1] - images.append(img) - return images + def predict(self, batch_input_data, top_k=1): + assert isinstance( + batch_input_data, + np.ndarray), "The input data is inconsistent with expectations." - def predict(self, images=[], paths=[], top_k=1): - """ - - Args: - images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths - paths (list[str]): The paths of images. If paths not images - Returns: - res (list): The result of chinese texts and save path of images. - """ - - if images != [] and isinstance(images, list) and paths == []: - predicted_data = images - elif images == [] and isinstance(paths, list) and paths != []: - predicted_data = self.read_images(paths) - else: - raise TypeError( - "The input data is inconsistent with expectations.") - - assert predicted_data != [], "There is not any image to be predicted. Please check the input data." 
- - all_results = [] - for img in predicted_data: - if img is None: - logger.info("error in loading image") - all_results.append([]) - continue - - self.args.image_file = img - self.args.top_k = top_k - - starttime = time.time() - classes, scores = paddle_predict.predict(self.args, self.predictor) - elapse = time.time() - starttime - - logger.info("Predict time: {}".format(elapse)) - all_results.append([classes.tolist(), scores.tolist(), elapse]) - return all_results + starttime = time.time() + batch_outputs = self.predictor.predict(batch_input_data) + elapse = time.time() - starttime + batch_result_list = postprocess(batch_outputs, top_k) + return {"prediction": batch_result_list, "elapse": elapse} @serving - def serving_method(self, images, **kwargs): + def serving_method(self, images, revert_params, **kwargs): """ Run as a service. """ - to_cv2 = Base64ToCV2() - images_decode = [to_cv2(image) for image in images] - results = self.predict(images_decode, **kwargs) + input_data = b64_to_np(images, revert_params) + results = self.predict(batch_input_data=input_data, **kwargs) return results diff --git a/deploy/hubserving/clas/test.py b/deploy/hubserving/clas/test.py index 4555f29e..975260ee 100644 --- a/deploy/hubserving/clas/test.py +++ b/deploy/hubserving/clas/test.py @@ -12,11 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import sys +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.abspath(os.path.join(__dir__, '../../../'))) +import argparse +import numpy as np +import cv2 import paddlehub as hub +from tools.infer.utils import preprocess + +args = argparse.Namespace(resize_short=256, resize=224, normalize=True) + +img_path_list = ["./deploy/hubserving/ILSVRC2012_val_00006666.JPEG", ] -image_path = ["./deploy/hubserving/ILSVRC2012_val_00006666.JPEG", ] -top_k = 5 module = hub.Module(name="clas_system") -res = module.predict(paths=image_path, top_k=top_k) -for i, image in enumerate(image_path): - print("The returned result of {}: {}".format(image, res[i])) +for i, img_path in enumerate(img_path_list): + img = cv2.imread(img_path)[:, :, ::-1] + img = preprocess(img, args) + batch_input_data = np.expand_dims(img, axis=0) + res = module.predict(batch_input_data) + print("The returned result of {}: {}".format(img_path, res)) diff --git a/deploy/lite/readme.md b/deploy/lite/readme.md index 6926efed..039a7f6b 100644 --- a/deploy/lite/readme.md +++ b/deploy/lite/readme.md @@ -28,8 +28,10 @@ Paddle Lite是飞桨轻量化推理引擎,为手机、IOT端提供高效推理 |Android|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/Android/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.CV_ON.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/Android/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.CV_ON.tar.gz)| |iOS|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/iOS/inference_lite_lib.ios.armv7.with_extra.CV_ON.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.6.1/iOS/inference_lite_lib.ios64.armv8.with_extra.CV_ON.tar.gz)| - 注:1. 如果是从 Paddle-Lite [官方文档](https://paddle-lite.readthedocs.io/zh/latest/user_guides/release_lib.html#android-toolchain-gcc)下载的预测库, - 注意选择`with_extra=ON,with_cv=ON`的下载链接。2. 如果使用量化的模型部署在端侧,建议使用Paddle-Lite develop分支编译预测库。 + **注**: + 1. 如果是从 Paddle-Lite [官方文档](https://paddle-lite.readthedocs.io/zh/latest/quick_start/release_lib.html#android-toolchain-gcc)下载的预测库, + 注意选择`with_extra=ON,with_cv=ON`的下载链接。 + 2. 
如果使用量化的模型部署在端侧,建议使用Paddle-Lite develop分支编译预测库。 2. 编译Paddle-Lite得到预测库,Paddle-Lite的编译方式如下: ```shell diff --git a/deploy/lite/readme_en.md b/deploy/lite/readme_en.md index a7dc383e..167e58ce 100644 --- a/deploy/lite/readme_en.md +++ b/deploy/lite/readme_en.md @@ -31,7 +31,7 @@ For the detailed compilation directions of different development environments, p **NOTE**: -1. If you download the inference library from [Paddle-Lite official document](https://paddle-lite.readthedocs.io/zh/latest/user_guides/release_lib.html#android-toolchain-gcc), please choose `with_extra=ON` , `with_cv=ON` . +1. If you download the inference library from [Paddle-Lite official document](https://paddle-lite.readthedocs.io/zh/latest/quick_start/release_lib.html#android-toolchain-gcc), please choose `with_extra=ON` , `with_cv=ON` . 2. It is recommended to build inference library using [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) develop branch if you want to deploy the [quantitative](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/slim/quantization/README_en.md) model to mobile phones. Please refer to the [link](https://paddle-lite.readthedocs.io/zh/latest/user_guides/Compile/Android.html#id2) for more detailed information about compiling. diff --git a/docs/en/tutorials/quick_start_en.md b/docs/en/tutorials/quick_start_en.md index 955955fc..9dd86a93 100644 --- a/docs/en/tutorials/quick_start_en.md +++ b/docs/en/tutorials/quick_start_en.md @@ -116,12 +116,7 @@ python3 tools/infer/infer.py \ The output is as follows. Top-5 class ids and their scores are printed. ``` -Current image file: docs/images/quick_start/flowers102/image_06739.jpg - top1, class id: 0, probability: 0.5129 - top2, class id: 50, probability: 0.0671 - top3, class id: 18, probability: 0.0377 - top4, class id: 82, probability: 0.0238 - top5, class id: 54, probability: 0.0231 +File:image_06739.jpg, Top-5 result: class id(s): [0, 96, 18, 50, 51], score(s): [0.79, 0.02, 0.01, 0.01, 0.01] ``` * Note: Results are different for different models, so you might get different results for the command. 
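The single-line output above (`File:..., Top-5 result: ...`) comes from the new batch-aware top-k postprocessing. Below is a minimal sketch of how such a line is assembled from a batch of softmax outputs; it mirrors the `postprocess` helper this patch adds to `tools/infer/utils.py`, and the file name and the randomly generated scores are illustrative only.

```python
import numpy as np

def postprocess(batch_outputs, topk=5):
    # For each image in the batch, keep the top-k class ids and their scores.
    batch_results = []
    for probs in batch_outputs:
        index = probs.argsort(axis=0)[-topk:][::-1].astype("int32")
        batch_results.append({
            "clas_ids": [i.item() for i in index],
            "scores": [probs[i].item() for i in index],
        })
    return batch_results

# Illustrative batch: one image, 102 classes, normalized to look like softmax output.
batch_outputs = np.random.rand(1, 102).astype("float32")
batch_outputs /= batch_outputs.sum(axis=1, keepdims=True)

filename = "image_06739.jpg"  # illustrative file name
for result in postprocess(batch_outputs, topk=5):
    scores_str = "[{}]".format(", ".join("{:.2f}".format(s) for s in result["scores"]))
    print("File:{}, Top-{} result: class id(s): {}, score(s): {}".format(
        filename, 5, result["clas_ids"], scores_str))
```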
diff --git a/docs/en/whl_en.md b/docs/en/whl_en.md
index d25ea5a2..16d6f694 100644
--- a/docs/en/whl_en.md
+++ b/docs/en/whl_en.md
@@ -6,7 +6,7 @@ install by pypi

```bash
-pip install paddleclas==2.0.0rc2
+pip install paddleclas==2.0.0rc3
```

build own whl package and install
@@ -27,7 +27,7 @@ pip3 install dist/paddleclas-x.x.x-py3-none-any.whl

```python
from paddleclas import PaddleClas
-clas = PaddleClas(model_name='ResNet50',use_gpu=False,use_tensorrt=False)
+clas = PaddleClas(model_name='ResNet50', top_k=5)
image_file='docs/images/whl/demo.jpg'
result=clas.predict(image_file)
print(result)
@@ -35,25 +35,28 @@ print(result)

```
>>> result
-[{'filename': '/Users/mac/Downloads/PaddleClas/docs/images/whl/demo.jpg', 'class_ids': [8], 'scores': [0.9796774], 'label_names': ['hen']}]
+[{'class_ids': array([ 8,  7, 86, 82, 80]), 'scores': array([9.7967714e-01, 2.0280687e-02, 2.7053760e-05, 6.1860351e-06,
+    2.6378802e-06], dtype=float32), 'label_names': ['hen', 'cock', 'partridge', 'ruffed grouse, partridge, Bonasa umbellus', 'black grouse'], 'filename': 'docs/images/whl/demo.jpg'}]
```

* Using command line interactive programming
```bash
-paddleclas --model_name='ResNet50' --image_file='docs/images/whl/demo.jpg'
+paddleclas --model_name=ResNet50 --top_k=5 --image_file='docs/images/whl/demo.jpg'
```

```
>>> result
-**********/Users/mac/Downloads/PaddleClas/docs/images/whl/demo.jpg**********
-[{'filename': '/Users/mac/Downloads/PaddleClas/docs/images/whl/demo.jpg', 'class_ids': [8], 'scores': [0.9796774], 'label_names': ['hen']}]
+**********docs/images/whl/demo.jpg**********
+filename: docs/images/whl/demo.jpg; class id: 8, 7, 86, 82, 80; scores: 0.9797, 0.0203, 0.0000, 0.0000, 0.0000; label: hen, cock, partridge, ruffed grouse, partridge, Bonasa umbellus, black grouse
+Predict complete!
```

### 2. Definition of Parameters
* model_name(str): model's name. If not assigning `model_file`and`params_file`, you can assign this param. If using inference model based on ImageNet1k provided by Paddle, set as default='ResNet50'.
-* image_file(str): image's path. Support assigning single local image, internet image and folder containing series of images. Also Support numpy.ndarray.
+* image_file(str or numpy.ndarray): the image(s) to predict. It supports a single local image path, an image URL, or a folder containing a series of images. It also supports numpy.ndarray image data, whose channel order must be [B, G, R].
* use_gpu(bool): Whether to use GPU or not, defalut=False。
* use_tensorrt(bool): whether to open tensorrt or not. Using it can greatly promote predict preformance, default=False.
+* is_preprocessed(bool): whether the image data has already been preprocessed; only takes effect when image_file is a numpy.ndarray, default=False.
* resize_short(int): resize the minima between height and width into resize_short(int), default=256
* resize(int): resize image into resize(int), default=224.
* normalize(bool): whether normalize image or not, default=True.
@@ -84,80 +87,75 @@ paddleclas -h
###### python
```python
from paddleclas import PaddleClas
-clas = PaddleClas(model_file='user-specified model path',
-    params_file='parmas path', use_gpu=False, use_tensorrt=False)
-image_file = ''
+clas = PaddleClas(model_file='the path of model file',
+    params_file='the path of params file')
+image_file = 'docs/images/whl/demo.jpg'
result=clas.predict(image_file)
print(result)
```

###### bash
```bash
-paddleclas --model_file='user-specified model path' --params_file='parmas path' --image_file='image path'
+paddleclas --model_file='the path of model file' --params_file='the path of params file' --image_file='docs/images/whl/demo.jpg'
```

-* Use inference model which PaddlePaddle provides to predict, you need to choose one of model when initializing PaddleClas to assign `model_name`. You may not assign `model_file` , and the model you chosen will be download in `BASE_INFERENCE_MODEL_DIR` ,which will be saved in folder named by `model_name`,avoiding overlay different inference model.
+* To use one of the inference models provided by PaddlePaddle, choose one of the models provided by PaddleClas and assign it to `model_name`. In this case there is no need to assign `model_file`; the chosen model will be downloaded to `~/.paddleclas/` and saved in a folder named after `model_name`.

###### python
```python
from paddleclas import PaddleClas
-clas = PaddleClas(model_name='ResNet50',use_gpu=False, use_tensorrt=False)
-image_file = ''
+clas = PaddleClas(model_name='ResNet50')
+image_file = 'docs/images/whl/demo.jpg'
result=clas.predict(image_file)
print(result)
```

###### bash
```bash
-paddleclas --model_name='ResNet50' --image_file='image path'
+paddleclas --model_name='ResNet50' --image_file='docs/images/whl/demo.jpg'
```

-* You can assign input as format`np.ndarray` which has been preprocessed `--image_file=np.ndarray`.
+* You can pass image data as `numpy.ndarray`, which may already be preprocessed, via `image_file=np.ndarray`. Note that the image data must have three channels. If the image still needs to be preprocessed, the channel order must be [B, G, R].

###### python
```python
+import cv2
from paddleclas import PaddleClas
-clas = PaddleClas(model_name='ResNet50',use_gpu=False, use_tensorrt=False)
-image_file =np.ndarray # image_file 可指定为前缀是https的网络图片,也可指定为本地图片
+clas = PaddleClas(model_name='ResNet50')
+image_file = cv2.imread("docs/images/whl/demo.jpg")
result=clas.predict(image_file)
```

-###### bash
-```bash
-paddleclas --model_name='ResNet50' --image_file=np.ndarray
-```
-
-
-* You can assign `image_file` as a folder path containing series of images, also can assign `top_k`.
+* You can assign `image_file` as a folder path containing a series of images.

###### python
```python
from paddleclas import PaddleClas
-clas = PaddleClas(model_name='ResNet50',use_gpu=False, use_tensorrt=False,top_k=5)
-image_file = '' # it can be image_file folder path which contains all of images you want to predict.
+clas = PaddleClas(model_name='ResNet50')
+image_file = 'docs/images/whl/' # it can also be a folder path containing all of the images you want to predict.
result=clas.predict(image_file)
print(result)
```

###### bash
```bash
-paddleclas --model_name='ResNet50' --image_file='image path' --top_k=5
+paddleclas --model_name='ResNet50' --image_file='docs/images/whl/'
```

-* You can assign `--pre_label_image=True`, `--pre_label_out_idr= './output_pre_label/'`.Then images will be copied into folder named by top-1 class_id.
+* You can assign `--pre_label_image=True` and `--pre_label_out_idr='./output_pre_label/'`. Images will then be copied into folders named by their top-1 class_id.

###### python
```python
from paddleclas import PaddleClas
-clas = PaddleClas(model_name='ResNet50',use_gpu=False, use_tensorrt=False,top_k=5, pre_label_image=True,pre_label_out_idr='./output_pre_label/')
-image_file = '' # it can be image_file folder path which contains all of images you want to predict.
+clas = PaddleClas(model_name='ResNet50', pre_label_image=True, pre_label_out_idr='./output_pre_label/')
+image_file = 'docs/images/whl/' # it can also be a folder path containing all of the images you want to predict.
result=clas.predict(image_file)
print(result)
```

###### bash
```bash
-paddleclas --model_name='ResNet50' --image_file='image path' --top_k=5 --pre_label_image=True --pre_label_out_idr='./output_pre_label/'
+paddleclas --model_name='ResNet50' --image_file='docs/images/whl/' --pre_label_image=True --pre_label_out_idr='./output_pre_label/'
```

* You can assign `--label_name_path` as your own label_dict_file, format should be as(class_id<space>class_name<\n>).

```
0 tench, Tinca tinca
1 goldfish, Carassius auratus
2 great white shark, white shark
......
```

-* If you use inference model that Paddle provides, you do not need assign `label_name_path`. Program will take `ppcls/utils/imagenet1k_label_list.txt` as defaults. If you hope using your own training model, you can provide `label_name_path` outputing 'label_name' and scores, otherwise no 'label_name' in output information.
+* If you use an inference model that PaddleClas provides, you do not need to assign `label_name_path`. The program uses `ppcls/utils/imagenet1k_label_list.txt` by default. If you want to use your own trained model, you can provide `label_name_path` so that 'label_name' is output along with the scores; otherwise there will be no 'label_name' in the output.

###### python
```python
from paddleclas import PaddleClas
-clas = PaddleClas(model_file= './inference.pdmodel',params_file = './inference.pdiparams',label_name_path='./ppcls/utils/imagenet1k_label_list.txt',use_gpu=False)
-image_file = '' # it can be image_file folder path which contains all of images you want to predict.
+clas = PaddleClas(model_file='the path of model file', params_file='the path of params file', label_name_path='./ppcls/utils/imagenet1k_label_list.txt')
+image_file = 'docs/images/whl/demo.jpg' # it can also be a folder path containing all of the images you want to predict.
result=clas.predict(image_file)
print(result)
```
###### bash
```bash
-paddleclas --model_file= './inference.pdmodel' --params_file = './inference.pdiparams' --image_file='image path' --label_name_path='./ppcls/utils/imagenet1k_label_list.txt'
+paddleclas --model_file='the path of model file' --params_file='the path of params file' --image_file='docs/images/whl/demo.jpg' --label_name_path='./ppcls/utils/imagenet1k_label_list.txt'
```

###### python
```python
from paddleclas import PaddleClas
-clas = PaddleClas(model_name='ResNet50',use_gpu=False)
-image_file = '' # it can be image_file folder path which contains all of images you want to predict.
+clas = PaddleClas(model_name='ResNet50')
+image_file = 'docs/images/whl/' # it can be a directory containing all of the images you want to predict.
result=clas.predict(image_file) print(result) ``` ###### bash ```bash -paddleclas --model_name='ResNet50' --image_file='image path' +paddleclas --model_name='ResNet50' --image_file='docs/images/whl/' ``` diff --git a/docs/zh_CN/tutorials/quick_start.md b/docs/zh_CN/tutorials/quick_start.md index 3415ba81..1bcd55bd 100644 --- a/docs/zh_CN/tutorials/quick_start.md +++ b/docs/zh_CN/tutorials/quick_start.md @@ -105,18 +105,14 @@ python3 tools/infer/infer.py \ -i docs/images/quick_start/flowers102/image_06739.jpg \ --model=ResNet50_vd \ --pretrained_model="output/ResNet50_vd/best_model/ppcls" \ - --class_num=102 + --class_num=102 \ + --top_k=5 ``` 最终可以得到如下结果,打印出了Top-5对应的class id以及score。 ``` -Current image file: docs/images/quick_start/flowers102/image_06739.jpg - top1, class id: 0, probability: 0.5129 - top2, class id: 50, probability: 0.0671 - top3, class id: 18, probability: 0.0377 - top4, class id: 82, probability: 0.0238 - top5, class id: 54, probability: 0.0231 +File:image_06739.jpg, Top-5 result: class id(s): [0, 96, 18, 50, 51], score(s): [0.79, 0.02, 0.01, 0.01, 0.01] ``` * 注意:这里每个模型的训练结果都不相同,因此结果可能稍有不同。 diff --git a/docs/zh_CN/whl.md b/docs/zh_CN/whl.md index d056d85e..076cbdd7 100644 --- a/docs/zh_CN/whl.md +++ b/docs/zh_CN/whl.md @@ -6,13 +6,13 @@ pip安装 ```bash -pip install paddleclas==2.0.0rc2 +pip install paddleclas==2.0.0rc3 ``` 本地构建并安装 ```bash python3 setup.py bdist_wheel -pip3 install dist/paddleclas-x.x.x-py3-none-any.whl # x.x.x是paddleclas的版本号 +pip3 install dist/paddleclas-x.x.x-py3-none-any.whl # x.x.x是paddleclas的版本号,默认为0.0.0 ``` ### 1. 快速开始 * 指定`image_file='docs/images/whl/demo.jpg'`,使用Paddle提供的inference model,`model_name='ResNet50'`, 使用图片`docs/images/whl/demo.jpg`。 @@ -25,7 +25,7 @@ pip3 install dist/paddleclas-x.x.x-py3-none-any.whl # x.x.x是paddleclas的版 ```python from paddleclas import PaddleClas -clas = PaddleClas(model_name='ResNet50',use_gpu=False,use_tensorrt=False) +clas = PaddleClas(model_name='ResNet50', top_k=5) image_file='docs/images/whl/demo.jpg' result=clas.predict(image_file) print(result) @@ -33,35 +33,39 @@ print(result) ``` >>> result - [{'filename': '/Users/mac/Downloads/PaddleClas/docs/images/whl/demo.jpg', 'class_ids': [8], 'scores': [0.9796774], 'label_names': ['hen']}] + [{'class_ids': array([ 8, 7, 86, 82, 80]), 'scores': array([9.7967714e-01, 2.0280687e-02, 2.7053760e-05, 6.1860351e-06, + 2.6378802e-06], dtype=float32), 'label_names': ['hen', 'cock', 'partridge', 'ruffed grouse, partridge, Bonasa umbellus', 'black grouse'], 'filename': 'docs/images/whl/demo.jpg'}] ``` * 使用命令行式交互方法。直接获得结果。 ```bash -paddleclas --model_name='ResNet50' --image_file='docs/images/whl/demo.jpg' +paddleclas --model_name=ResNet50 --top_k=5 --image_file='docs/images/whl/demo.jpg' ``` ``` >>> result - **********/Users/mac/Downloads/PaddleClas/docs/images/whl/demo.jpg********** - [{'filename': '/Users/mac/Downloads/PaddleClas/docs/images/whl/demo.jpg', 'class_ids': [8], 'scores': [0.9796774], 'label_names': ['hen']}] + **********docs/images/whl/demo.jpg********** + filename: docs/images/whl/demo.jpg; class id: 8, 7, 86, 82, 80; scores: 0.9797, 0.0203, 0.0000, 0.0000, 0.0000; label: hen, cock, partridge, ruffed grouse, partridge, Bonasa umbellus, black grouse + Predict complete! ``` ### 2. 
参数解释 +以下参数可在命令行交互使用时通过参数指定,或在Python代码中实例化PaddleClas对象时作为构造函数的参数使用。 * model_name(str): 模型名称,没有指定自定义的model_file和params_file时,可以指定该参数,使用PaddleClas提供的基于ImageNet1k的inference model,默认值为ResNet50。 -* image_file(str): 图像地址,支持指定单一图像的路径或图像的网址进行预测,支持指定包含图像的文件夹路径,支持经过预处理的np.ndarray形式输入。 +* image_file(str or numpy.ndarray): 图像地址,支持指定单一图像的路径或图像的网址进行预测,支持指定包含图像的文件夹路径,支持numpy.ndarray格式的三通道图像数据,且通道顺序为[B, G, R]。 * use_gpu(bool): 是否使用GPU,如果使用,指定为True。默认为False。 * use_tensorrt(bool): 是否开启TensorRT预测,可提升GPU预测性能,需要使用带TensorRT的预测库。当使用TensorRT推理加速,指定为True。默认为False。 +* is_preprocessed(bool): 当image_file为numpy.ndarray格式的图像数据时,图像数据是否已经过预处理。如果该参数为True,则不再对image_file数据进行预处理,否则将转换通道顺序后,按照resize_short,resize,normalize参数对图像进行预处理。默认值为False。 * resize_short(int): 将图像的高宽二者中小的值,调整到指定的resize_short值,大的值按比例放大。默认为256。 * resize(int): 将图像裁剪到指定的resize值大小,默认224。 * normalize(bool): 是否对图像数据归一化,默认True。 -* batch_size(int): 预测时每个batch的样本数,默认为1。 +* batch_size(int): 预测时每个batch的样本数量,默认为1。 * model_file(str): 模型.pdmodel的路径,若不指定该参数,需要指定model_name,获得下载的模型。 -* params_file(str): 模型参数.pdiparams的路径,若不与model_file指定,则需要指定model_name,以获得下载的模型。 +* params_file(str): 模型参数.pdiparams的路径,若不指定,则需要指定model_name,以获得下载的模型。 * ir_optim(bool): 是否开启IR优化,默认为True。 * gpu_mem(int): 使用的GPU显存大小,默认为8000。 * enable_profile(bool): 是否开启profile功能,默认False。 -* top_k(int): 指定的topk,预测的前k个类别和对应的分类概率,默认为1。 +* top_k(int): 指定的topk,打印(返回)预测结果的前k个类别和对应的分类概率,默认为1。 * enable_mkldnn(bool): 是否开启MKLDNN,默认False。 * cpu_num_threads(int): 指定cpu线程数,默认设置为10。 * label_name_path(str): 指定一个表示所有的label name的文件路径。当用户使用自己训练的模型,可指定这一参数,打印结果时可以显示图像对应的类名称。若用户使用Paddle提供的inference model,则可不指定该参数,使用imagenet1k的label_name,默认为空字符串。 @@ -84,84 +88,79 @@ paddleclas -h ###### python ```python from paddleclas import PaddleClas -clas = PaddleClas(model_file='user-specified model path', - params_file='parmas path', use_gpu=False, use_tensorrt=False) -image_file = '' # image_file 可指定为前缀是https的网络图片,也可指定为本地图片 +clas = PaddleClas(model_file='the path of model file', + params_file='the path of params file') +image_file = 'docs/images/whl/demo.jpg' # image_file 可指定为前缀是https的网络图片,也可指定为本地图片 result=clas.predict(image_file) print(result) ``` ###### bash ```bash -paddleclas --model_file='user-specified model path' --params_file='parmas path' --image_file='image path' +paddleclas --model_file='user-specified model path' --params_file='parmas path' --image_file='docs/images/whl/demo.jpg' ``` -* 用户使用PaddlePaddle训练好的inference model来预测,用户需要使用,初始化打印的模型的其中一个,并指定给`model_name`。 -用户可以不指定`model_file`,模型会自动下载到当前目录,并保存在以`model_name`命名的文件夹中,避免下载不同模型的覆盖问题。 +* 用户使用PaddlePaddle训练好的inference model来预测,并通过参数`model_name`指定。 +此时无需指定`model_file`,模型会根据`model_name`自动下载指定模型到当前目录,并保存在目录`~/.paddleclas/`下以`model_name`命名的文件夹中。 ###### python ```python from paddleclas import PaddleClas -clas = PaddleClas(model_name='ResNet50',use_gpu=False, use_tensorrt=False) -image_file = '' # image_file 可指定为前缀是https的网络图片,也可指定为本地图片 +clas = PaddleClas(model_name='ResNet50') +image_file = 'docs/images/whl/demo.jpg' # image_file 可指定为前缀是https的网络图片,也可指定为本地图片 result=clas.predict(image_file) print(result) ``` ###### bash ```bash -paddleclas --model_name='ResNet50' --image_file='image path' +paddleclas --model_name='ResNet50' --image_file='docs/images/whl/demo.jpg' ``` -* 用户可以使用经过预处理的np.ndarray格式`--image_file=np.ndarray`。 +* 用户可以使用numpy.ndarray格式的图像数据,并通过参数`image_file`指定。注意该图像数据必须为三通道图像数据。如需对图像进行预处理,则图像通道顺序必须为[B, G, R]。 ###### python ```python +import cv2 from paddleclas import PaddleClas -clas = PaddleClas(model_name='ResNet50',use_gpu=False, use_tensorrt=False) -image_file =np.ndarray # 
image_file 可指定为前缀是https的网络图片,也可指定为本地图片 +clas = PaddleClas(model_name='ResNet50') +image_file = cv2.imread("docs/images/whl/demo.jpg") result=clas.predict(image_file) ``` -###### bash -```bash -paddleclas --model_name='ResNet50' --image_file=np.ndarray -``` - - -* 用户可以将`image_file`指定为包含图片的文件夹路径,可以指定`top_k`参数 +* 用户可以将`image_file`指定为包含图片的文件夹路径。 ###### python ```python from paddleclas import PaddleClas -clas = PaddleClas(model_name='ResNet50',use_gpu=False, use_tensorrt=False,top_k=5) -image_file = '' # it can be image_file folder path which contains all of images you want to predict. +clas = PaddleClas(model_name='ResNet50') +image_file = 'docs/images/whl/' # it can be image_file folder path which contains all of images you want to predict. result=clas.predict(image_file) print(result) ``` ###### bash ```bash -paddleclas --model_name='ResNet50' --image_file='image path' --top_k=5 +paddleclas --model_name='ResNet50' --image_file='docs/images/whl/' ``` -* 用户可以指定`--pre_label_image=True`, `--pre_label_out_idr= './output_pre_label/'`,将图片复制到,以其top1对应的类别命名的文件夹中。 +* 用户可以指定`pre_label_image=True`, `pre_label_out_idr='./output_pre_label/'`,将图片按其top1预测结果保存到`pre_label_out_dir`目录下对应类别的文件夹中。 ###### python ```python from paddleclas import PaddleClas -clas = PaddleClas(model_name='ResNet50',use_gpu=False, use_tensorrt=False,top_k=5, pre_label_image=True,pre_label_out_idr='./output_pre_label/') -image_file = '' # it can be image_file folder path which contains all of images you want to predict. +clas = PaddleClas(model_name='ResNet50', pre_label_image=True,pre_label_out_idr='./output_pre_label/') +image_file = 'docs/images/whl/' # it can be image_file folder path which contains all of images you want to predict. result=clas.predict(image_file) print(result) ``` ###### bash ```bash -paddleclas --model_name='ResNet50' --image_file='image path' --top_k=5 --pre_label_image=True --pre_label_out_idr='./output_pre_label/' +paddleclas --model_name='ResNet50' --image_file='docs/images/whl/' --pre_label_image=True --pre_label_out_idr='./output_pre_label/' ``` -* 用户可以指定`--label_name_path`,作为用户自己训练模型的`label_dict_file`,格式应为(class_idclass_name<\n>) +* 用户可以通过参数`label_name_path`指定模型的`label_dict_file`文件路径,文件内容格式应为(class_idclass_name<\n>),例如: ``` 0 tench, Tinca tinca @@ -177,27 +176,27 @@ paddleclas --model_name='ResNet50' --image_file='image path' --top_k=5 --pre_lab ###### python ```python from paddleclas import PaddleClas -clas = PaddleClas(model_file= './inference.pdmodel',params_file = './inference.pdiparams',label_name_path='./ppcls/utils/imagenet1k_label_list.txt',use_gpu=False) -image_file = '' # it can be image_file folder path which contains all of images you want to predict. +clas = PaddleClas(model_file='the path of model file', params_file ='the path of params file', label_name_path='./ppcls/utils/imagenet1k_label_list.txt') +image_file = 'docs/images/whl/demo.jpg' # it can be image_file folder path which contains all of images you want to predict. 
result=clas.predict(image_file) print(result) ``` ###### bash ```bash -paddleclas --model_file= './inference.pdmodel' --params_file = './inference.pdiparams' --image_file='image path' --label_name_path='./ppcls/utils/imagenet1k_label_list.txt' +paddleclas --model_file='the path of model file' --params_file='the path of params file' --image_file='docs/images/whl/demo.jpg' --label_name_path='./ppcls/utils/imagenet1k_label_list.txt' ``` ###### python ```python from paddleclas import PaddleClas -clas = PaddleClas(model_name='ResNet50',use_gpu=False) -image_file = '' # it can be image_file folder path which contains all of images you want to predict. +clas = PaddleClas(model_name='ResNet50') +image_file = 'docs/images/whl/' # it can be image_file folder path which contains all of images you want to predict. result=clas.predict(image_file) print(result) ``` ###### bash ```bash -paddleclas --model_name='ResNet50' --image_file='image path' +paddleclas --model_name='ResNet50' --image_file='docs/images/whl/' ``` diff --git a/paddleclas.py b/paddleclas.py index 1b496e7b..3cf57a50 100644 --- a/paddleclas.py +++ b/paddleclas.py @@ -30,14 +30,17 @@ import os import sys __dir__ = os.path.dirname(__file__) sys.path.append(os.path.join(__dir__, '')) +import argparse +import shutil import cv2 import numpy as np import tarfile import requests from tqdm import tqdm -import tools.infer.utils as utils -import shutil +from tools.infer.utils import get_image_list, preprocess, save_prelabel_results +from tools.infer.predict import Predictor + __all__ = ['PaddleClas'] BASE_DIR = os.path.expanduser("~/.paddleclas/") BASE_INFERENCE_MODEL_DIR = os.path.join(BASE_DIR, 'inference_model') @@ -98,7 +101,9 @@ def download_with_progressbar(url, save_path): file.write(data) progress_bar.close() if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes: - raise Exception("Something went wrong while downloading models") + raise Exception( + "Something went wrong while downloading model/image from {}". + format(url)) def maybe_download(model_storage_directory, url): @@ -130,20 +135,14 @@ def maybe_download(model_storage_directory, url): os.remove(tmp_path) -def save_prelabel_results(class_id, input_filepath, output_idr): - output_dir = os.path.join(output_idr, str(class_id)) - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - shutil.copy(input_filepath, output_dir) - - def load_label_name_dict(path): - result = {} if not os.path.exists(path): print( - 'Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!' + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
) + return None else: + result = {} for line in open(path, 'r'): partition = line.split('\n')[0].partition(' ') try: @@ -155,8 +154,6 @@ def load_label_name_dict(path): def parse_args(mMain=True, add_help=True): - import argparse - def str2bool(v): return v.lower() in ("true", "t", "1") @@ -186,9 +183,7 @@ def parse_args(mMain=True, add_help=True): parser.add_argument("--enable_profile", type=str2bool, default=False) parser.add_argument("--top_k", type=int, default=1) parser.add_argument("--enable_mkldnn", type=str2bool, default=False) - parser.add_argument("--enable_benchmark", type=str2bool, default=False) parser.add_argument("--cpu_num_threads", type=int, default=10) - parser.add_argument("--hubserving", type=str2bool, default=False) # parameters for pre-label the images parser.add_argument("--label_name_path", type=str, default='') @@ -207,6 +202,7 @@ def parse_args(mMain=True, add_help=True): use_gpu=False, use_fp16=False, use_tensorrt=False, + is_preprocessed=False, resize_short=256, resize=224, normalize=True, @@ -218,9 +214,7 @@ def parse_args(mMain=True, add_help=True): enable_profile=False, top_k=1, enable_mkldnn=False, - enable_benchmark=False, cpu_num_threads=10, - hubserving=False, label_name_path='', pre_label_image=False, pre_label_out_idr=None) @@ -231,7 +225,6 @@ class PaddleClas(object): format(model_names), '\n') def __init__(self, **kwargs): - process_params = parse_args(mMain=False, add_help=False) process_params.__dict__.update(**kwargs) @@ -270,76 +263,96 @@ class PaddleClas(object): process_params.label_name_path) self.args = process_params - self.predictor = utils.create_paddle_predictor(process_params) - - def predict(self, img): + self.predictor = Predictor(process_params) + + def postprocess(self, output): + output = output.flatten() + classes = np.argpartition(output, -self.args.top_k)[-self.args.top_k:] + class_ids = classes[np.argsort(-output[classes])] + scores = output[class_ids] + label_names = [self.label_name_dict[c] + for c in class_ids] if self.label_name_dict else [] + return { + "class_ids": class_ids, + "scores": scores, + "label_names": label_names + } + + def predict(self, input_data): """ predict label of img with paddleclas Args: - img: input image for clas, support single image , internet url, folder path containing series of images + input_data(string, NumPy.ndarray): image to be classified, support: + string: local path of image file, internet URL, directory containing series of images; + NumPy.ndarray: preprocessed image data that has 3 channels and accords with [C, H, W], or raw image data that has 3 channels and accords with [H, W, C] Returns: - dict:{image_name: "", class_id: [], scores: [], label_names: []},if label name path == None,label_names will be empty. + dict: {image_name: "", class_id: [], scores: [], label_names: []},if label name path == None,label_names will be empty. 
""" - assert isinstance(img, (str, np.ndarray)) - - input_names = self.predictor.get_input_names() - input_tensor = self.predictor.get_input_handle(input_names[0]) - - output_names = self.predictor.get_output_names() - output_tensor = self.predictor.get_output_handle(output_names[0]) - if isinstance(img, str): + if isinstance(input_data, np.ndarray): + if not self.args.is_preprocessed: + input_data = input_data[:, :, ::-1] + input_data = preprocess(input_data, self.args) + input_data = np.expand_dims(input_data, axis=0) + batch_outputs = self.predictor.predict(input_data) + result = {"filename": "image"} + result.update(self.postprocess(batch_outputs[0])) + return result + elif isinstance(input_data, str): + input_path = input_data # download internet image - if img.startswith('http'): + if input_path.startswith('http'): if not os.path.exists(BASE_IMAGES_DIR): os.makedirs(BASE_IMAGES_DIR) - image_path = os.path.join(BASE_IMAGES_DIR, 'tmp.jpg') - download_with_progressbar(img, image_path) + file_path = os.path.join(BASE_IMAGES_DIR, 'tmp.jpg') + download_with_progressbar(input_path, file_path) print("Current using image from Internet:{}, renamed as: {}". - format(img, image_path)) - img = image_path - image_list = utils.get_image_list(img) + format(input_path, file_path)) + input_path = file_path + image_list = get_image_list(input_path) + + total_result = [] + batch_input_list = [] + img_path_list = [] + cnt = 0 + for idx, img_path in enumerate(image_list): + img = cv2.imread(img_path) + if img is None: + print( + "Warning: Image file failed to read and has been skipped. The path: {}". + format(img_path)) + continue + else: + img = img[:, :, ::-1] + data = preprocess(img, self.args) + batch_input_list.append(data) + img_path_list.append(img_path) + cnt += 1 + + if cnt % self.args.batch_size == 0 or (idx + 1 + ) == len(image_list): + batch_outputs = self.predictor.predict( + np.array(batch_input_list)) + for number, output in enumerate(batch_outputs): + result = {"filename": img_path_list[number]} + result.update(self.postprocess(output)) + + result_str = "top-{} result: {}".format( + self.args.top_k, result) + print(result_str) + + total_result.append(result) + if self.args.pre_label_image: + save_prelabel_results(result["class_ids"][0], + img_path_list[number], + self.args.pre_label_out_idr) + batch_input_list = [] + img_path_list = [] + return total_result else: - if isinstance(img, np.ndarray): - image_list = [img] - else: - print('Please input legal image!') - - total_result = [] - for filename in image_list: - if isinstance(filename, str): - image = cv2.imread(filename)[:, :, ::-1] - assert image is not None, "Error in loading image: {}".format( - filename) - inputs = utils.preprocess(image, self.args) - inputs = np.expand_dims( - inputs, axis=0).repeat( - 1, axis=0).copy() - else: - inputs = filename - - input_tensor.copy_from_cpu(inputs) - - self.predictor.run() - - outputs = output_tensor.copy_to_cpu() - classes, scores = utils.postprocess(outputs, self.args) - label_names = [] - if len(self.label_name_dict) != 0: - label_names = [self.label_name_dict[c] for c in classes] - result = { - "filename": filename if isinstance(filename, str) else 'image', - "class_ids": classes.tolist(), - "scores": scores.tolist(), - "label_names": label_names, - } - total_result.append(result) - if self.args.pre_label_image: - save_prelabel_results(classes[0], filename, - self.args.pre_label_out_idr) - print("\tSaving prelabel results in {}".format( - os.path.join(self.args.pre_label_out_idr, 
str(classes[ - 0])))) - return total_result + print( + "Error: Please input legal image! The type of image supported by PaddleClas are: NumPy.ndarray and string of local path or Ineternet URL" + ) + return [] def main(): @@ -347,9 +360,9 @@ def main(): args = parse_args(mMain=True) clas_engine = PaddleClas(**(args.__dict__)) print('{}{}{}'.format('*' * 10, args.image_file, '*' * 10)) - result = clas_engine.predict(args.image_file) - if result is not None: - print(result) + total_result = clas_engine.predict(args.image_file) + + print("Predict complete!") if __name__ == '__main__': diff --git a/tools/infer/infer.py b/tools/infer/infer.py index 67da0e31..b3391c09 100644 --- a/tools/infer/infer.py +++ b/tools/infer/infer.py @@ -14,7 +14,6 @@ import numpy as np import cv2 -import shutil import os import sys @@ -26,61 +25,60 @@ sys.path.append(__dir__) sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) from ppcls.utils.save_load import load_dygraph_pretrain +from ppcls.utils import logger from ppcls.modeling import architectures -import utils -from utils import get_image_list - - -def postprocess(outputs, topk=5): - output = outputs[0] - prob = np.array(output).flatten() - index = prob.argsort(axis=0)[-topk:][::-1].astype('int32') - return zip(index, prob[index]) - - -def save_prelabel_results(class_id, input_filepath, output_idr): - output_dir = os.path.join(output_idr, str(class_id)) - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - shutil.copy(input_filepath, output_dir) +from utils import parse_args, get_image_list, preprocess, postprocess, save_prelabel_results def main(): - args = utils.parse_args() + args = parse_args() # assign the place place = paddle.set_device('gpu' if args.use_gpu else 'cpu') net = architectures.__dict__[args.model](class_dim=args.class_num) load_dygraph_pretrain(net, args.pretrained_model, args.load_static_weights) image_list = get_image_list(args.image_file) - for idx, filename in enumerate(image_list): - img = cv2.imread(filename)[:, :, ::-1] - data = utils.preprocess(img, args) - data = np.expand_dims(data, axis=0) - data = paddle.to_tensor(data) - net.eval() - outputs = net(data) - if args.model == "GoogLeNet": - outputs = outputs[0] - outputs = F.softmax(outputs) - outputs = outputs.numpy() - probs = postprocess(outputs) - - top1_class_id = 0 - rank = 1 - print("Current image file: {}".format(filename)) - for idx, prob in probs: - print("\ttop{:d}, class id: {:d}, probability: {:.4f}".format( - rank, idx, prob)) - if rank == 1: - top1_class_id = idx - rank += 1 - - if args.pre_label_image: - save_prelabel_results(top1_class_id, filename, - args.pre_label_out_idr) - - return + batch_input_list = [] + img_path_list = [] + cnt = 0 + for idx, img_path in enumerate(image_list): + img = cv2.imread(img_path) + if img is None: + logger.warning( + "Image file failed to read and has been skipped. The path: {}". 
+ format(img_path)) + continue + else: + img = img[:, :, ::-1] + data = preprocess(img, args) + batch_input_list.append(data) + img_path_list.append(img_path) + cnt += 1 + + if cnt % args.batch_size == 0 or (idx + 1) == len(image_list): + batch_tensor = paddle.to_tensor(batch_input_list) + net.eval() + batch_outputs = net(batch_tensor) + if args.model == "GoogLeNet": + batch_outputs = batch_outputs[0] + batch_outputs = F.softmax(batch_outputs) + batch_outputs = batch_outputs.numpy() + batch_result_list = postprocess(batch_outputs, args.top_k) + + for number, result_dict in enumerate(batch_result_list): + filename = img_path_list[number].split("/")[-1] + clas_ids = result_dict["clas_ids"] + scores_str = "[{}]".format(", ".join("{:.2f}".format( + r) for r in result_dict["scores"])) + print("File:{}, Top-{} result: class id(s): {}, score(s): {}". + format(filename, args.top_k, clas_ids, scores_str)) + + if args.pre_label_image: + save_prelabel_results(clas_ids[0], img_path_list[number], + args.pre_label_out_idr) + + batch_input_list = [] + img_path_list = [] if __name__ == "__main__": diff --git a/tools/infer/predict.py b/tools/infer/predict.py index fdcbb6a9..6c4b55ff 100644 --- a/tools/infer/predict.py +++ b/tools/infer/predict.py @@ -18,60 +18,76 @@ import time import sys sys.path.insert(0, ".") -import tools.infer.utils as utils -from tools.infer.utils import get_image_list - - -def predict(args, predictor): - input_names = predictor.get_input_names() - input_tensor = predictor.get_input_handle(input_names[0]) - - output_names = predictor.get_output_names() - output_tensor = predictor.get_output_handle(output_names[0]) - - test_num = 500 - test_time = 0.0 - if not args.enable_benchmark: - # for PaddleHubServing - if args.hubserving: - img_list = [args.image_file] - # for predict only - else: - img_list = get_image_list(args.image_file) - - for idx, img_name in enumerate(img_list): - if not args.hubserving: - img = cv2.imread(img_name)[:, :, ::-1] - assert img is not None, "Error in loading image: {}".format( - img_name) +from ppcls.utils import logger +from tools.infer.utils import parse_args, get_image_list, create_paddle_predictor, preprocess, postprocess + + +class Predictor(object): + def __init__(self, args): + # HALF precission predict only work when using tensorrt + if args.use_fp16 is True: + assert args.use_tensorrt is True + self.args = args + + self.paddle_predictor = create_paddle_predictor(args) + input_names = self.paddle_predictor.get_input_names() + self.input_tensor = self.paddle_predictor.get_input_handle(input_names[ + 0]) + + output_names = self.paddle_predictor.get_output_names() + self.output_tensor = self.paddle_predictor.get_output_handle( + output_names[0]) + + def predict(self, batch_input): + self.input_tensor.copy_from_cpu(batch_input) + self.paddle_predictor.run() + batch_output = self.output_tensor.copy_to_cpu() + return batch_output + + def normal_predict(self): + image_list = get_image_list(self.args.image_file) + batch_input_list = [] + img_name_list = [] + cnt = 0 + for idx, img_path in enumerate(image_list): + img = cv2.imread(img_path) + if img is None: + logger.warning( + "Image file failed to read and has been skipped. The path: {}". 
+ format(img_path)) + continue else: - img = img_name - inputs = utils.preprocess(img, args) - inputs = np.expand_dims( - inputs, axis=0).repeat( - args.batch_size, axis=0).copy() - input_tensor.copy_from_cpu(inputs) - - predictor.run() - - output = output_tensor.copy_to_cpu() - classes, scores = utils.postprocess(output, args) - if args.hubserving: - return classes, scores - print("Current image file: {}".format(img_name)) - print("\ttop-1 class: {0}".format(classes[0])) - print("\ttop-1 score: {0}".format(scores[0])) - else: + img = img[:, :, ::-1] + img = preprocess(img, args) + batch_input_list.append(img) + img_name = img_path.split("/")[-1] + img_name_list.append(img_name) + cnt += 1 + + if cnt % args.batch_size == 0 or (idx + 1) == len(image_list): + batch_outputs = self.predict(np.array(batch_input_list)) + batch_result_list = postprocess(batch_outputs, self.args.top_k) + + for number, result_dict in enumerate(batch_result_list): + filename = img_name_list[number] + clas_ids = result_dict["clas_ids"] + scores_str = "[{}]".format(", ".join("{:.2f}".format( + r) for r in result_dict["scores"])) + print( + "File:{}, Top-{} result: class id(s): {}, score(s): {}". + format(filename, self.args.top_k, clas_ids, + scores_str)) + batch_input_list = [] + img_name_list = [] + + def benchmark_predict(self): + test_num = 500 + test_time = 0.0 for i in range(0, test_num + 10): inputs = np.random.rand(args.batch_size, 3, 224, 224).astype(np.float32) start_time = time.time() - input_tensor.copy_from_cpu(inputs) - - predictor.run() - - output = output_tensor.copy_to_cpu() - output = output.flatten() + batch_output = self.predict(inputs).flatten() if i >= 10: test_time += time.time() - start_time time.sleep(0.01) # sleep for T4 GPU @@ -83,19 +99,11 @@ def predict(args, predictor): / test_num)) -def main(args): +if __name__ == "__main__": + args = parse_args() + predictor = Predictor(args) if not args.enable_benchmark: - assert args.batch_size == 1 + predictor.normal_predict() else: assert args.model is not None - # HALF precission predict only work when using tensorrt - if args.use_fp16 is True: - assert args.use_tensorrt is True - - predictor = utils.create_paddle_predictor(args) - predict(args, predictor) - - -if __name__ == "__main__": - args = utils.parse_args() - main(args) + predictor.benchmark_predict() diff --git a/tools/infer/utils.py b/tools/infer/utils.py index 69fcd05a..639f599d 100644 --- a/tools/infer/utils.py +++ b/tools/infer/utils.py @@ -14,6 +14,8 @@ import os import argparse +import base64 +import shutil import cv2 import numpy as np @@ -68,6 +70,9 @@ def parse_args(): help="Whether to pre-label the images using the loaded weights") parser.add_argument("--pre_label_out_idr", type=str, default=None) + # parameters for test hubserving + parser.add_argument("--server_url", type=str) + return parser.parse_args() @@ -119,12 +124,18 @@ def preprocess(img, args): return img -def postprocess(output, args): - output = output.flatten() - classes = np.argpartition(output, -args.top_k)[-args.top_k:] - classes = classes[np.argsort(-output[classes])] - scores = output[classes] - return classes, scores +def postprocess(batch_outputs, topk=5): + batch_results = [] + for probs in batch_outputs: + results = [] + index = probs.argsort(axis=0)[-topk:][::-1].astype("int32") + clas_id_list = [] + score_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + batch_results.append({"clas_ids": clas_id_list, "scores": score_list}) + return batch_results def 
get_image_list(img_file): @@ -144,6 +155,13 @@ def get_image_list(img_file): return imgs_lists +def save_prelabel_results(class_id, input_file_path, output_dir): + output_dir = os.path.join(output_dir, str(class_id)) + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + shutil.copy(input_file_path, output_dir) + + class ResizeImage(object): def __init__(self, resize_short=None): self.resize_short = resize_short @@ -197,13 +215,15 @@ class ToTensor(object): return img -class Base64ToCV2(object): - def __init__(self): - pass +def b64_to_np(b64str, revert_params): + shape = revert_params["shape"] + dtype = revert_params["dtype"] + dtype = getattr(np, dtype) if isinstance(str, type(dtype)) else dtype + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, dtype).reshape(shape) + return data + - def __call__(self, b64str): - import base64 - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR)[:, :, ::-1] - return data +def np_to_b64(images): + img_str = base64.b64encode(images).decode('utf8') + return img_str, images.shape diff --git a/tools/test_hubserving.py b/tools/test_hubserving.py index 6a130eec..5c2820df 100644 --- a/tools/test_hubserving.py +++ b/tools/test_hubserving.py @@ -18,85 +18,95 @@ __dir__ = os.path.dirname(os.path.abspath(__file__)) sys.path.append(__dir__) sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) +from tools.infer.utils import parse_args, get_image_list, preprocess, np_to_b64 from ppcls.utils import logger +import numpy as np import cv2 import time import requests import json import base64 -import imghdr -def get_image_file_list(img_file): - imgs_lists = [] - if img_file is None or not os.path.exists(img_file): - raise Exception("not found any img file in {}".format(img_file)) - - img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'GIF'} - if os.path.isfile(img_file) and imghdr.what(img_file) in img_end: - imgs_lists.append(img_file) - elif os.path.isdir(img_file): - for single_file in os.listdir(img_file): - file_path = os.path.join(img_file, single_file) - if imghdr.what(file_path) in img_end: - imgs_lists.append(file_path) - if len(imgs_lists) == 0: - raise Exception("not found any img file in {}".format(img_file)) - return imgs_lists - - -def cv2_to_base64(image): - return base64.b64encode(image).decode('utf8') - - -def main(url, image_path, top_k=1): - image_file_list = get_image_file_list(image_path) +def main(args): + image_path_list = get_image_list(args.image_file) headers = {"Content-type": "application/json"} + cnt = 0 - total_time = 0 - all_acc = 0.0 + predict_time = 0 + all_score = 0.0 + start_time = time.time() - for image_file in image_file_list: - file_str = image_file.split('/')[-1] - img = open(image_file, 'rb').read() + batch_input_list = [] + img_name_list = [] + cnt = 0 + for idx, img_path in enumerate(image_path_list): + img = cv2.imread(img_path) if img is None: - logger.error("Loading image:{} failed".format(image_file)) + logger.warning( + "Image file failed to read and has been skipped. The path: {}". + format(img_path)) continue - data = {'images': [cv2_to_base64(img)], 'top_k': top_k} - - try: - r = requests.post(url=url, headers=headers, data=json.dumps(data)) - r.raise_for_status() - except Exception as e: - logger.error("File:{}, {}".format(file_str, e)) - continue - if r.json()['status'] != '000': - logger.error( - "File:{}, The parameters returned by the server are: {}". 
- format(file_str, r.json()['msg'])) - continue - res = r.json()["results"][0] - classes, scores, elapse = res - all_acc += scores[0] - total_time += elapse - cnt += 1 - - scores = map(lambda x: round(x, 5), scores) - results = dict(zip(classes, scores)) - - message = "No.{}, File:{}, The top-{} result(s):{}, Time cost:{:.3f}".format( - cnt, file_str, top_k, results, elapse) - logger.info(message) - - logger.info("The average time cost: {}".format(float(total_time) / cnt)) - logger.info("The average top-1 score: {}".format(float(all_acc) / cnt)) + else: + img = img[:, :, ::-1] + data = preprocess(img, args) + batch_input_list.append(data) + img_name = img_path.split('/')[-1] + img_name_list.append(img_name) + cnt += 1 + if cnt % args.batch_size == 0 or (idx + 1) == len(image_path_list): + batch_input = np.array(batch_input_list) + b64str, revert_shape = np_to_b64(batch_input) + data = { + "images": b64str, + "revert_params": { + "shape": revert_shape, + "dtype": str(batch_input.dtype) + }, + "top_k": args.top_k + } + try: + r = requests.post( + url=args.server_url, + headers=headers, + data=json.dumps(data)) + r.raise_for_status + if r.json()["status"] != "000": + msg = r.json()["msg"] + raise Exception(msg) + except Exception as e: + logger.error("{}, in file(s): {} etc.".format(e, img_name_list[ + 0])) + continue + else: + results = r.json()["results"] + batch_result_list = results["prediction"] + elapse = results["elapse"] + + cnt += len(batch_result_list) + predict_time += elapse + + for number, result_list in enumerate(batch_result_list): + all_score += result_list[0]["score"] + result_str = ", ".join([ + "{}: {:.2f}".format(r["cls_id"], r["score"]) + for r in result_list + ]) + logger.info("File:{}, The top-{} result(s): {}".format( + img_name_list[number], args.top_k, result_str)) + + finally: + batch_input_list = [] + img_name_list = [] + + total_time = time.time() - start_time + logger.info("The average time of prediction cost: {:.3f} s/image".format( + predict_time / cnt)) + logger.info("The average time cost: {:.3f} s/image".format(total_time / + cnt)) + logger.info("The average top-1 score: {:.3f}".format(all_score / cnt)) if __name__ == '__main__': - if len(sys.argv) != 3 and len(sys.argv) != 4: - logger.info("Usage: %s server_url image_path" % sys.argv[0]) - else: - server_url = sys.argv[1] - image_path = sys.argv[2] - top_k = int(sys.argv[3]) if len(sys.argv) == 4 else 1 - main(server_url, image_path, top_k) + args = parse_args() + main(args) -- GitLab
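For reference, a minimal client-side sketch of the new serving protocol exercised by `tools/test_hubserving.py`: a preprocessed batch is base64-encoded with `np_to_b64`, and its shape and dtype are sent as `revert_params` so the server can rebuild the array via `b64_to_np`. The server URL and image path below are placeholders, and the script is assumed to run from the repository root so that `tools.infer.utils` is importable.

```python
import argparse
import json

import cv2
import numpy as np
import requests

from tools.infer.utils import preprocess, np_to_b64

# Placeholder values; adjust to your own deployment and test image.
server_url = "http://127.0.0.1:8866/predict/clas_system"
img_path = "./deploy/hubserving/ILSVRC2012_val_00006666.JPEG"

# Same preprocessing parameters as deploy/hubserving/clas/test.py.
args = argparse.Namespace(resize_short=256, resize=224, normalize=True)

# Read the BGR image, convert to RGB, preprocess, and build a batch of size 1.
img = cv2.imread(img_path)[:, :, ::-1]
batch_input = np.expand_dims(preprocess(img, args), axis=0)

# Encode the batch and describe how the server should rebuild it.
b64str, revert_shape = np_to_b64(batch_input)
data = {
    "images": b64str,
    "revert_params": {
        "shape": revert_shape,
        "dtype": str(batch_input.dtype)
    },
    "top_k": 5
}

r = requests.post(
    url=server_url,
    headers={"Content-type": "application/json"},
    data=json.dumps(data))
print(r.json())
```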