diff --git a/demo/serving/module_serving/lexical_analysis_lac/lac_serving_demo.py b/demo/serving/module_serving/lexical_analysis_lac/lac_serving_demo.py index 05e40f8a63a68a8909da4a15018dc2fad353ee4d..96a585bfd12e0e2a650908fb25289cd6452923d9 100644 --- a/demo/serving/module_serving/lexical_analysis_lac/lac_serving_demo.py +++ b/demo/serving/module_serving/lexical_analysis_lac/lac_serving_demo.py @@ -7,7 +7,8 @@ if __name__ == "__main__": text = ["今天是个好日子", "天气预报说今天要下雨"] # 以key的方式指定text传入预测方法的时的参数,此例中为"data" # 对应本地部署,则为lac.analysis_lexical(data=text, batch_size=1) - data = {"texts": text, "batch_size": 1} + # 若使用lac版本低于2.2.0,需要将`text`参数改为`texts` + data = {"text": text, "batch_size": 1} # 指定预测方法为lac并发送post请求,content-type类型应指定json方式 url = "http://127.0.0.1:8866/predict/lac" # 指定post请求的headers为application/json方式 diff --git a/docs/Secondary_development/Secondary_index.rst b/docs/Secondary_development/Secondary_index.rst new file mode 100644 index 0000000000000000000000000000000000000000..1be0aa1e66ee24969348663bfb9dfce0c334ccb1 --- /dev/null +++ b/docs/Secondary_development/Secondary_index.rst @@ -0,0 +1,13 @@ +二次开发 +================== + +本章节主要介绍如何通过修改源代码对已有的Task等内容进行自定义。 + +.. toctree:: + :maxdepth: 1 + + 自定义任务 + Hook机制 + + + diff --git a/docs/tutorial/hook.md b/docs/Secondary_development/hook.md similarity index 100% rename from docs/tutorial/hook.md rename to docs/Secondary_development/hook.md diff --git a/docs/tutorial/how_to_define_task.md b/docs/Secondary_development/how_to_define_task.md similarity index 100% rename from docs/tutorial/how_to_define_task.md rename to docs/Secondary_development/how_to_define_task.md diff --git a/docs/imgs/Howtofinetune1.png b/docs/imgs/Howtofinetune1.png new file mode 100644 index 0000000000000000000000000000000000000000..8194bfa22116bd22813cd28c52492df2fdc25eb9 Binary files /dev/null and b/docs/imgs/Howtofinetune1.png differ diff --git a/docs/imgs/humanseg_test.png b/docs/imgs/humanseg_test.png new file mode 100644 index 0000000000000000000000000000000000000000..8e909c46b7eb8e785a4cb53853a92b2439ed8f65 Binary files /dev/null and b/docs/imgs/humanseg_test.png differ diff --git a/docs/imgs/output_12_3.png b/docs/imgs/output_12_3.png new file mode 100644 index 0000000000000000000000000000000000000000..70d30edc56638bf418933de87fbe914d208b7d2f Binary files /dev/null and b/docs/imgs/output_12_3.png differ diff --git a/docs/imgs/output_15_3.png b/docs/imgs/output_15_3.png new file mode 100644 index 0000000000000000000000000000000000000000..891e29815cb669f50ee4c48a3039300e807b1abe Binary files /dev/null and b/docs/imgs/output_15_3.png differ diff --git a/docs/imgs/output_18_2.png b/docs/imgs/output_18_2.png new file mode 100644 index 0000000000000000000000000000000000000000..9eff25bb832afa0af56fe5242e17397e63690749 Binary files /dev/null and b/docs/imgs/output_18_2.png differ diff --git a/docs/imgs/output_4_1.png b/docs/imgs/output_4_1.png new file mode 100644 index 0000000000000000000000000000000000000000..24eef63ecab6fc074e37c16e4f09abd503f558c1 Binary files /dev/null and b/docs/imgs/output_4_1.png differ diff --git a/docs/imgs/output_8_3.png b/docs/imgs/output_8_3.png new file mode 100644 index 0000000000000000000000000000000000000000..24eef63ecab6fc074e37c16e4f09abd503f558c1 Binary files /dev/null and b/docs/imgs/output_8_3.png differ diff --git a/docs/index.rst b/docs/index.rst index de1436d247822062f08bae941025dd9e7ac3ae61..4b7df478136e195f1c56536bb2e25160d6f855f9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,10 +8,13 @@ PaddleHub 文档 :titlesonly: 
概述 - 安装 - 快速体验 - 教程 + 安装 + 快速体验 + 迁移学习 + 部署预训练模型 + 二次开发 API + 命令行参考 FAQ - 社区贡献 + 社区贡献 更新历史 \ No newline at end of file diff --git a/docs/install.md b/docs/install.md new file mode 100644 index 0000000000000000000000000000000000000000..5cb03855c157c92809ea34d55ab193162649ec0f --- /dev/null +++ b/docs/install.md @@ -0,0 +1,115 @@ +# PaddleHub安装 +## 环境准备 +PaddleHub需要与飞桨一起使用,其硬件和操作系统的适用范围与[飞桨](https://www.paddlepaddle.org.cn/install/quick)相同。 +> 注意:飞桨版本需要>= 1.7.0。 + + +```shell +# 查看是否安装飞桨 +$ python # 进入python解释器 +``` + +```python +import paddle.fluid +paddle.fluid.install_check.run_check() +``` + +> 如果出现`Your Paddle Fluid is installed successfully`,说明飞桨已成功安装。 + + +```shell +$ pip list | grep paddlepaddle # 查看飞桨版本。pip list查看所有的package版本,grep负责根据关键字筛选。 +``` + +## 安装操作 +使用 Python 包管理器pip安装PaddleHub。根据实际需要,在命令行下执行以下命令之一进行PaddleHub的安装(推荐使用第一个)。 + +> 1.安装过程中需要网络连接,请确保机器可以正常访问网络。成功安装之后,可以离线使用。 +2.如果已安装PaddleHub,再次执行安装操作将先卸载再安装。安装方式支持:安装指定版本和安装最新版本。 +3.由于国内网速的问题,直接pip安装包通常速度非常慢,而且经常会出现装到一半失败了的问题。使用国内镜像可以节省时间,提高pip安装的效率。 + ``` + 国内镜像源列表: + 清华大学:https://pypi.tuna.tsinghua.edu.cn/simple/ + 百度:https://mirror.baidu.com/pypi/simple + ``` + + +```shell +$ pip install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple # 安装最新版本,使用清华源 +``` + + +```shell +$ pip install paddlehub==1.6.1 -i https://pypi.tuna.tsinghua.edu.cn/simple # 安装指定版本(==1.6.1表示PaddleHub的版本),使用清华源 +``` + + +```shell +$ pip install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple # 安装最新版本,使用百度源 +``` + + +```shell +$ pip install paddlehub==1.6.1 -i https://mirror.baidu.com/pypi/simple # 安装指定版本(==1.6.1表示PaddleHub的版本),使用百度源 +``` + +> 等待片刻即安装完毕。如果出现`Successfully installed paddlehub`,说明PaddleHub安装成功。 + +## 验证安装 +检查PaddleHub是否安装成功。 + + +```shell +$ pip list | grep paddlehub # pip list查看所有的package版本,grep负责根据关键字筛选 +``` + + +```shell +$ pip show paddlehub # 查看PaddleHub详细信息 +``` + +PaddleHub详细信息的如下面所示,可以查看显示了PaddleHub的版本、位置等信息。 +``` +Name: paddlehub +Version: 1.7.1 +Summary: A toolkit for managing pretrained models of PaddlePaddle and helping user getting started with transfer learning more efficiently. +Home-page: https://github.com/PaddlePaddle/PaddleHub +Author: PaddlePaddle Author +Author-email: paddle-dev@baidu.com +License: Apache 2.0 +Location: /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages +Requires: pandas, pre-commit, gunicorn, flake8, visualdl, yapf, flask, protobuf, sentencepiece, six, cma, colorlog +Required-by: +``` + +## 如何卸载 +此卸载仅卸载PaddleHub,已下载的模型文件和数据集仍保留。 + + +```shell +$ pip uninstall paddlehub -y # 卸载PaddleHub +``` + +> 如果出现`Successfully uninstalled paddlehub`,表明PaddleHub卸载成功。 + +## pip常用命令 +pip是最为广泛使用的Python包管理器,可以帮助我们获得最新的Python包并进行管理。 +常用命令如下: +```shell +$ pip install [package-name] # 安装名为[package-name]的包 +$ pip install [package-name]==X.X # 安装名为[package-name]的包并指定版本X.X +$ pip install [package-name] --proxy=代理服务器IP:端口号 # 使用代理服务器安装 +$ pip install [package-name] --upgrade # 更新名为[package-name]的包 +$ pip uninstall [package-name] # 删除名为[package-name]的包 +$ pip list # 列出当前环境下已安装的所有包 +``` + +## 常见问题 +1. 已安装PaddleHub,可以升级飞桨版本吗? + 答复:可以。直接正常升级飞桨版本即可。 +2. 已安装PaddleHub,如何升级? + 答复:执行`pip install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple`,可以将PaddleHub升级到最新版本。 +3. `upgrade`安装与安装指定版本有什么区别? + 答复:`upgrade`安装的是最新版本,安装指定版本可以安装任意版本。 +4. 如何设置PaddleHub下载的缓存位置? 
+ 答复:PaddleHub的Module默认会保存在用户目录下,可以通过修改环境变量`HUB_HOME`更改这个位置。 diff --git a/docs/output_4_1.png b/docs/output_4_1.png new file mode 100644 index 0000000000000000000000000000000000000000..8e909c46b7eb8e785a4cb53853a92b2439ed8f65 Binary files /dev/null and b/docs/output_4_1.png differ diff --git a/docs/quick_experience/cmd_quick_run.md b/docs/quick_experience/cmd_quick_run.md new file mode 100644 index 0000000000000000000000000000000000000000..4087040013c3c089011b2a3a92de5febeeccc522 --- /dev/null +++ b/docs/quick_experience/cmd_quick_run.md @@ -0,0 +1,182 @@ +# 通过命令行调用方式使用PaddleHub + +本页面的代码/命令可在[AIStudio](https://aistudio.baidu.com/aistudio/projectdetail/643120)上在线运行,类似notebook的环境,只需通过浏览器即可访问,无需准备环境,非常方便开发者快速体验。 + +PaddleHub在设计时,为模型的管理和使用提供了命令行工具,也提供了通过命令行调用PaddleHub模型完成预测的方式。比如,前面章节中人像分割和文本分词的任务也可以通过命令行调用的方式实现。 + +### 体验前请提前安装好PaddleHub + + +```shell +# 安装最新版本,使用清华源更稳定、更迅速 +$ pip install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + +### 人像扣图 + + +```shell +# 下载待测试图片 +$ wget https://paddlehub.bj.bcebos.com/resources/test_image.jpg +# 通过命令行方式实现人像扣图任务 +$ hub run deeplabv3p_xception65_humanseg --input_path test_image.jpg --visualization=True --output_dir="humanseg_output" +``` + + --2020-07-22 12:19:52-- https://paddlehub.bj.bcebos.com/resources/test_image.jpg + Resolving paddlehub.bj.bcebos.com (paddlehub.bj.bcebos.com)... 182.61.200.195, 182.61.200.229 + Connecting to paddlehub.bj.bcebos.com (paddlehub.bj.bcebos.com)|182.61.200.195|:443... connected. + HTTP request sent, awaiting response... 200 OK + Length: 967120 (944K) [image/jpeg] + Saving to: ‘test_image.jpg’ + + test_image.jpg 100%[===================>] 944.45K 6.13MB/s in 0.2s + + 2020-07-22 12:19:53 (6.13 MB/s) - ‘test_image.jpg’ saved [967120/967120] + + [{'save_path': 'humanseg_output/test_image.png', 'data': array([[0., 0., 0., ..., 0., 0., 0.], + [0., 0., 0., ..., 0., 0., 0.], + [0., 0., 0., ..., 0., 0., 0.], + ..., + [0., 0., 0., ..., 0., 0., 0.], + [0., 0., 0., ..., 0., 0., 0.], + [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)}] + + + +![png](../imgs/humanseg_test_res.png) + + +### 中文分词 + + +```shell +#通过命令行方式实现文本分词任务 +$ hub run lac --input_text "今天是个好日子" +``` + + Install Module lac + Downloading lac + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmpjcskpj8x/lac + [==================================================] 100.00% + Successfully installed lac-2.1.1 + [{'word': ['今天', '是', '个', '好日子'], 'tag': ['TIME', 'v', 'q', 'n']}] + + +上面的命令中包含四个部分,分别是: +- hub 表示PaddleHub的命令。 +- run 调用run执行模型的预测。 +- deeplabv3p_xception65_humanseg、lac 表示要调用的算法模型。 +- --input_path/--input_text 表示模型的输入数据,图像和文本的输入方式不同。 + +另外,命令行中`visualization=True`表示将结果可视化输出,`output_dir="humanseg_output"`表示预测结果的保存目录,可以到该路径下查看输出的图片。 + +再看一个文字识别和一个口罩检测的例子。 + +### OCR文字识别 + + +```shell +# 下载待测试的图片 +$ wget https://paddlehub.bj.bcebos.com/model/image/ocr/test_ocr.jpg + +# 该Module依赖于第三方库shapely和pyclipper,需提前安装 +$ pip install shapely +$ pip install pyclipper + +# 通过命令行方式实现文字识别任务 +$ hub run chinese_ocr_db_crnn_mobile --input_path test_ocr.jpg --visualization=True --output_dir='ocr_result' +``` + + --2020-07-22 15:00:50-- https://paddlehub.bj.bcebos.com/model/image/ocr/test_ocr.jpg + Resolving paddlehub.bj.bcebos.com (paddlehub.bj.bcebos.com)... 182.61.200.195, 182.61.200.229 + Connecting to paddlehub.bj.bcebos.com (paddlehub.bj.bcebos.com)|182.61.200.195|:443... connected. + HTTP request sent, awaiting response... 
200 OK + Length: 48680 (48K) [image/jpeg] + Saving to: ‘test_ocr.jpg’ + + test_ocr.jpg 100%[===================>] 47.54K --.-KB/s in 0.02s + + 2020-07-22 15:00:51 (2.88 MB/s) - ‘test_ocr.jpg’ saved [48680/48680] + + Looking in indexes: https://pypi.mirrors.ustc.edu.cn/simple/ + Requirement already satisfied: shapely in /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages (1.7.0) + Looking in indexes: https://pypi.mirrors.ustc.edu.cn/simple/ + Requirement already satisfied: pyclipper in /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages (1.2.0) + [{'save_path': 'ocr_result/ndarray_1595401261.294494.jpg', 'data': [{'text': '纯臻营养护发素', 'confidence': 0.9438689351081848, 'text_box_position': [[24, 36], [304, 34], [304, 72], [24, 74]]}, {'text': '产品信息/参数', 'confidence': 0.9843138456344604, 'text_box_position': [[24, 80], [172, 80], [172, 104], [24, 104]]}, {'text': '(45元/每公斤,100公斤起订)', 'confidence': 0.9210420250892639, 'text_box_position': [[24, 109], [333, 109], [333, 136], [24, 136]]}, {'text': '每瓶22元,1000瓶起订)', 'confidence': 0.9685984253883362, 'text_box_position': [[22, 139], [283, 139], [283, 166], [22, 166]]}, {'text': '【品牌】', 'confidence': 0.9527574181556702, 'text_box_position': [[22, 174], [85, 174], [85, 198], [22, 198]]}, {'text': ':代加工方式/OEMODM', 'confidence': 0.9442129135131836, 'text_box_position': [[90, 176], [301, 176], [301, 196], [90, 196]]}, {'text': '【品名】', 'confidence': 0.8793742060661316, 'text_box_position': [[23, 205], [85, 205], [85, 229], [23, 229]]}, {'text': ':纯臻营养护发素', 'confidence': 0.9230973124504089, 'text_box_position': [[95, 204], [235, 206], [235, 229], [95, 227]]}, {'text': '【产品编号】', 'confidence': 0.9311650395393372, 'text_box_position': [[24, 238], [120, 238], [120, 260], [24, 260]]}, {'text': 'J:YM-X-3011', 'confidence': 0.8866629004478455, 'text_box_position': [[110, 239], [239, 239], [239, 256], [110, 256]]}, {'text': 'ODMOEM', 'confidence': 0.9916308522224426, 'text_box_position': [[414, 233], [430, 233], [430, 304], [414, 304]]}, {'text': '【净含量】:220ml', 'confidence': 0.8709315657615662, 'text_box_position': [[23, 268], [181, 268], [181, 292], [23, 292]]}, {'text': '【适用人群】', 'confidence': 0.9589888453483582, 'text_box_position': [[24, 301], [118, 301], [118, 321], [24, 321]]}, {'text': ':适合所有肤质', 'confidence': 0.935418963432312, 'text_box_position': [[131, 300], [254, 300], [254, 323], [131, 323]]}, {'text': '【主要成分】', 'confidence': 0.9366627335548401, 'text_box_position': [[24, 332], [117, 332], [117, 353], [24, 353]]}, {'text': '鲸蜡硬脂醇', 'confidence': 0.9033458828926086, 'text_box_position': [[138, 331], [235, 331], [235, 351], [138, 351]]}, {'text': '燕麦B-葡聚', 'confidence': 0.8497812747955322, 'text_box_position': [[248, 332], [345, 332], [345, 352], [248, 352]]}, {'text': '椰油酰胺丙基甜菜碱、', 'confidence': 0.8935506939888, 'text_box_position': [[54, 363], [232, 363], [232, 383], [54, 383]]}, {'text': '糖、', 'confidence': 0.8750994205474854, 'text_box_position': [[25, 364], [62, 364], [62, 383], [25, 383]]}, {'text': '泛酯', 'confidence': 0.5581164956092834, 'text_box_position': [[244, 363], [281, 363], [281, 382], [244, 382]]}, {'text': '(成品包材)', 'confidence': 0.9566792845726013, 'text_box_position': [[368, 367], [475, 367], [475, 388], [368, 388]]}, {'text': '【主要功能】', 'confidence': 0.9493741393089294, 'text_box_position': [[24, 395], [119, 395], [119, 416], [24, 416]]}, {'text': ':可紧致头发磷层', 'confidence': 0.9692543745040894, 'text_box_position': [[128, 397], [273, 397], [273, 414], [128, 414]]}, {'text': '美,从而达到', 'confidence': 
0.8662520051002502, 'text_box_position': [[265, 395], [361, 395], [361, 415], [265, 415]]}, {'text': '即时持久改善头发光泽的效果,给干燥的头', 'confidence': 0.9690631031990051, 'text_box_position': [[25, 425], [372, 425], [372, 448], [25, 448]]}, {'text': '发足够的滋养', 'confidence': 0.8946213126182556, 'text_box_position': [[26, 457], [136, 457], [136, 477], [26, 477]]}]}] + + + +```shell +# 查看预测结果 +``` + + +![png](../imgs/ocr_res.jpg) + + +### 口罩检测 + + +```shell +# 下载待测试的图片 +$ wget https://paddlehub.bj.bcebos.com/resources/test_mask_detection.jpg + +# 通过命令行方式实现文字识别任务 +$ hub run pyramidbox_lite_mobile_mask --input_path test_mask_detection.jpg --visualization=True --output_dir='detection_result' +``` + + --2020-07-22 15:08:11-- https://paddlehub.bj.bcebos.com/resources/test_mask_detection.jpg + Resolving paddlehub.bj.bcebos.com (paddlehub.bj.bcebos.com)... 182.61.200.229, 182.61.200.195 + Connecting to paddlehub.bj.bcebos.com (paddlehub.bj.bcebos.com)|182.61.200.229|:443... connected. + HTTP request sent, awaiting response... 200 OK + Length: 299133 (292K) [image/jpeg] + Saving to: ‘test_mask_detection.jpg’ + + test_mask_detection 100%[===================>] 292.12K --.-KB/s in 0.06s + + 2020-07-22 15:08:11 (4.55 MB/s) - ‘test_mask_detection.jpg’ saved [299133/299133] + + Install Module pyramidbox_lite_mobile_mask + Downloading pyramidbox_lite_mobile_mask + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmp8oes9jid/pyramidbox_lite_mobile_mask + [==================================================] 100.00% + Successfully installed pyramidbox_lite_mobile_mask-1.3.0 + Downloading pyramidbox_lite_mobile + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmpvhjhlr10/pyramidbox_lite_mobile + [==================================================] 100.00% + [{'data': [{'label': 'MASK', 'confidence': 0.9992434978485107, 'top': 181, 'bottom': 440, 'left': 457, 'right': 654}, {'label': 'MASK', 'confidence': 0.9224318265914917, 'top': 340, 'bottom': 578, 'left': 945, 'right': 1125}, {'label': 'NO MASK', 'confidence': 0.9996706247329712, 'top': 292, 'bottom': 500, 'left': 1166, 'right': 1323}], 'path': 'test_mask_detection.jpg'}] + + + +```shell +# 查看预测结果 +``` + + +![png](../imgs/test_mask_detection_result.jpg) + + +### PaddleHub命令行工具简介 + +PaddleHub的命令行工具在开发时借鉴了Anaconda和PIP等软件包管理的理念,可以方便快捷的完成模型的搜索、下载、安装、升级、预测等功能。 下面概要介绍一下PaddleHub支持的12个命令,详细介绍可查看[命令行参考](../tutorial/cmdintro.md)章节。: +* install:用于将Module安装到本地,默认安装在{HUB_HOME}/.paddlehub/modules目录下; +* uninstall:卸载本地Module; +* show:用于查看本地已安装Module的属性或者指定目录下确定的Module的属性,包括其名字、版本、描述、作者等信息; +* download:用于下载百度飞桨PaddleHub提供的Module; +* search:通过关键字在服务端检索匹配的Module,当想要查找某个特定模型的Module时,使用search命令可以快速得到结果,例如hub search ssd命令,会查找所有包含了ssd字样的Module,命令支持正则表达式,例如hub search ^s.\*搜索所有以s开头的资源; +* list:列出本地已经安装的Module; +* run:用于执行Module的预测; +* version:显示PaddleHub版本信息; +* help:显示帮助信息; +* clear:PaddleHub在使用过程中会产生一些缓存数据,这部分数据默认存放在${HUB_HOME}/.paddlehub/cache目录下,用户可以通过clear命令来清空缓存; +* autofinetune:用于自动调整Fine-tune任务的超参数,具体使用详情参考[PaddleHub AutoDL Finetuner](https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.5/docs/tutorial/autofinetune.md)使用教程; +* config:用于查看和设置Paddlehub相关设置,包括对server地址、日志级别的设置; +* serving:用于一键部署Module预测服务,详细用法见[PaddleHub Serving一键服务部署](https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.5/docs/tutorial/serving.md)。 + +## 小结 +PaddleHub的产品理念是模型即软件,通过Python API或命令行实现模型调用,可快速体验或集成飞桨特色预训练模型。 +此外,当用户想用少量数据来优化预训练模型时,PaddleHub也支持迁移学习,通过Fine-tune 
API,内置多种优化策略,只需少量代码即可完成预训练模型的Fine-tuning。具体可通过后面迁移学习的章节了解。 +>值得注意的是,不是所有的Module都支持通过命令行预测 (例如BERT/ERNIE Transformer类模型,一般需要搭配任务进行Fine-tune), 也不是所有的Module都可用于Fine-tune(例如一般不建议用户使用词法分析LAC模型Fine-tune)。建议提前阅读[预训练模型的介绍文档](https://www.paddlepaddle.org.cn/hublist)了解使用场景。 diff --git a/docs/quick_experience/more_demos.md b/docs/quick_experience/more_demos.md new file mode 100644 index 0000000000000000000000000000000000000000..967aae2a7d4ec17bef31c4582ebbf34e6b76b33e --- /dev/null +++ b/docs/quick_experience/more_demos.md @@ -0,0 +1,41 @@ +# PaddleHub更多体验Demos + +## PaddleHub官方Demo全集 + +官方Demo在AI Studio中均可在线运行,类似notebook的环境,只需通过浏览器即可访问,无需准备环境,非常方便开发者快速体验。 +并将持续更新,建议收藏。 +[https://aistudio.baidu.com/aistudio/personalcenter/thirdview/79927](https://aistudio.baidu.com/aistudio/personalcenter/thirdview/79927) + +## PaddleHub开发者趣味实践作品 + +以下为前期PaddleHub课程或活动中,开发者们基于PaddleHub创作的趣味实践作品,均收录在AI Studio中,可在线运行,欢迎访问,希望对您有所启发。 +1. [布剪刀石头【人脸识别切换本地窗口】](https://aistudio.baidu.com/aistudio/projectdetail/507630) +1. [秋水中的鱼【yesok舞蹈背景抠图转换并动漫风格迁移】](http://aistudio.baidu.com/aistudio/projectdetail/517066) +1. [Ninetailskim【在人脸上玩复古windows弹球】](https://aistudio.baidu.com/aistudio/projectdetail/518861) +1. [乌拉__【监控口罩,语音提醒,后台记录】](https://aistudio.baidu.com/aistudio/projectdetail/506931) +1. [九品炼丹师【影流之绿蛙蔡徐坤,人脸识别加头饰+ 人像分割变分身】](https://aistudio.baidu.com/aistudio/projectdetail/505168) +1. [七年期限【风格迁移以及本地部署】](https://aistudio.baidu.com/aistudio/projectdetail/520453) +1. [Fanas无敌【口红试色项目】](https://aistudio.baidu.com/aistudio/projectdetail/516520) +1. [skywalk163【用paddlehub统计飞桨源代码词频以及词云与人像展示】](https://aistudio.baidu.com/aistudio/projectdetail/519841) +1. [AIStudio261428【人脸识别+漫画表情包】](https://aistudio.baidu.com/aistudio/projectdetail/519616) +1. [土豆芽【六一儿童节邀请卡通人物来做客】](https://aistudio.baidu.com/aistudio/projectdetail/520925) +1. [大熊猫的感觉【变化的口罩】](https://aistudio.baidu.com/aistudio/projectdetail/520996) +1. [kly1997【一键旅游+戴墨镜】](https://aistudio.baidu.com/aistudio/projectdetail/518117) +1. [寞寞_默默【穿越到油画中】](https://aistudio.baidu.com/aistudio/projectdetail/516332) +1. [isse7【创意项目:风格“鬼脸”变换】](https://aistudio.baidu.com/aistudio/projectdetail/515307) +1. [Pda【人脸趣味变】](https://aistudio.baidu.com/aistudio/projectdetail/516306) +1. [Kgkzhiwen【我的新衣】](https://aistudio.baidu.com/aistudio/projectdetail/516663) +1. [哎呀呀好好学习【脸型自动调整】](https://aistudio.baidu.com/aistudio/projectdetail/513640) +1. [Tfboy【证件照换底】](https://aistudio.baidu.com/aistudio/projectdetail/509443) +1. [Leigangblog【我是明星脸】](https://aistudio.baidu.com/aistudio/projectdetail/505537) +1. [wpb3dm【时装模特换装】](https://aistudio.baidu.com/aistudio/projectdetail/519349) +1. [lsvine_bai【女友秒变神秘金发女神】](https://aistudio.baidu.com/aistudio/projectdetail/521784) +1. [Lemonadeqk【简单追星】](https://aistudio.baidu.com/aistudio/projectdetail/520488) +1. [XM1436gr【利用PaddleHub关键点检测实现AI换卡通脸】](https://aistudio.baidu.com/aistudio/projectdetail/514547) +1. [旺仔【人人都是圆眼萌仔】](https://aistudio.baidu.com/aistudio/projectdetail/519222) +1. [Arrowarcher【AI一键换发】](https://aistudio.baidu.com/aistudio/projectdetail/508270) +1. [WHY197598【移物换景基础】](https://aistudio.baidu.com/aistudio/projectdetail/517961) +1. [署名景逸【基于paddlehub人脸关键点检测的疲劳检测】](https://aistudio.baidu.com/aistudio/projectdetail/506024) +1. [thunder95【PaddleHub目光表情投票】](https://aistudio.baidu.com/aistudio/projectdetail/514205) +1. [上弦月C 【坟头蹦迪毕业照】](https://aistudio.baidu.com/aistudio/projectdetail/511253) +1. 
[如意_鸡蛋【左看像周润发,右看像刘德华】](https://aistudio.baidu.com/aistudio/projectdetail/507231) diff --git a/docs/quick_experience/python_use_hub.md b/docs/quick_experience/python_use_hub.md new file mode 100644 index 0000000000000000000000000000000000000000..1101e3f96297329ef2a6e15b7548a504c9a9b9cf --- /dev/null +++ b/docs/quick_experience/python_use_hub.md @@ -0,0 +1,297 @@ +# 通过Python代码调用方式使用PaddleHub + +本页面的代码/命令可在[AIStudio](https://aistudio.baidu.com/aistudio/projectdetail/635335)上在线运行,类似notebook的环境,只需通过浏览器即可访问,无需准备环境,非常方便开发者快速体验。 + +## 计算机视觉任务的PaddleHub示例 +先以计算机视觉任务为例,我们选用一张测试图片test.jpg,分别实现如下四项功能: +* 人像扣图([deeplabv3p_xception65_humanseg](https://www.paddlepaddle.org.cn/hubdetail?name=deeplabv3p_xception65_humanseg&en_category=ImageSegmentation)) +* 人体部位分割([ace2p](https://www.paddlepaddle.org.cn/hubdetail?name=ace2p&en_category=ImageSegmentation)) + +* 人脸检测([ultra_light_fast_generic_face_detector_1mb_640](https://www.paddlepaddle.org.cn/hubdetail?name=ultra_light_fast_generic_face_detector_1mb_640&en_category=FaceDetection)) +* 关键点检测([human_pose_estimation_resnet50_mpii](https://www.paddlepaddle.org.cn/hubdetail?name=human_pose_estimation_resnet50_mpii&en_category=KeyPointDetection)) + +>注:如果需要查找PaddleHub中可以调用哪些预训练模型,获取模型名称(如deeplabv3p_xception65_humanseg,后续代码中通过该名称调用模型),请参考[官网文档](https://www.paddlepaddle.org.cn/hublist),文档中已按照模型类别分好类,方便查找,并且提供了详细的模型介绍。 + + +### 体验前请提前安装好PaddleHub + + +```shell +# 安装最新版本,使用清华源更稳定、更迅速 +$ pip install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + +### 原图展示 + + +```shell +# 下载待测试图片 +$ wget https://paddlehub.bj.bcebos.com/resources/test_image.jpg +``` + + --2020-07-22 12:22:19-- https://paddlehub.bj.bcebos.com/resources/test_image.jpg + Resolving paddlehub.bj.bcebos.com (paddlehub.bj.bcebos.com)... 182.61.200.195, 182.61.200.229 + Connecting to paddlehub.bj.bcebos.com (paddlehub.bj.bcebos.com)|182.61.200.195|:443... connected. + HTTP request sent, awaiting response... 
200 OK + Length: 967120 (944K) [image/jpeg] + Saving to: ‘test_image.jpg.1’ + + test_image.jpg.1 100%[===================>] 944.45K 5.51MB/s in 0.2s + + 2020-07-22 12:22:19 (5.51 MB/s) - ‘test_image.jpg.1’ saved [967120/967120] + + + + +![png](../imgs/humanseg_test.png) + + +### 人像扣图 + +PaddleHub采用模型即软件的设计理念,所有的预训练模型与Python软件包类似,具备版本的概念,通过`hub install`、`hub uninstall`命令可以便捷地完成模型的安装、升级和卸载。 +> 使用如下命令默认下载最新版本的模型,如果需要指定版本,可在后面接版本号,如`==1.1.1`。 + + +```shell +#安装预训练模型,deeplabv3p_xception65_humanseg是模型名称 +$ hub install deeplabv3p_xception65_humanseg +``` + + Downloading deeplabv3p_xception65_humanseg + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmpo32jeve0/deeplabv3p_xception65_humanseg + [==================================================] 100.00% + Successfully installed deeplabv3p_xception65_humanseg-1.1.1 + + + +```python +# 导入paddlehub库 +import paddlehub as hub +# 指定模型名称、待预测的图片路径、输出结果的路径,执行并输出预测结果 +module = hub.Module(name="deeplabv3p_xception65_humanseg") +res = module.segmentation(paths = ["./test_image.jpg"], visualization=True, output_dir='humanseg_output') +``` + + [32m[2020-07-22 12:22:49,474] [ INFO] - Installing deeplabv3p_xception65_humanseg module [0m + + + Downloading deeplabv3p_xception65_humanseg + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmpzrrl1duq/deeplabv3p_xception65_humanseg + [==================================================] 100.00% + + + [32m[2020-07-22 12:23:11,811] [ INFO] - Successfully installed deeplabv3p_xception65_humanseg-1.1.1 [0m + + + +![png](../imgs/output_8_3.png) + + +可以看到,使用Python代码调用PaddleHub只需要三行代码即可实现: +``` +import paddlehub as hub # 导入PaddleHub代码库 +module = hub.Module(name="deeplabv3p_xception65_humanseg") # 指定模型名称 +res = module.segmentation(paths = ["./test.jpg"], visualization=True, output_dir='humanseg_output') # 指定模型的输入和输出路径,执行并输出预测结果,其中visualization=True表示将结果可视化输出 +``` +* 模型名称均通过`hub.Module` API来指定; +* `module.segmentation`用于执行图像分割类的预测任务,不同类型任务设计了不同的预测API,比如人脸检测任务采用`face_detection`函数执行预测,建议调用预训练模型之前先仔细查阅对应的模型介绍文档。 +* 预测结果保存在`output_dir='humanseg_output'`目录下,可以到该路径下查看输出的图片。 + +其他任务的实现方式,均可参考这个“套路”。看一下接下来几个任务如何实现。 + +### 人体部位分割 + + +```shell +#安装预训练模型 +$ hub install ace2p +``` + + /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/cloudpickle/cloudpickle.py:47: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses + import imp + Downloading ace2p + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmpfsovt3f8/ace2p + [==================================================] 100.00% + Successfully installed ace2p-1.1.0 + + + +```python +# 导入paddlehub库 +import paddlehub as hub +# 指定模型名称、待预测的图片路径、输出结果的路径,执行并输出预测结果 +module = hub.Module(name="ace2p") +res = module.segmentation(paths = ["./test_image.jpg"], visualization=True, output_dir='ace2p_output') +``` + + [32m[2020-07-22 12:23:58,027] [ INFO] - Installing ace2p module [0m + + + Downloading ace2p + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmptrogpj6j/ace2p + [==================================================] 100.00% + + + [32m[2020-07-22 12:24:22,575] [ INFO] - Successfully installed ace2p-1.1.0 [0m + + + +![png](../imgs/output_12_3.png) + + +### 人脸检测 + + +```shell +#安装预训练模型 +$ hub install ultra_light_fast_generic_face_detector_1mb_640 +``` + 
+ Downloading ultra_light_fast_generic_face_detector_1mb_640 + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmpz82xnmy6/ultra_light_fast_generic_face_detector_1mb_640 + [==================================================] 100.00% + Successfully installed ultra_light_fast_generic_face_detector_1mb_640-1.1.2 + + + +```python +# 导入paddlehub库 +import paddlehub as hub +# 指定模型名称、待预测的图片路径、输出结果的路径,执行并输出预测结果 +module = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_640") +res = module.face_detection(paths = ["./test_image.jpg"], visualization=True, output_dir='face_detection_output') +``` + + [32m[2020-07-22 12:25:12,948] [ INFO] - Installing ultra_light_fast_generic_face_detector_1mb_640 module [0m + + + Downloading ultra_light_fast_generic_face_detector_1mb_640 + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmpw44mo56p/ultra_light_fast_generic_face_detector_1mb_640 + [==================================================] 100.00% + + + [32m[2020-07-22 12:25:14,698] [ INFO] - Successfully installed ultra_light_fast_generic_face_detector_1mb_640-1.1.2 + + + +![png](../imgs/output_15_3.png) + + +### 关键点检测 + + +```shell +#安装预训练模型 +$ hub install human_pose_estimation_resnet50_mpii +``` + + /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/cloudpickle/cloudpickle.py:47: DeprecationWarning: the imp module is deprecated in favour of importlib; see the module's documentation for alternative uses + import imp + Downloading human_pose_estimation_resnet50_mpii + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmpn_ppwkzq/human_pose_estimation_resnet50_mpii + [======== ] 17.99% + + +```python +# 导入paddlehub库 +import paddlehub as hub +# 指定模型名称、待预测的图片路径、输出结果的路径,执行并输出预测结果 +module = hub.Module(name="human_pose_estimation_resnet50_mpii") +res = module.keypoint_detection(paths = ["./test_image.jpg"], visualization=True, output_dir='keypoint_output') +``` + + [32m[2020-07-23 11:27:33,989] [ INFO] - Installing human_pose_estimation_resnet50_mpii module [0m + [32m[2020-07-23 11:27:33,992] [ INFO] - Module human_pose_estimation_resnet50_mpii already installed in /home/aistudio/.paddlehub/modules/human_pose_estimation_resnet50_mpii [0m + + + image saved in keypoint_output/test_imagetime=1595474855.jpg + + + +![png](../imgs/output_18_2.png) + + +## 自然语言处理任务的PaddleHub示例 + +再看两个自然语言处理任务的示例,下面以中文分词和情感分类的任务为例介绍。 +* 中文分词([lac](https://www.paddlepaddle.org.cn/hubdetail?name=lac&en_category=LexicalAnalysis)) +* 情感分析([senta_bilstm](https://www.paddlepaddle.org.cn/hubdetail?name=senta_bilstm&en_category=SentimentAnalysis)) + +### 中文分词 + + +```shell +#安装预训练模型 +$ hub install lac +``` + + 2020-07-22 10:03:09,866-INFO: font search path ['/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf', '/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/afm', '/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/pdfcorefonts'] + 2020-07-22 10:03:10,208-INFO: generated new fontManager + Downloading lac + [==================================================] 100.00% + Uncompress /home/aistudio/.paddlehub/tmp/tmp8ukaz690/lac + [==================================================] 100.00% + Successfully installed lac-2.1.1 + + + +```python +# 导入paddlehub库 +import paddlehub as hub +# 
指定模型名称、待分词的文本,执行并输出预测结果 +lac = hub.Module(name="lac") +test_text = ["1996年,曾经是微软员工的加布·纽维尔和麦克·哈灵顿一同创建了Valve软件公司。他们在1996年下半年从id software取得了雷神之锤引擎的使用许可,用来开发半条命系列。"] +res = lac.lexical_analysis(texts = test_text) +# 打印预测结果 +print("中文词法分析结果:", res) +``` + + [32m[2020-07-22 10:03:18,439] [ INFO] - Installing lac module[0m + [32m[2020-07-22 10:03:18,531] [ INFO] - Module lac already installed in /home/aistudio/.paddlehub/modules/lac [0m + + + 中文词法分析结果: [{'word': ['1996年', ',', '曾经', '是', '微软', '员工', '的', '加布·纽维尔', '和', '麦克·哈灵顿', '一同', '创建', '了', 'Valve软件公司', '。', '他们', '在', '1996年下半年', '从', 'id', ' ', 'software', '取得', '了', '雷神之锤', '引擎', '的', '使用', '许可', ',', '用来', '开发', '半条命', '系列', '。'], 'tag': ['TIME', 'w', 'd', 'v', 'ORG', 'n', 'u', 'PER', 'c', 'PER', 'd', 'v', 'u', 'ORG', 'w', 'r', 'p', 'TIME', 'p', 'nz', 'w', 'n', 'v', 'u', 'n', 'n', 'u', 'vn', 'vn', 'w', 'v', 'v', 'n', 'n', 'w']}] + + +可以看到,与计算机视觉任务相比,输入和输出接口(这里需要输入文本,以函数参数的形式传入)存在差异,这与任务类型相关,具体可查看对应预训练模型的API介绍。 + +### 情感分类 + + +```shell +#安装预训练模型 +$ hub install senta_bilstm +``` + + Module senta_bilstm-1.1.0 already installed in /home/aistudio/.paddlehub/modules/senta_bilstm + + + +```python +import paddlehub as hub +senta = hub.Module(name="senta_bilstm") +test_text = ["味道不错,确实不算太辣,适合不能吃辣的人。就在长江边上,抬头就能看到长江的风景。鸭肠、黄鳝都比较新鲜。"] +res = senta.sentiment_classify(texts = test_text) + +print("情感分析结果:", res) +``` + + [32m[2020-07-22 10:34:06,922] [ INFO] - Installing senta_bilstm module [0m + [32m[2020-07-22 10:34:06,984] [ INFO] - Module senta_bilstm already installed in /home/aistudio/.paddlehub/modules/senta_bilstm + [32m[2020-07-22 10:34:08,937] [ INFO] - Installing lac module[0m + [32m[2020-07-22 10:34:08,939] [ INFO] - Module lac already installed in /home/aistudio/.paddlehub/modules/lac [0m + + + 情感分析结果: [{'text': '味道不错,确实不算太辣,适合不能吃辣的人。就在长江边上,抬头就能看到长江的风景。鸭肠、黄鳝都比较新鲜。', 'sentiment_label': 1, 'sentiment_key': 'positive', 'positive_probs': 0.9771, 'negative_probs': 0.0229}] + + +## 总结 +PaddleHub提供了丰富的预训练模型,包括图像分类、语义模型、视频分类、图像生成、图像分割、文本审核、关键点检测等主流模型,只需要3行Python代码即可快速调用,即时输出预测结果,非常方便。您可以尝试一下,从[预训练模型列表](https://www.paddlepaddle.org.cn/hublist)中选择一些模型体验一下。 + diff --git a/docs/quick_experience/quick_index.rst b/docs/quick_experience/quick_index.rst new file mode 100644 index 0000000000000000000000000000000000000000..03dcd5130558b0e6df9ee11f240edef37cd20b92 --- /dev/null +++ b/docs/quick_experience/quick_index.rst @@ -0,0 +1,33 @@ +快速体验 +================== + +PaddleHub有两种使用方式:Python代码调用和命令行调用。 + +命令行方式只需要一行命令即可快速体验PaddleHub提供的预训练模型的效果,是快速体验的绝佳选择;Python代码调用方式也仅需要三行代码,如果需要使用自己的数据Fine-tune并生成模型,则采用该方式。 + +命令行示例: +:: + $ hub run chinese_ocr_db_crnn_mobile --input_path test_ocr.jpg + +Python代码示例: +:: + import paddlehub as hub + ocr = hub.Module(name="chinese_ocr_db_crnn_mobile") + result = ocr.recognize_text(paths = "./test_image.jpg"], visualization=True, output_dir='ocr_output') + +样例结果示例: + +.. image:: ../imgs/ocr_res.jpg + + +本章节提供这两种方法的快速体验方法,方便快速上手,同时提供了丰富的体验Demo,覆盖多场景领域,欢迎体验。具体使用时,当然还需要进一步了解详细的API接口参数、命令行参数解释,可参考后面的Python API接口和命令行参考章节。 + +.. toctree:: + :maxdepth: 1 + + 通过命令行调用方式使用PaddleHub + 通过Python代码调用方式使用PaddleHub + PaddleHub更多体验Demos + + + diff --git a/docs/tutorial/Serving_index.rst b/docs/tutorial/Serving_index.rst new file mode 100644 index 0000000000000000000000000000000000000000..80428ddf1dcdd3f5a9c12bb311b1e7dd4fad4a95 --- /dev/null +++ b/docs/tutorial/Serving_index.rst @@ -0,0 +1,15 @@ +部署预训练模型 +================== + +本文介绍了如何生成可用于服务化部署的模型、如何实现服务化部署以及如何获取ERNIE/BERT Embedding服务的方法。 + + +详细信息,参考以下教程: + +.. 
toctree:: + :maxdepth: 1 + + Fine-tune模型转化为PaddleHub Module + 服务化部署 + 文本Embedding服务 + diff --git a/docs/tutorial/how_to_finetune.md b/docs/tutorial/how_to_finetune.md new file mode 100644 index 0000000000000000000000000000000000000000..f14dce53f12dabd23d94e0caf8e423fd7609ea6a --- /dev/null +++ b/docs/tutorial/how_to_finetune.md @@ -0,0 +1,378 @@ +## **迁移学习** + +### **概述** +迁移学习 (Transfer Learning) 是属于深度学习的一个子研究领域,该研究领域的目标在于利用数据、任务、或模型之间的相似性,将在旧领域学习过的知识,迁移应用于新领域中。通俗的来讲,迁移学习就是运用已有的知识来学习新的知识,例如学会了骑自行车的人也能较快的学会骑电动车。较为常用的一种迁移学习方式是利用预训练模型进行微调,即用户基于当前任务的场景从PaddleHub中选择已训练成功的模型进行新任务训练,且该模型曾经使用的数据集与新场景的数据集情况相近,此时仅需要在当前任务场景的训练过程中使用新场景的数据对模型参数进行微调(**Fine-tune**),即可完成训练任务。迁移学习吸引了很多研究者投身其中,因为它能够很好的解决深度学习中的以下几个问题: +* 一些研究领域只有少量标注数据,且数据标注成本较高,不足以训练一个足够鲁棒的神经网络。 +* 大规模神经网络的训练依赖于大量的计算资源,这对于一般用户而言难以实现。 +* 应对于普适化需求的模型,在特定应用上表现不尽如人意。 + + +为了让开发者更便捷地应用迁移学习,飞桨开源了预训练模型管理工具 PaddleHub。开发者仅仅使用十余行的代码,就能完成迁移学习。本文将为读者全面介绍使用PaddleHub完成迁移学习的方法。 + +### **前置条件** +在开始迁移学习之前,用户需要做好如下工作: +* 用户已安装PaddleHub。 +* 准备好用于迁移学习的数据,用户可以选择使用PaddleHub提供的数据集或者自定义数据集,如果是自定义数据,需要参考“自定义数据集如何Fine-tune”对数据集处理。 +* 使用hub install命令安装或更新用于训练的module,以使用ERNIE模型为例,命令格式如下所示。用户可能在之前的任务中已经安装过相关的预训练模型,但是仍然推荐用户在开始训练前执行此步骤,这样可以保证预训练模型是最新版本。 + + +```python +$ hub install ernie==1.2.0 +``` + +### **迁移学习流程** +用户完成迁移学习前需要先编写好用于迁移学习的脚本。用户编写脚本的过程非常简单,仅需要十余行代码即可完成。整个脚本的编写过程,可以分为如下几个步骤: +1. 导入必要的包。 +2. 加载预训练模型(Module),即加载PaddleHub提供的预训练模型。 +3. 加载数据集(Dataset),用户可以选择使用dataset API加载PaddleHub自带的数据集或者自行编写加载数据集的类来加载自定义数据集。 +4. 配置数据读取器(Reader),负责将dataset的数据进行预处理,以特定格式组织并输入给模型进行训练。 +5. 选择优化策略(Strategy),优化策略包含了多种预训练参数,例如使用什么学习率变化策略,使用哪种类型的优化器,使用什么类型的正则化等。 +6. 设置运行配置(RunConfig),RunConfig包含了一些训练相关的配置,包括是否使用GPU、训练的轮数(Epoch)、训练批次大小(batch_size)等。 +7. 组建训练任务(Task),一个迁移学习训练任务中会包含与该任务相关的Program和上面设置好的数据读取器Reader、运行配置等内容。 +8. 启动Fine-tune,使用Finetune_and_eval函数完成训练和评估。 + +### **学会编写迁移学习训练脚本** +PaddleHub提供了Finetune API和预训练模型完成多种不同任务场景的迁移学习,包括图像分类、文本分类、多标签分类、序列标注、检索式问答任务、回归任务、句子语义相似度计算、阅读理解任务等。本文将以文本分类为例介绍迁移学习脚本的编写方法。 +#### **1. 导入必要的包。** + + +```python +import paddlehub as hub +``` + +#### **2. 加载预训练模型** +使用如下代码加载预训练模型,本例使用ERNIE预训练模型来完成文本分类任务。ERNIE(Enhanced Representation through kNowledge IntEgration)是百度提出的语义表示模型,以Transformer Encoder为网络基本组件,其预训练过程利用了更丰富的语义知识和更多的语义任务,用户可以使用该预训练模型随时逐步引入不同的自定义任务,例如命名实体预测、语篇关系识别、句子顺序预测任务、情感分析等。 + + +```python +module = hub.Module(name="ernie") +``` + + +PaddleHub还提供很多了其它可用于迁移学习的预训练模型, 在PaddleHub的官网上,图像分类、语义模型和情感分析几个目录下的预训练模型都支持迁移学习,用户仅需要将name的取值换成预训练模型名称即可,例如右侧红框中的示例。 + +![](../imgs/Howtofinetune1.png) + + + +#### **3. 
加载数据集** +在加载好预训练模型后,我们来加载数据集。用于迁移学习的数据集来源可以分为两种,用户自定义数据集和PaddleHub提供的数据集,使用不同类型的数据集加载方式也有所不同。 +##### **加载PaddleHub自带数据集** +如果用户使用的是PaddleHub自带数据集,则可以通过PaddleHub的数据集API编写一行代码完成加载数据集的动作。 + + +```python +dataset = hub.dataset.ChnSentiCorp() +``` + +其中ChnSentiCorp是中文情感分析数据集,其目标是判断一段文本的情感态度。例如文本是“这道菜很好吃”,则对应的标签为“1”,表示正向评价,又例如“房间太小了”,对应标签为“0”,表示负面评价。PaddleHub还提供了其他的文本分类数据集,用户可以自行选择数据集对应的API替换上面代码中dataset的取值,具体信息如下表所示。 + +|数据集|名称|API| +|:--------:|:--------:|:--------:| +|ChnSentiCorp|中文情感分析数据集|hub.dataset.ChnSentiCorp()| +|LCQMC|哈尔滨工业大学在自然语言处理国际顶会 COLING2018 构建的问答匹配中文数据集,其目标是判断两个问题的语义是否相同。|hub.dataset.LCQMC()| +|NLPCC-DPQA|国际自然语言处理和中文计算会议NLPCC于2016年举办的评测任务数据集,,其目标是选择能够回答问题的答案。|hub.dataset.NLPCC_DPQA()| +|MSRA-NER|微软亚研院发布的数据集,其目标是命名实体识别|hub.dataset.MSRA-NER()| +|Toxic|英文多标签分类数据集|hub.dataset.Toxic()| +|SQUAD|英文阅读理解数据集|hub.dataset.SQUAD()| +|GLUE-CoLA|文本分类任务数据集|hub.dataset.GLUE("CoLA")| +|GLUE-SST-2|情感分析任务数据集|hub.dataset.GLUE("SST-2")| +|GLUE-MNLI|文本推理任务数据集|hub.dataset.GLUE("MNLI_m")| +|GLUE-QQP|句子对分类任务数据集|hub.dataset.GLUE("QQP")| +|GLUE-QNLI|问题推理任务数据集|hub.dataset.GLUE("QNLI")| +|GLUE-STS-B|回归任务数据集|hub.dataset.GLUE("STS-B")| +|GLUE-MRPC|句子对分类任务数据集|hub.dataset.GLUE("MRPC")| +|GLUE-RTE|文本蕴含任务数据集|hub.dataset.GLUE("RTE")| +|XNLI|跨语言自然语言推理数据集|hub.dataset.XNLI(language=zh)| +|ChineseGLUE-TNEWS|今日头条中文新闻(短文本)分类数据集|hub.dataset.TNews()| +|ChineseGLUE-INEWS|互联网情感分析任务数据集|hub.dataset.INews()| +|DRCD|台达阅读理解数据集,属于通用领域繁体中文机器阅读理解数据集|hub.dataset.DRCD()| +|CMRC2018|中文机器阅读理解的跨度提取数据集|hub.dataset.CMRC2018()| +|ChinesGLUE-BQ|智能客服中文问句匹配数据集|hub.dataset.BQ()| +|ChineseGLUE-IFLYTEK|中文长文本分类数据集,该数据集共有1.7万多条关于app应用描述的长文本标注数据|hub.dataset.IFLYTEK()| +|ChineseGLUE-THUCNEWS|中文长文本分类数据集,该数据集共有4万多条中文新闻长文本标注数据,共14个类别|hub.dataset.THUCNEWS()| +|DogCatDataset|由Kaggle提供的数据集,用于图像二分类|hub.dataset.DogCatDataset()| +|Food101|由Kaggle提供的食品图片数据集,含有101种类别|hub.dataset.Food101()| +|Indoor67|由麻省理工学院发布的数据集,其包含67种室内场景,其目标是识别一张室内图片的场景类别。|hub.dataset.Indoor67()| +|Flowers|花卉数据集,数据集有5种类型,包括"roses","tulips","daisy","sunflowers","dandelion"|hub.dataset.Flowers()| +|StanfordDogs|斯坦福大学发布的数据集,其包含120个种类的狗,用于做图像分类。|hub.dataset.StanfordDogs()| + + +##### **加载自定义数据集** +* 加载文本类自定义数据集。用户仅需要继承基类BaseNLPDatast,修改数据集存放地址以及类别即可,具体可以参考如下代码。 +```python +from paddlehub.dataset.base_nlp_dataset import BaseNLPDataset +# 构建数据集的类 +class DemoDataset(BaseNLPDataset): + def __init__(self): + # 数据集实际路径 + self.dataset_dir = "path/to/dataset" + super(DemoDataset, self).__init__( + base_path=self.dataset_dir, + train_file="train.tsv", # 训练集存放地址 + dev_file="dev.tsv", # 验证集存放地址 + test_file="test.tsv", # 测试集存放地址 + # 如果还有预测数据(不需要文本类别label),可以放在predict.tsv + predict_file="predict.tsv", + train_file_with_header=True, # 训练集文件是否有列说明 + dev_file_with_header=True, # 验证集文件是否有列说明 + test_file_with_header=True, # 测试集文件是否有列说明 + predict_file_with_header=True, # 预测集文件是否有列说明 + # 数据集类别集合 + label_list=["0", "1"]) +# 通过创建Dataset对象加载自定义文本数据集 +dataset = DemoDataset() +``` + +然后就可以通过DemoDataset()获取自定义数据集了。进而配合数据预处理器以及预训练模型如ERNIE完成文本类任务。 + +* 加载图像类自定义数据集。用用户仅需要继承基类BaseCVDatast,修改数据集存放地址即可,具体可以参考如下代码。 +```python +from paddlehub.dataset.base_cv_dataset import BaseCVDataset + +class DemoDataset(BaseCVDataset): + def __init__(self): + # 数据集存放位置 + self.dataset_dir = "/test/data" + super(DemoDataset, self).__init__( + base_path=self.dataset_dir, + train_list_file="train_list.txt", # 训练集存放地址 + validate_list_file="validate_list.txt", # 验证集存放地址 + test_list_file="test_list.txt", # 测试集存放地址 + predict_file="predict_list.txt", # 预测集存放地址 + label_list_file="label_list.txt", # 数据集类别文件所在地址 + # 
如果您的数据集类别较少,可以不用定义label_list.txt,可以在最后设置label_list=["数据集所有类别"]。 + ) +# 通过创建Dataset对象加载图像类数据集 +dataset = DemoDataset() +``` + +然后就可以通过DemoDataset()获取自定义数据集了。进而配合数据预处理器以及预训练模型完成视觉类的迁移学习任务。 + +#### **4. 配置数据预处理器** + +通过使用PaddleHub的数据预处理器API来读取NLP或CV的数据集数据。 + + + +```python +reader = hub.reader.ClassifyReader( + dataset=dataset, + vocab_path=module.get_vocab_path(), # 返回预训练模型对应的词表 + max_seq_len=128, # 需要与Step1中context接口传入的序列长度保持一致 + sp_model_path=module.get_spm_path(), # 若module为ernie_tiny则返回对应的子词切分模型,否则返回None + word_dict_path=module.get_word_dict_path()) # 若module为ernie_tiny则返回对应的词语切分模型,否则返回None + +``` + +对于不同的任务类型,用户可以选择不同的Reader。 + +|数据读取器|描述|任务类型|API示例| +|:--------:|:--------:|:--------:|:--------| +|ClassifyReader|适用于Transformer预训练模型(ERNIE/BERT)的数据预处理器。|NLP|reader = hub.reader.ClassifyReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=128,
        sp_model_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path()) | +|LACClassifyReader|以LAC模块为切词器的数据预处理器,适用于Senta、ELMo等需要以词粒度分词的任务。|NLP|reader = hub.reader.LACClassifyReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path())| +|SequenceLabelReader|适用于Transformer类模型(ERNIE/BERT)的序列标注预处理器。|NLP|reader = hub.reader.SequenceLabelReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=128,
        sp_model_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path())| +|MultiLabelClassifyReader|适用于Transformer类模型(ERNIE/BERT)的多标签分类预处理器。|NLP|reader = hub.reader.MultiLabelClassifyReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=128)| +|ReadingComprehensionReader|适用于Transformer类模型(ERNIE/BERT)的阅读理解任务预处理器。|NLP|reader = hub.reader.ReadingComprehensionReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_length=384)| +|RegressionReader|适用于回归任务的数据预处理器。|NLP|reader = hub.reader.RegressionReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len)| +|ImageClassificationReader|适用于图像分类数据的预处理器。会修改输入图像的尺寸、进行标准化处理、图像增广处理等操作。|CV|reader = hub.reader.ImageClassificationReader(
        image_width=module.get_expected_image_width(),
        image_height=module.get_expected_image_height(),
        images_mean=module.get_pretrained_images_mean(),
        images_std=module.get_pretrained_images_std(),
        dataset=dataset)| + + +#### **5. 选择优化策略** +在PaddleHub中,Strategy类封装了一系列适用于迁移学习的Fine-tuning策略。Strategy包含了对预训练参数使用什么学习率变化策略,使用哪种类型的优化器,使用什么类型的正则化等。在我们要做的文本分类任务中,我们使用AdamWeightDecayStrategy优化策略。具体可以参考如下代码: + + +```python +strategy = hub.AdamWeightDecayStrategy( + learning_rate=5e-5, # Fine-tune过程中的最大学习率 + weight_decay=0.01, # 模型的正则项参数,默认0.01,如果模型有过拟合倾向,可适当调高这一参数 + warmup_proportion=0.1, #如果warmup_proportion>0, 例如0.1, 则学习率会在前10%的steps中线性增长至最高值learning_rate + # 有两种策略可选: + # (1)linear_decay策略学习率会在最高点后以线性方式衰减; + # (2)noam_decay策略学习率会在最高点以多项式形式衰减; + lr_scheduler="linear_decay", +) +``` + +包括AdamWeightDecayStrategy在内,PaddleHub还提供了多种优化策略的API。 + +|优化策略|描述|API示例| +|:--------:|:--------|:--------| +|DefaultFinetuneStrategy|默认的优化策略。其对应参数如下:
* learning_rate: 全局学习率。默认为1e-4。
* optimizer_name: 优化器名称。默认adam。
* regularization_coeff: 正则化的λ参数。默认为1e-3。
在图像分类任务中推荐使用此优化策略。|strategy = hub.DefaultFinetuneStrategy(
        learning_rate=1e-4,
        optimizer_name="adam",
        regularization_coeff=1e-3)| +|AdamWeightDecayStrategy|基于Adam优化器的学习率衰减策略。其对应参数如下:
* learning_rate: 全局学习率,默认为1e-4。
* lr_scheduler: 学习率调度方法,默认为"linear_decay"。
* warmup_proportion: warmup步数占总训练步数的比例,例如设为0.1时,学习率会在前10%的训练步数内线性增长至最高值。&#13;
* weight_decay: 权重衰减(正则项)系数,默认为0.01,模型有过拟合倾向时可适当调高。&#13;
* optimizer_name: 优化器名称,默认为adam。
在文本分类、阅读理解等任务中推荐使用此优化策略。|strategy = hub.AdamWeightDecayStrategy(
        learning_rate=1e-4,
        lr_scheduler="linear_decay",
        warmup_proportion=0.0,
        weight_decay=0.01,
        optimizer_name="adam")| +|L2SPFinetuneStrategy|使用L2SP正则作为惩罚因子的Finetune策略。其对应参数如下:
* learning_rate: 全局学习率。默认为1e-4。
* optimizer_name: 优化器名称。默认adam。
* regularization_coeff: 正则化的λ参数。默认为1e-3。|strategy = hub.L2SPFinetuneStrategy(
        learning_rate=1e-4,
        optimizer_name="adam",
        regularization_coeff=1e-3)| +|ULMFiTStrategy|该策略实现了ULMFiT论文中提出的三种策略:
* Slanted triangular learning rates是一种学习率先上升再下降的策略。
* Discriminative fine-tuning是一种学习率逐层递减的策略,通过该策略可以减缓底层的更新速度。
* Gradual unfreezing是一种逐层解冻的策略,通过该策略可以优先更新上层,再慢慢解冻下层参与更新。
其对应参数如下:
* learning_rate: 全局学习率。默认为1e-4。
* optimizer_name: 优化器名称。默认为adam。
* cut_fraction: 设置Slanted triangular learning rates学习率上升的步数在整个训练总步数中的比例。默认为0.1,如果设置为0,则不采用Slanted triangular learning rates。
* ratio: 设置Slanted triangular learning rates下降的最小学习率与上升的最大学习率的比例关系,默认为32,表示最小学习率是最大学习率的1/32。
* dis_blocks: 设置 Discriminative fine-tuning中的块数。默认为3,如果设置为0,则不采用Discriminative fine-tuning。
* factor: 设置Discriminative fine-tuning的衰减率。默认为2.6,表示下一层的学习率是上一层的1/2.6。
* frz_blocks: 设置Gradual unfreezing中的块数。块的概念同“dis_blocks”中介绍的概念。|strategy = hub.ULMFiTStrategy(
        learning_rate=1e-4,
        optimizer_name="adam",
        cut_fraction=0.1,
        ratio=32,
        dis_blocks=3,
        factor=2.6,
        frz_blocks=3)| + + + +#### **6. 设置运行配置。** +在PaddleHub中,用户可以使用Finetune API中的RunConfig配置Task进行Finetune时使用的参数,包括运行的Epoch次数、batch的大小、是否使用GPU训练等。代码示例如下所示。 + + +```python +config = hub.RunConfig(use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy) +``` + +#### **7. 组建训练任务。** +有了合适的预训练模型,并加载好要迁移的数据集后,我们开始组建一个Task。在PaddleHub中,Task代表了一个Fine-tune的任务。任务中包含了执行该任务相关的Program、数据读取器Reader、运行配置等内容。在这里可以找到文本分类任务对应的Task说明[TextClassifierTask](https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.6/docs/reference/task/text_classify_task.md)。具体实现方案如下: +1. 获取module(PaddleHub的预训练模型)的上下文环境,包括输入和输出的变量,以及Paddle Program(可执行的模型格式)。 +2. 从预训练模型的输出变量中找到特征图提取层feature_map,在feature_map后面接入一个全连接层,如下代码中通过hub.TextClassifierTask的pooled_output参数指定。 +3. 网络的输入层保持不变,依然从输入层开始,如下代码中通过hub.TextClassifierTask的参数feed_list变量指定。 +hub.TextClassifierTask就是通过这两个参数明确我们的截取模型网络的要求,按照这样的配置,我们截取的网络是从输入层一直到特征提取的最后一层“pooled_output”,表示我将使用截出的网络来进行迁移学习训练。 + + +```python +# 获取Module的上下文信息,得到输入、输出以及预训练的Paddle Program副本。 +# trainable设置为True时,Module中的参数在Fine-tune时也会随之训练,否则保持不变。 +# 其中最大序列长度max_seq_len为可调整的参数,建议值为128,根据任务文本长度不同可以进行修改,但最大不超过512。 +# 若序列长度不足,会通过padding方式补到max_seq_len, 若序列长度大于该值,则会以截断方式让序列长度为max_seq_len。 +inputs, outputs, program = module.context(trainable=True, max_seq_len=128) +# 返回ERNIE/BERT模型对应的[CLS]向量,可以用于句子或句对的特征表达。 +pooled_output = outputs["pooled_output"] + +# feed_list的Tensor顺序不可以调整 +# 指定ERNIE中的输入tensor的顺序,与ClassifyReader返回的结果一致 +feed_list = [ + inputs["input_ids"].name, + inputs["position_ids"].name, + inputs["segment_ids"].name, + inputs["input_mask"].name, +] +# 通过输入特征,label与迁移的类别数,可以生成适用于文本分类的迁移任务 +cls_task = hub.TextClassifierTask( + data_reader=reader, # 读取数据的reader + feature=pooled_output, # 从预训练提取的特征矩阵 + feed_list=feed_list, # 待feed变量的名字列表 + num_classes=dataset.num_labels, # 数据集的类别数量 + metrics_choices = ["acc"], + config=config) # 运行配置 +``` + +PaddleHub预置了常见任务的Task,每种Task都有特定的应用场景并提供了对应的度量指标,满足用户的不同需求。 + +|Task类型|描述|任务类型| +|:--------:|:--------:|:--------:| +|ImageClassifierTask|该Task基于输入的特征,添加一个或多个全连接层来创建一个分类任务用于Fine-tune,度量指标为准确率,损失函数为交叉熵Loss。|图像分类任务| +|TextClassifierTask|该Task基于输入的特征,添加一个Dropout层,以及一个或多个全连接层来创建一个文本分类任务用于finetune,度量指标为准确率,损失函数为交叉熵Loss。|文本分类任务| +|SequenceLabelTask|该Task基于输入的特征,添加一个全连接层或者一个全连接层和CRF层来创建一个序列标注任务用于Fine-tune,度量指标为F1,损失函数为交叉熵Loss。|序列标注任务| +|MultiLabelClassifierTask|该Task基于输入的特征,添加一个或多个全连接层来创建一个多标签分类任务用于finetune,度量指标为多个标签的平均AUC,损失函数为多个标签的平均交叉熵。|多标签分类任务| +|RegressionTask|该Task基于输入的特征,添加一个Dropout层,以及一个或多个全连接层来创建一个文本回归任务用于finetune,度量指标为准确率,损失函数为均方差损失函数。|文本回归任务| +|ReadingComprehensionTask|该Task基于输入的特征,添加一个全连接层来创建一个阅读理解任务用于Fine-tune,损失函数为交叉熵Loss。|阅读理解任务| + +在设定每个Task前,用户需要提前了解待迁移学习的预训练模型的输入与输出,即对应上面代码中的“feed_list”和“pooled_output”。具体的输入输出代码可以分为如下几类: +* 图像分类模型 +``` +input_dict, output_dict, program = module.context(trainable=True) +feature_map = output_dict["feature_map"] +feed_list = [input_dict["image"].name] +``` +* 自然语言处理模型(不包括word2vec_skipgram、simnet_bow、文本匹配和文本生成几个模型) +``` +inputs, outputs, program = module.context(trainable=True, max_seq_len=128) +pooled_output = outputs["pooled_output"] +feed_list = [ + inputs["input_ids"].name, + inputs["position_ids"].name, + inputs["segment_ids"].name, + inputs["input_mask"].name, +] +``` +* word2vec_skipgram模型 +``` +inputs, outputs, program = module.context(trainable=True) +word_ids = inputs["word_ids"] +embedding = outputs["word_embs"] +``` +* simnet_bow模型 +``` +inputs, outputs, program = module.context(trainable=True, max_seq_len=args.max_seq_len, num_slots=2) +query = outputs["emb"] +title = outputs['emb_2'] +``` +* Pairwise文本匹配模型 +``` 
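+# Pairwise文本匹配需要同时输入三段文本,因此context接口中设置num_slots=3,
+# 下面三个输出向量分别对应query与两段候选文本的语义特征,可用于后续的相似度计算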
+inputs, outputs, program = module.context(trainable=True, max_seq_len=args.max_seq_len, num_slots=3) +query = outputs["emb"] +left = outputs['emb_2'] +right = outputs['emb_3'] +``` +* Pointwise文本匹配 +``` +inputs, outputs, program = module.context(trainable=True, max_seq_len=args.max_seq_len, num_slots=2) +query = outputs["emb"] +title = outputs['emb_2'] +``` +* 文本生成模型 +``` +inputs, outputs, program = module.context(trainable=True, max_seq_len=128) +pooled_output = outputs["pooled_output"] +sequence_output = outputs["sequence_output"] +``` + +#### **8 启动Fine-tune,使用Finetune_and_eval函数完成训练和评估。** + + +```python +cls_task.finetune_and_eval() +``` + +显示信息如下例所示。可以看到训练的评估结果,Loss值和准确率等等。 +``` +[2020-07-28 21:28:21,658] [ TRAIN] - step 810 / 900: loss=0.05022 acc=0.97813 [step/sec: 4.07] +[2020-07-28 21:28:24,115] [ TRAIN] - step 820 / 900: loss=0.04719 acc=0.98125 [step/sec: 4.07] +[2020-07-28 21:28:26,574] [ TRAIN] - step 830 / 900: loss=0.06895 acc=0.98125 [step/sec: 4.07] +[2020-07-28 21:28:29,035] [ TRAIN] - step 840 / 900: loss=0.07830 acc=0.97813 [step/sec: 4.07] +[2020-07-28 21:28:31,490] [ TRAIN] - step 850 / 900: loss=0.07279 acc=0.97500 [step/sec: 4.08] +[2020-07-28 21:28:33,939] [ TRAIN] - step 860 / 900: loss=0.03220 acc=0.99375 [step/sec: 4.09] +[2020-07-28 21:28:36,388] [ TRAIN] - step 870 / 900: loss=0.05016 acc=0.98750 [step/sec: 4.09] +[2020-07-28 21:28:38,840] [ TRAIN] - step 880 / 900: loss=0.05604 acc=0.98750 [step/sec: 4.08] +[2020-07-28 21:28:41,293] [ TRAIN] - step 890 / 900: loss=0.05622 acc=0.98125 [step/sec: 4.08] +[2020-07-28 21:28:43,748] [ TRAIN] - step 900 / 900: loss=0.06642 acc=0.97813 [step/sec: 4.08] +[2020-07-28 21:28:43,750] [ INFO] - Evaluation on dev dataset start +[2020-07-28 21:28:46,654] [ EVAL] - [dev dataset evaluation result] loss=0.17890 acc=0.94079 [step/sec: 13.23] +[2020-07-28 21:28:46,657] [ INFO] - Evaluation on dev dataset start +[2020-07-28 21:28:49,527] [ EVAL] - [dev dataset evaluation result] loss=0.17890 acc=0.94079 [step/sec: 13.39] +[2020-07-28 21:28:49,529] [ INFO] - Load the best model from ckpt_20200728212416/best_model +[2020-07-28 21:28:50,112] [ INFO] - Evaluation on test dataset start +[2020-07-28 21:28:52,987] [ EVAL] - [test dataset evaluation result] loss=0.14264 acc=0.94819 [step/sec: 13.36] +[2020-07-28 21:28:52,988] [ INFO] - Saving model checkpoint to ckpt_20200728212416/step_900 +[2020-07-28 21:28:55,789] [ INFO] - PaddleHub finetune finished. +``` + +通过Fine-tune完成模型训练后,在对应的ckpt目录(CKPT_DIR)下,PaddleHub会自动保存验证集上效果最好的模型。用户可以参考如下代码进行预测,其中推理出的标签值0表示负向评价,1表示正向评价。。 + + +```python +import numpy as np + + +# 待预测数据 +data = [ + ["这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般"], + ["交通方便;环境很好;服务态度很好 房间较小"], + ["19天硬盘就罢工了,算上运来的一周都没用上15天,可就是不能换了。唉,你说这算什么事呀!"] +] + +index = 0 +run_states = cls_task.predict(data=data) +results = [run_state.run_results for run_state in run_states] +for batch_result in results: + # 预测类别取最大分类概率值 + batch_result = np.argmax(batch_result[0], axis=1) + for result in batch_result: + print("%s\tpredict=%s" % (data[index][0], result)) + index += 1 +``` + +预测结果如下所示。 +``` +[2020-07-28 18:06:45,441] [ INFO] - PaddleHub predict start +[2020-07-28 18:06:45,442] [ INFO] - The best model has been loaded +[2020-07-28 18:06:48,406] [ INFO] - PaddleHub predict finished. + +这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般 predict=0 +交通方便;环境很好;服务态度很好 房间较小 predict=1 +19天硬盘就罢工了,算上运来的一周都没用上15天,可就是不能换了。唉,你说这算什么事呀! 
predict=0 +``` + + diff --git a/docs/tutorial/how_to_load_data.md b/docs/tutorial/how_to_load_data.md index 97c8ae2f853a42b994b908462dba583c35495957..73e3ae8a068429a9b37fd1efd709bdace1509c32 100644 --- a/docs/tutorial/how_to_load_data.md +++ b/docs/tutorial/how_to_load_data.md @@ -1,13 +1,13 @@ # 自定义数据 -训练一个新任务时,如果从零开始训练时,这将是一个耗时的过程,并且效果可能达不到理想的效果,此时您可以利用PaddleHub提供的预训练模型进行具体任务的Fine-tune。您只需要对自定义数据进行相应的预处理,随后输入预训练模型中,即可得到相应的结果。 +训练一个新任务时,如果从零开始训练时,这将是一个耗时的过程,并且效果可能达不到理想的效果,此时您可以利用PaddleHub提供的预训练模型进行具体任务的Fine-tune。您只需要对自定义数据进行相应的预处理,随后输入预训练模型中,即可得到相应的结果。请参考如下内容设置数据集的结构。 ## 一、NLP类任务如何自定义数据 本文以预训练模型ERNIE对文本分类任务进行Fine-tune为例,说明如何利用PaddleHub适配自定义数据完成Fine-tune。 -### 数据准备 +数据目录如下所示。 ``` ├─data: 数据目录 @@ -30,49 +30,13 @@ text_a label 1.接电源没有几分钟,电源适配器热的不行. 2.摄像头用不起来. 3.机盖的钢琴漆,手不能摸,一摸一个印. 4.硬盘分区不好办. 0 ``` -### 自定义数据加载 -加载文本类自定义数据集,用户仅需要继承基类BaseNLPDatast,修改数据集存放地址以及类别即可。具体使用如下: - -**NOTE:** -* 数据集文件编码格式建议为utf8格式。 -* 如果相应的数据集文件没有上述的列说明,如train.tsv文件没有第一行的`text_a label`,则train_file_with_header=False。 -* 如果您还有预测数据(没有文本类别),可以将预测数据存放在predict.tsv文件,文件格式和train.tsv类似。去掉label一列即可。 -* 分类任务中,数据集的label必须从0开始计数 - - -```python -from paddlehub.dataset.base_nlp_dataset import BaseNLPDataset - -class DemoDataset(BaseNLPDataset): - """DemoDataset""" - def __init__(self): - # 数据集存放位置 - self.dataset_dir = "path/to/dataset" - super(DemoDataset, self).__init__( - base_path=self.dataset_dir, - train_file="train.tsv", - dev_file="dev.tsv", - test_file="test.tsv", - # 如果还有预测数据(不需要文本类别label),可以放在predict.tsv - predict_file="predict.tsv", - train_file_with_header=True, - dev_file_with_header=True, - test_file_with_header=True, - predict_file_with_header=True, - # 数据集类别集合 - label_list=["0", "1"]) -dataset = DemoDataset() -``` -之后,您就可以通过DemoDataset()获取自定义数据集了。进而配合ClassifyReader以及预训练模型如ERNIE完成文本分类任务。 ## 二、CV类任务如何自定义数据 利用PaddleHub迁移CV类任务使用自定义数据时,用户需要自己切分数据集,将数据集且分为训练集、验证集和测试集。 -### 数据准备 - -需要三个文本文件来记录对应的图片路径和标签,此外还需要一个标签文件用于记录标签的名称。 +数据目录如下所示。需要三个文本文件来记录对应的图片路径和标签,此外还需要一个标签文件用于记录标签的名称。 ``` ├─data: 数据目录   ├─train_list.txt:训练集数据列表 @@ -108,33 +72,3 @@ cat dog ``` - -### 自定义数据加载 - -加载图像类自定义数据集,用户仅需要继承基类BaseCVDatast,修改数据集存放地址即可。具体使用如下: - -**NOTE:** -* 数据集文件编码格式建议为utf8格式。 -* dataset_dir为数据集实际路径,需要填写全路径,以下示例以`/test/data`为例。 -* 训练/验证/测试集的数据列表文件中的图片路径需要相对于dataset_dir的相对路径,例如图片的实际位置为`/test/data/dog/dog1.jpg`。base_path为`/test/data`,则文件中填写的路径应该为`dog/dog1.jpg`。 -* 如果您还有预测数据(没有文本类别),可以将预测数据存放在predict_list.txt文件,文件格式和train_list.txt类似。去掉label一列即可 -* 如果您的数据集类别较少,可以不用定义label_list.txt,可以选择定义label_list=["数据集所有类别"]。 -* 分类任务中,数据集的label必须从0开始计数 - - ```python -from paddlehub.dataset.base_cv_dataset import BaseCVDataset - -class DemoDataset(BaseCVDataset): - def __init__(self): - # 数据集存放位置 - self.dataset_dir = "/test/data" - super(DemoDataset, self).__init__( - base_path=self.dataset_dir, - train_list_file="train_list.txt", - validate_list_file="validate_list.txt", - test_list_file="test_list.txt", - predict_file="predict_list.txt", - label_list_file="label_list.txt", - # label_list=["数据集所有类别"]) -dataset = DemoDataset() -``` diff --git a/docs/tutorial/tutorial_index.rst b/docs/tutorial/tutorial_index.rst index 44295eccbad69c55772f6606062bf6ca64f39f45..e4963b9b4829b376c27e0d5a18e6325acca67d80 100644 --- a/docs/tutorial/tutorial_index.rst +++ b/docs/tutorial/tutorial_index.rst @@ -1,7 +1,7 @@ -教程 +迁移学习 ================== -以下是关于PaddleHub的使用教程,介绍了命令行使用、如何自定义数据完成Finetune、如何自定义迁移任务、如何服务化部署预训练模型、如何获取ERNIE/BERT Embedding、如何用word2vec完成语义相似度计算、ULMFit优化策略介绍、如何使用超参优化AutoDL Finetuner、如何用Hook机制改写Task内置方法。 +本文将介绍介绍了如何自定义数据、如何完成Finetune以及如何使用超参优化AutoDL Finetuner。 
详细信息,参考以下教程: @@ -9,13 +9,8 @@ .. toctree:: :maxdepth: 1 - 命令行工具 自定义数据 - Fine-tune模型转化为PaddleHub Module - 自定义任务 - 服务化部署 - 文本Embedding服务 - 语义相似度计算 ULMFit优化策略 + 如何迁移学习 超参优化 - Hook机制 + diff --git a/hub_module/modules/image/humanseg/humanseg_lite/README.md b/hub_module/modules/image/humanseg/humanseg_lite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4a1b29f955575b3437399ad6603bb1c545a6cb07 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_lite/README.md @@ -0,0 +1,205 @@ +## 模型概述 + +HumanSeg_lite是基于ShuffleNetV2网络结构的基础上进行优化的人像分割模型,进一步减小了网络规模,网络大小只有541K,量化后只有187K,适用于手机自拍人像分割等实时分割场景。 + + +## 命令行预测 + +``` +hub run humanseg_lite --input_path "/PATH/TO/IMAGE" + +``` + + +## API + +```python +def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_lite_output') +``` + +预测API,用于人像分割。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* batch\_size (int): batch 的大小; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + +```python +def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): +``` + +预测API,用于逐帧对视频人像分割。 + +**参数** + +* frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* frame_id (int): 当前帧的编号; +* prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; +* prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图 +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + + +**返回** + +* img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明)。 +* cur_gray (numpy.ndarray): 当前帧输入网络图像的灰度图; +* optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图 + + +```python +def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_lite_video_result'): +``` + +预测API,用于视频人像分割。 + +**参数** + +* video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果。 +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 + + +```python +def save_inference_model(dirname='humanseg_lite_model', + model_filename=None, + params_filename=None, + combined=True) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +图片分割及视频分割代码示例: +```python +import cv2 +import paddlehub as hub + +human_seg = hub.Module('humanseg_lite') +im = cv2.imread('/PATH/TO/IMAGE') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = human_seg.segment(images=[im],visualization=True) +print(res[0]['data']) +human_seg.video_segment('/PATH/TO/VIDEO') +human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + +``` +视频流预测代码示例: +```python +import cv2 +import numpy as np +import paddlehub as hub + +human_seg = hub.Module('humanseg_lite') +cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') +fps = cap_video.get(cv2.CAP_PROP_FPS) +save_path = 'humanseg_lite_video.avi' +width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) +height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) +cap_out = cv2.VideoWriter(save_path, 
cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) +prev_gray = None +prev_cfd = None +while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + +cap_video.release() +cap_out.release() + +``` +## 服务部署 + +PaddleHub Serving可以部署一个人像分割的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m humanseg_lite +``` + +这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/humanseg_lite" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 保存图片 +mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) +rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) +cv2.imwrite("segment_human_lite.png", rgba) +``` +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/HumanSeg + + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/hub_module/modules/image/humanseg/humanseg_lite/__init__.py b/hub_module/modules/image/humanseg/humanseg_lite/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hub_module/modules/image/humanseg/humanseg_lite/data_feed.py b/hub_module/modules/image/humanseg/humanseg_lite/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..03c230b7bc675d4f5ae60f6f00007ea7d919c3b3 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_lite/data_feed.py @@ -0,0 +1,65 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader', 'preprocess_v'] + + +def preprocess_v(img, w, h): + img = cv2.resize(img, (w, h), cv2.INTER_LINEAR).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. 
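+            Each dict holds 'org_im', 'org_im_path' and 'org_im_shape' for the original
+            image, plus 'image', the 192x192 normalized CHW float array fed to the network.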
+ """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) + #print(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.resize(img, (192, 192)).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + element['image'] = img + yield element diff --git a/hub_module/modules/image/humanseg/humanseg_lite/module.py b/hub_module/modules/image/humanseg/humanseg_lite/module.py new file mode 100644 index 0000000000000000000000000000000000000000..220798671ea7b863bd4937b0e8b825fbe500d505 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_lite/module.py @@ -0,0 +1,462 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
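+"""humanseg_lite PaddleHub Module(基于ShuffleNetV2的轻量级人像分割模型)。
+
+提供 segment(图片分割)、video_stream_segment(视频逐帧分割)、video_segment(视频/摄像头分割)、
+save_inference_model(导出推理模型)等接口,并通过 @runnable / @serving 支持命令行调用与服务化部署。
+"""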
+import ast +import os +import os.path as osp +import argparse + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from humanseg_lite.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from humanseg_lite.data_feed import reader, preprocess_v +from humanseg_lite.optimal import postprocess_v, threshold_mask + + +@moduleinfo( + name="humanseg_lite", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="humanseg_lite is a semantic segmentation model.", + version="1.1.0") +class ShufflenetHumanSeg(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "humanseg_lite_inference") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = os.path.join(self.default_pretrained_model_path, + '__model__') + self.params_file_path = os.path.join(self.default_pretrained_model_path, + '__params__') + cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path, + self.params_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def segment(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_lite_output'): + """ + API for human segmentation. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + output = self.gpu_predictor.run([ + batch_image + ]) if use_gpu else self.cpu_predictor.run([batch_image]) + output = output[1].as_ndarray() + output = np.expand_dims(output[:, 1, :, :], axis=1) + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + data_out=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): + """ + API for human video segmentation. + + Args: + frame_org (numpy.ndarray): frame data, shape of each is [H, W, C], the color space is BGR. + frame_id (int): index of the frame to be decoded. + prev_gray (numpy.ndarray): gray scale image of last frame, shape of each is [H, W] + prev_cfd (numpy.ndarray): fusion image from optical flow image and segment result, shape of each is [H, W] + use_gpu (bool): Whether to use gpu. + + Returns: + img_matting (numpy.ndarray): data of segmentation mask. + cur_gray (numpy.ndarray): gray scale image of current frame, shape of each is [H, W] + optflow_map (numpy.ndarray): optical flow image of current frame, shape of each is [H, W] + + """ + resize_h = 192 + resize_w = 192 + is_init = True + width = int(frame_org.shape[0]) + height = int(frame_org.shape[1]) + disflow = cv2.DISOpticalFlow_create( + cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run( + [image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + if frame_id == 1: + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + else: + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (height, width), cv2.INTER_LINEAR) + + return [img_matting, cur_gray, optflow_map] + + def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_lite_video_result'): + """ + API for human video segmentation. + + Args: + video_path (str): The path to take the video under preprocess. If video_path is None, it will capture + the vedio from your camera. + use_gpu (bool): Whether to use gpu. + save_dir (str): The path to store output video. 
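+
+        Note:
+            With a video_path, the processed video is written to save_dir/result.avi;
+            with the camera (video_path is None), the result is shown in a preview
+            window and pressing 'q' quits.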
+ + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. " + "If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + resize_h = 192 + resize_w = 192 + if not video_path: + cap_video = cv2.VideoCapture(0) + else: + cap_video = cv2.VideoCapture(video_path) + + if not cap_video.isOpened(): + raise IOError("Error opening video stream or file, " + "--video_path whether existing: {}" + " or camera whether working".format(video_path)) + + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + disflow = cv2.DISOpticalFlow_create( + cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + is_init = True + fps = cap_video.get(cv2.CAP_PROP_FPS) + + if video_path is not None: + print('Please wait. It is computing......') + if not osp.exists(save_dir): + os.makedirs(save_dir) + save_path = osp.join(save_dir, 'result' + '.avi') + cap_out = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, + (width, height)) + + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run( + [image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose( + np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask( + optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), + cv2.INTER_LINEAR) + img_matting = np.repeat( + img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() + else: + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run( + [image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose( + np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask( + optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), + cv2.INTER_LINEAR) + img_matting = np.repeat( + img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting 
* frame_org + + (1 - img_matting) * bg_im).astype(np.uint8) + cv2.imshow('HumanSegmentation', comb) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + else: + break + cap_video.release() + + def save_inference_model(self, + dirname='humanseg_lite_model', + model_filename=None, + params_filename=None, + combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.segment(images=images_decode, **kwargs) + results = [{ + 'data': cv2_to_base64(result['data']) + } for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.segment( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='humanseg_lite_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', + type=str, + default='humanseg_lite_model', + help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") + self.arg_config_group.add_argument( + '--batch_size', + type=ast.literal_eval, + default=1, + help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + m = ShufflenetHumanSeg() + #shuffle.video_segment() + img = cv2.imread('photo.jpg') + # res = m.segment(images=[img], visualization=True) + # print(res[0]['data']) + # m.video_segment('') + cap_video = cv2.VideoCapture('video_test.mp4') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'result_frame.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, + cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, + (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = m.video_stream_segment( + frame_org=frame_org, + frame_id=cap_video.get(1), + prev_gray=prev_gray, + prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype( + np.uint8) + cap_out.write(comb) + else: + break + + cap_video.release() + cap_out.release() diff --git a/hub_module/modules/image/humanseg/humanseg_lite/optimal.py b/hub_module/modules/image/humanseg/humanseg_lite/optimal.py new file mode 100644 index 0000000000000000000000000000000000000000..7d01c98020b12ab7d7a54dd0f79942500ab48c0b --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_lite/optimal.py @@ -0,0 +1,109 @@ +# -*- coding:utf-8 -* +import numpy as np + + +def human_seg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow): + """计算光流跟踪匹配点和光流图 + 输入参数: + pre_gray: 上一帧灰度图 + cur_gray: 当前帧灰度图 + prev_cfd: 上一帧光流图 + dl_weights: 融合权重图 + disflow: 光流数据结构 + 返回值: + is_track: 光流点跟踪二值图,即是否具有光流点匹配 + track_cfd: 光流跟踪图 + """ + check_thres = 8 + h, w = pre_gray.shape[:2] + track_cfd = np.zeros_like(prev_cfd) + is_track = np.zeros_like(pre_gray) + flow_fw = disflow.calc(pre_gray, cur_gray, None) + flow_bw = disflow.calc(cur_gray, pre_gray, None) + flow_fw = np.round(flow_fw).astype(np.int) + flow_bw = np.round(flow_bw).astype(np.int) + y_list = np.array(range(h)) + x_list = np.array(range(w)) + yv, xv = np.meshgrid(y_list, x_list) + yv, xv = yv.T, xv.T + cur_x = xv + flow_fw[:, :, 0] + cur_y = yv + flow_fw[:, :, 1] + + # 超出边界不跟踪 + not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h) + flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]] + not_track += (np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) + + np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])) >= check_thres + track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track] + + is_track[cur_y[~not_track], cur_x[~not_track]] = 1 + + not_flow = np.all( + np.abs(flow_fw) == 0, axis=-1) * np.all( + np.abs(flow_bw) == 0, axis=-1) + dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05 + return track_cfd, is_track, dl_weights + + +def human_seg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track): + """光流追踪图和人像分割结构融合 + 输入参数: + track_cfd: 光流追踪图 + dl_cfd: 当前帧分割结果 + dl_weights: 融合权重图 + is_track: 光流点匹配二值图 + 返回 + cur_cfd: 光流跟踪图和人像分割结果融合图 + """ + fusion_cfd = dl_cfd.copy() + is_track = is_track.astype(np.bool) + fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + ( + 1 - dl_weights[is_track]) * track_cfd[is_track] + # 确定区域 + index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track + index_less01 = (dl_weights < 0.1) * index_certain + fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * 
track_cfd[ + index_less01] + index_larger09 = (dl_weights >= 0.1) * index_certain + fusion_cfd[index_larger09] = 0.4 * dl_cfd[index_larger09] + 0.6 * track_cfd[ + index_larger09] + return fusion_cfd + + +def threshold_mask(img, thresh_bg, thresh_fg): + dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg) + dst[np.where(dst > 1)] = 1 + dst[np.where(dst < 0)] = 0 + return dst.astype(np.float32) + + +def postprocess_v(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init): + """光流优化 + Args: + cur_gray : 当前帧灰度图 + pre_gray : 前一帧灰度图 + pre_cfd :前一帧融合结果 + scoremap : 当前帧分割结果 + difflow : 光流 + is_init : 是否第一帧 + Returns: + fusion_cfd : 光流追踪图和预测结果融合图 + """ + h, w = scoremap.shape + cur_cfd = scoremap.copy() + + if is_init: + if h <= 64 or w <= 64: + disflow.setFinestScale(1) + elif h <= 160 or w <= 160: + disflow.setFinestScale(2) + else: + disflow.setFinestScale(3) + fusion_cfd = cur_cfd + else: + weights = np.ones((h, w), np.float32) * 0.3 + track_cfd, is_track, weights = human_seg_tracking( + prev_gray, cur_gray, pre_cfd, weights, disflow) + fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track) + + return fusion_cfd diff --git a/hub_module/modules/image/humanseg/humanseg_lite/processor.py b/hub_module/modules/image/humanseg/humanseg_lite/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..718a6bd2b0d8e470d221b379c1763d3d69153e22 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_lite/processor.py @@ -0,0 +1,80 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, + visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for logit in data_out: + logit = (logit * 255).astype(np.uint8) + logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0])) + rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, rgba) + result['save_path'] = save_im_path + result['data'] = logit + else: + result['data'] = logit + print("result['data'] shape", result['data'].shape) + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
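+
+    If the target name already exists in output_dir, a 'time=<timestamp>' suffix is
+    appended so that earlier results are not overwritten.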
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join( + output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/hub_module/modules/image/humanseg/humanseg_mobile/README.md b/hub_module/modules/image/humanseg/humanseg_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fee6a83c4fa25b04790ae3b78005482377383396 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_mobile/README.md @@ -0,0 +1,208 @@ +## 模型概述 + +HumanSeg-mobile是基于HRNet(Deep High-Resolution Representation Learning for Visual Recognition)的人像分割网络。HRNet在特征提取过程中保持了高分辨率的信息,保持了物体的细节信息,并可通过控制每个分支的通道数调整模型的大小。HumanSeg-mobile采用了HRNet_w18_small_v1的网络结构,模型大小只有5.8M, 适用于移动端或服务端CPU的前置摄像头场景。 + +## 命令行预测 + +``` +hub run humanseg_mobile --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_mobile_output') +``` + +预测API,用于人像分割。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* batch\_size (int): batch 的大小; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + + +```python +def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): +``` + +预测API,用于逐帧对视频人像分割。 + +**参数** + +* frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* frame_id (int): 当前帧的编号; +* prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; +* prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图 +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + + +**返回** + +* img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明)。 +* cur_gray (numpy.ndarray): 当前帧输入网络图像的灰度图; +* optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图 + + +```python +def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_mobile_video_result'): +``` + +预测API,用于视频人像分割。 + +**参数** + +* video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果。 +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 + + +```python +def save_inference_model(dirname='humanseg_mobile_model', + model_filename=None, + params_filename=None, + combined=True) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +图片分割及视频分割代码示例: + +```python +import cv2 +import paddlehub as hub + +human_seg = hub.Module('humanseg_mobile') +im = cv2.imread('/PATH/TO/IMAGE') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = human_seg.segment(images=[im],visualization=True) +print(res[0]['data']) +human_seg.video_segment('/PATH/TO/VIDEO') +human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + +``` +视频流预测代码示例: + +```python +import cv2 +import 
numpy as np +import paddlehub as hub + +human_seg = hub.Module('humanseg_mobile') +cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') +fps = cap_video.get(cv2.CAP_PROP_FPS) +save_path = 'humanseg_mobile_video.avi' +width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) +height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) +cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) +prev_gray = None +prev_cfd = None +while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + +cap_video.release() +cap_out.release() + +``` + +## 服务部署 + +PaddleHub Serving可以部署一个人像分割的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m humanseg_mobile +``` + +这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/humanseg_mobile" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 保存图片 +mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) +rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) +cv2.imwrite("segment_human_mobile.png", rgba) +``` + +### 查看代码 + + + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/hub_module/modules/image/humanseg/humanseg_mobile/__init__.py b/hub_module/modules/image/humanseg/humanseg_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hub_module/modules/image/humanseg/humanseg_mobile/data_feed.py b/hub_module/modules/image/humanseg/humanseg_mobile/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..704f4aff3a1dfcbce13a64a35d5fc357abff1e1c --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_mobile/data_feed.py @@ -0,0 +1,64 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['reader', 'preprocess_v'] + + +def preprocess_v(img, w, h): + img = cv2.resize(img, (w, h), cv2.INTER_LINEAR).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. 
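+            images and paths may be supplied together; entries from paths are
+            processed first, followed by the ndarray images.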
+ + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) + #print(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.resize(img, (192, 192)).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + element['image'] = img + yield element diff --git a/hub_module/modules/image/humanseg/humanseg_mobile/module.py b/hub_module/modules/image/humanseg/humanseg_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..874f397ee78b64ea412e98bad37cf047f332f2e4 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_mobile/module.py @@ -0,0 +1,448 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
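+# humanseg_mobile:基于HRNet_w18_small_v1的人像分割Module,接口与humanseg_lite一致,
+# 提供 segment、video_stream_segment、video_segment、save_inference_model 等方法,
+# 并支持命令行调用与 PaddleHub Serving 服务化部署。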
+import ast +import os +import os.path as osp +import argparse + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from humanseg_mobile.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from humanseg_mobile.data_feed import reader, preprocess_v +from humanseg_mobile.optimal import postprocess_v, threshold_mask + + +@moduleinfo( + name="humanseg_mobile", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="HRNet_w18_samll_v1 is a semantic segmentation model.", + version="1.1.0") +class HRNetw18samllv1humanseg(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "humanseg_mobile_inference") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = os.path.join(self.default_pretrained_model_path, + '__model__') + self.params_file_path = os.path.join(self.default_pretrained_model_path, + '__params__') + cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path, + self.params_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def segment(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_mobile_output'): + """ + API for human segmentation. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly." + "If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + # compatibility with older versions + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + output = self.gpu_predictor.run([ + batch_image + ]) if use_gpu else self.cpu_predictor.run([batch_image]) + output = output[1].as_ndarray() + output = np.expand_dims(output[:, 1, :, :], axis=1) + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + data_out=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): + """ + API for human video segmentation. + + Args: + frame_org (numpy.ndarray): frame data, shape of each is [H, W, C], the color space is BGR. + frame_id (int): index of the frame to be decoded. + prev_gray (numpy.ndarray): gray scale image of last frame, shape of each is [H, W] + prev_cfd (numpy.ndarray): fusion image from optical flow image and segment result, shape of each is [H, W] + use_gpu (bool): Whether to use gpu. + + Returns: + img_matting (numpy.ndarray): data of segmentation mask. + cur_gray (numpy.ndarray): gray scale image of current frame, shape of each is [H, W] + optflow_map (numpy.ndarray): optical flow image of current frame, shape of each is [H, W] + + """ + resize_h = 192 + resize_w = 192 + is_init = True + width = int(frame_org.shape[0]) + height = int(frame_org.shape[1]) + disflow = cv2.DISOpticalFlow_create( + cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run( + [image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + if frame_id == 1: + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + else: + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (height, width), cv2.INTER_LINEAR) + return [img_matting, cur_gray, optflow_map] + + def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_mobile_video_result'): + """ + API for human video segmentation. + + Args: + video_path (str): The path to take the video under preprocess. If video_path is None, it will capture + the vedio from your camera. + use_gpu (bool): Whether to use gpu. + save_dir (str): The path to store output video. 
+ + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. " + "If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + resize_h = 192 + resize_w = 192 + if not video_path: + cap_video = cv2.VideoCapture(0) + else: + cap_video = cv2.VideoCapture(video_path) + if not cap_video.isOpened(): + raise IOError("Error opening video stream or file, " + "--video_path whether existing: {}" + " or camera whether working".format(video_path)) + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + disflow = cv2.DISOpticalFlow_create( + cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + is_init = True + fps = cap_video.get(cv2.CAP_PROP_FPS) + if video_path is not None: + print('Please wait. It is computing......') + if not osp.exists(save_dir): + os.makedirs(save_dir) + save_path = osp.join(save_dir, 'result' + '.avi') + cap_out = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, + (width, height)) + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run( + [image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose( + np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask( + optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), + cv2.INTER_LINEAR) + img_matting = np.repeat( + img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() + else: + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run( + [image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose( + np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask( + optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), + cv2.INTER_LINEAR) + img_matting = np.repeat( + img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * 
frame_org + + (1 - img_matting) * bg_im).astype(np.uint8) + cv2.imshow('HumanSegmentation', comb) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + else: + break + cap_video.release() + + def save_inference_model(self, + dirname='humanseg_mobile_model', + model_filename=None, + params_filename=None, + combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.segment(images=images_decode, **kwargs) + results = [{ + 'data': cv2_to_base64(result['data']) + } for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.segment( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='humanseg_mobile_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', + type=str, + default='humanseg_mobile_model', + help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") + self.arg_config_group.add_argument( + '--batch_size', + type=ast.literal_eval, + default=1, + help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + m = HRNetw18samllv1humanseg() + img = cv2.imread('photo.jpg') + #res = m.segment(images=[img], visualization=True) + #print(res[0]['data']) + #m.video_segment('') + cap_video = cv2.VideoCapture('video_test.mp4') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'result_frame.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, + cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, + (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = m.video_stream_segment( + frame_org=frame_org, + frame_id=cap_video.get(1), + prev_gray=prev_gray, + prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype( + np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() diff --git a/hub_module/modules/image/humanseg/humanseg_mobile/optimal.py b/hub_module/modules/image/humanseg/humanseg_mobile/optimal.py new file mode 100644 index 0000000000000000000000000000000000000000..98d5e480046687b1c1f0e7545949309c853a581f --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_mobile/optimal.py @@ -0,0 +1,110 @@ +# -*- coding:utf-8 -*- + +import numpy as np + + +def human_seg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow): + """计算光流跟踪匹配点和光流图 + 输入参数: + pre_gray: 上一帧灰度图 + cur_gray: 当前帧灰度图 + prev_cfd: 上一帧光流图 + dl_weights: 融合权重图 + disflow: 光流数据结构 + 返回值: + is_track: 光流点跟踪二值图,即是否具有光流点匹配 + track_cfd: 光流跟踪图 + """ + check_thres = 8 + h, w = pre_gray.shape[:2] + track_cfd = np.zeros_like(prev_cfd) + is_track = np.zeros_like(pre_gray) + flow_fw = disflow.calc(pre_gray, cur_gray, None) + flow_bw = disflow.calc(cur_gray, pre_gray, None) + flow_fw = np.round(flow_fw).astype(np.int) + flow_bw = np.round(flow_bw).astype(np.int) + y_list = np.array(range(h)) + x_list = np.array(range(w)) + yv, xv = np.meshgrid(y_list, x_list) + yv, xv = yv.T, xv.T + cur_x = xv + flow_fw[:, :, 0] + cur_y = yv + flow_fw[:, :, 1] + + # 超出边界不跟踪 + not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h) + flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]] + not_track += (np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) + + np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])) >= check_thres + track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track] + + is_track[cur_y[~not_track], cur_x[~not_track]] = 1 + + not_flow = np.all( + np.abs(flow_fw) == 0, axis=-1) * np.all( + np.abs(flow_bw) == 0, axis=-1) + dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05 + return track_cfd, is_track, dl_weights + + +def human_seg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track): + """光流追踪图和人像分割结构融合 + 输入参数: + track_cfd: 光流追踪图 + dl_cfd: 当前帧分割结果 + dl_weights: 融合权重图 + is_track: 光流点匹配二值图 + 返回 + cur_cfd: 光流跟踪图和人像分割结果融合图 + """ + fusion_cfd = dl_cfd.copy() + is_track = is_track.astype(np.bool) + fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + ( + 1 - dl_weights[is_track]) * track_cfd[is_track] + # 确定区域 + index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track + index_less01 = (dl_weights < 0.1) * index_certain + fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[ + 
index_less01] + index_larger09 = (dl_weights >= 0.1) * index_certain + fusion_cfd[index_larger09] = 0.4 * dl_cfd[index_larger09] + 0.6 * track_cfd[ + index_larger09] + return fusion_cfd + + +def threshold_mask(img, thresh_bg, thresh_fg): + dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg) + dst[np.where(dst > 1)] = 1 + dst[np.where(dst < 0)] = 0 + return dst.astype(np.float32) + + +def postprocess_v(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init): + """光流优化 + Args: + cur_gray : 当前帧灰度图 + pre_gray : 前一帧灰度图 + pre_cfd :前一帧融合结果 + scoremap : 当前帧分割结果 + difflow : 光流 + is_init : 是否第一帧 + Returns: + fusion_cfd : 光流追踪图和预测结果融合图 + """ + h, w = scoremap.shape + cur_cfd = scoremap.copy() + + if is_init: + if h <= 64 or w <= 64: + disflow.setFinestScale(1) + elif h <= 160 or w <= 160: + disflow.setFinestScale(2) + else: + disflow.setFinestScale(3) + fusion_cfd = cur_cfd + else: + weights = np.ones((h, w), np.float32) * 0.3 + track_cfd, is_track, weights = human_seg_tracking( + prev_gray, cur_gray, pre_cfd, weights, disflow) + fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track) + + return fusion_cfd diff --git a/hub_module/modules/image/humanseg/humanseg_mobile/processor.py b/hub_module/modules/image/humanseg/humanseg_mobile/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..5e5d9b30df897808cd8930d18a58f0e6731ee064 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_mobile/processor.py @@ -0,0 +1,85 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, + org_im, + org_im_shape, + org_im_path, + output_dir, + visualization, + thresh=120): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + thresh (float): threshold. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for logit in data_out: + logit = (logit * 255).astype(np.uint8) + logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0])) + rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, rgba) + result['save_path'] = save_im_path + result['data'] = logit + else: + result['data'] = logit + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join( + output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/hub_module/modules/image/humanseg/humanseg_server/README.md b/hub_module/modules/image/humanseg/humanseg_server/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d5343beb54385254c9c47248317590681669f1d9 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_server/README.md @@ -0,0 +1,210 @@ +## 模型概述 + +高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/Xcetion65, 模型大小为158M,网络结构如图: +
+(网络结构示意图)
+
+ +## 命令行预测 + +``` +hub run humanseg_server --input_path "/PATH/TO/IMAGE" +``` + + + +## API + +```python +def segment(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_server_output'): +``` + +预测API,用于人像分割。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* batch\_size (int): batch 的大小; +* use\_gpu (bool): 是否使用 GPU; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + +```python +def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): +``` + +预测API,用于逐帧对视频人像分割。 + +**参数** + +* frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* frame_id (int): 当前帧的编号; +* prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; +* prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + + +**返回** + +* img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明); +* cur_gray (numpy.ndarray): 当前帧输入分割网络图像的灰度图; +* optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图。 + + +```python +def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_server_video'): +``` + +预测API,用于视频人像分割。 + +**参数** + +* video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果; +* use\_gpu (bool): 是否使用GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 + + +```python +def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True): +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +图片分割及视频分割代码示例: +```python +import cv2 +import paddlehub as hub + +human_seg = hub.Module('humanseg_server') +im = cv2.imread('/PATH/TO/IMAGE') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = human_seg.segment(images=[im],visualization=True) +print(res[0]['data']) +human_seg.video_segment('/PATH/TO/VIDEO') +human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + +``` +视频流预测代码示例: +```python +import cv2 +import numpy as np +import paddlehub as hub + +human_seg = hub.Module('humanseg_server') +cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') +fps = cap_video.get(cv2.CAP_PROP_FPS) +save_path = 'humanseg_server_video.avi' +width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) +height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) +cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) +prev_gray = None +prev_cfd = None +while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + +cap_video.release() +cap_out.release() + +``` + +## 服务部署 + +PaddleHub Serving可以部署一个人像分割的在线服务。 + +## 第一步:启动PaddleHub Serving + 
+运行启动命令: +```shell +$ hub serving start -m humanseg_server +``` + +这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/humanseg_server" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 保存图片 +mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) +rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) +cv2.imwrite("segment_human_server.png", rgba) +``` + + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/HumanSeg + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/hub_module/modules/image/humanseg/humanseg_server/__init__.py b/hub_module/modules/image/humanseg/humanseg_server/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hub_module/modules/image/humanseg/humanseg_server/data_feed.py b/hub_module/modules/image/humanseg/humanseg_server/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..a35d66ab7f36d0ec1f3070bfea7ea05b835219f7 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_server/data_feed.py @@ -0,0 +1,64 @@ +# coding=utf-8 +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader', 'preprocess_v'] + + +def preprocess_v(img, w, h): + img = cv2.resize(img, (w, h), cv2.INTER_LINEAR).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.resize(img, (513, 513)).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + element['image'] = img + yield element diff --git a/hub_module/modules/image/humanseg/humanseg_server/module.py b/hub_module/modules/image/humanseg/humanseg_server/module.py new file mode 100644 index 0000000000000000000000000000000000000000..c6733bfc39a5f2ce9b515a8f07f197158383942a --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_server/module.py @@ -0,0 +1,432 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ast +import os +import os.path as osp +import argparse + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from humanseg_server.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from humanseg_server.data_feed import reader, preprocess_v +from humanseg_server.optimal import postprocess_v, threshold_mask + + +@moduleinfo( + name="humanseg_server", + type="CV/semantic_segmentation", + author="baidu-vis", + author_email="", + summary="DeepLabv3+ is a semantic segmentation model.", + version="1.1.0") +class DeeplabV3pXception65HumanSeg(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "humanseg_server_inference") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = os.path.join(self.default_pretrained_model_path, + '__model__') + self.params_file_path = os.path.join(self.default_pretrained_model_path, + '__params__') + cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path, + self.params_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def segment(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_server_output'): + """ + API for human segmentation. 
+ + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + + # compatibility with older versions + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + output = self.gpu_predictor.run([ + batch_image + ]) if use_gpu else self.cpu_predictor.run([batch_image]) + output = output[1].as_ndarray() + output = np.expand_dims(output[:, 1, :, :], axis=1) + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + data_out=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): + """ + API for human video segmentation. + + Args: + frame_org (numpy.ndarray): frame data, shape of each is [H, W, C], the color space is BGR. + frame_id (int): index of the frame to be decoded. + prev_gray (numpy.ndarray): gray scale image of last frame, shape of each is [H, W] + prev_cfd (numpy.ndarray): fusion image from optical flow image and segment result, shape of each is [H, W] + use_gpu (bool): Whether to use gpu. + + Returns: + img_matting (numpy.ndarray): data of segmentation mask. 
+ cur_gray (numpy.ndarray): gray scale image of current frame, shape of each is [H, W] + optflow_map (numpy.ndarray): optical flow image of current frame, shape of each is [H, W] + + """ + resize_h = 512 + resize_w = 512 + is_init = True + width = int(frame_org.shape[0]) + height = int(frame_org.shape[1]) + disflow = cv2.DISOpticalFlow_create( + cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run( + [image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + if frame_id == 1: + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + else: + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (height, width), cv2.INTER_LINEAR) + return [img_matting, cur_gray, optflow_map] + + def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_server_video'): + resize_h = 512 + resize_w = 512 + if not video_path: + cap_video = cv2.VideoCapture(0) + else: + cap_video = cv2.VideoCapture(video_path) + if not cap_video.isOpened(): + raise IOError("Error opening video stream or file, " + "--video_path whether existing: {}" + " or camera whether working".format(video_path)) + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + disflow = cv2.DISOpticalFlow_create( + cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + is_init = True + fps = cap_video.get(cv2.CAP_PROP_FPS) + if video_path is not None: + print('Please wait. 
It is computing......') + if not osp.exists(save_dir): + os.makedirs(save_dir) + save_path = osp.join(save_dir, 'result' + '.avi') + cap_out = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, + (width, height)) + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run( + [image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose( + np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask( + optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), + cv2.INTER_LINEAR) + img_matting = np.repeat( + img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() + else: + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + frame = preprocess_v(frame_org, resize_w, resize_h) + image = PaddleTensor(np.array([frame.copy()])) + output = self.gpu_predictor.run( + [image]) if use_gpu else self.cpu_predictor.run([image]) + score_map = output[1].as_ndarray() + frame = np.transpose(frame, axes=[1, 2, 0]) + score_map = np.transpose( + np.squeeze(score_map, 0), axes=[1, 2, 0]) + cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess_v(cur_gray, score_map, prev_gray, + prev_cfd, disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask( + optflow_map, thresh_bg=0.2, thresh_fg=0.8) + img_matting = cv2.resize(optflow_map, (width, height), + cv2.INTER_LINEAR) + img_matting = np.repeat( + img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + + (1 - img_matting) * bg_im).astype(np.uint8) + cv2.imshow('HumanSegmentation', comb) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + else: + break + cap_video.release() + + def save_inference_model(self, + dirname='humanseg_server_model', + model_filename=None, + params_filename=None, + combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, 
images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.segment(images=images_decode, **kwargs) + results = [{ + 'data': cv2_to_base64(result['data']) + } for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.segment( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='humanseg_server_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', + type=str, + default='humanseg_server_model', + help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") + self.arg_config_group.add_argument( + '--batch_size', + type=ast.literal_eval, + default=1, + help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + m = DeeplabV3pXception65HumanSeg() + # img = cv2.imread('photo.jpg') + # res = m.segment(images=[img]) + # print(res[0]['data']) + # m.save_inference_model() + #m.video_segment(video_path='video_test.mp4') + img = cv2.imread('photo.jpg') + # res = m.segment(images=[img], visualization=True) + # print(res[0]['data']) + # m.video_segment('') + cap_video = cv2.VideoCapture('video_test.mp4') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'result_frame.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, + cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, + (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = m.video_stream_segment( + frame_org=frame_org, + frame_id=cap_video.get(1), + prev_gray=prev_gray, + prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype( + np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() diff --git a/hub_module/modules/image/humanseg/humanseg_server/optimal.py b/hub_module/modules/image/humanseg/humanseg_server/optimal.py new file mode 100644 index 0000000000000000000000000000000000000000..9f70e7981ea493692dcb8d9b78e459a877f057f2 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_server/optimal.py @@ -0,0 +1,109 @@ +# -*- coding:utf-8 -*- +import numpy as np + + +def human_seg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow): + """计算光流跟踪匹配点和光流图 + 输入参数: + pre_gray: 上一帧灰度图 + cur_gray: 当前帧灰度图 + prev_cfd: 上一帧光流图 + dl_weights: 融合权重图 + disflow: 光流数据结构 + 返回值: + is_track: 光流点跟踪二值图,即是否具有光流点匹配 + track_cfd: 光流跟踪图 + """ + check_thres = 8 + h, w = pre_gray.shape[:2] + track_cfd = np.zeros_like(prev_cfd) + is_track = np.zeros_like(pre_gray) + flow_fw = disflow.calc(pre_gray, cur_gray, None) + flow_bw = disflow.calc(cur_gray, pre_gray, None) + flow_fw = np.round(flow_fw).astype(np.int) + flow_bw = np.round(flow_bw).astype(np.int) + y_list = np.array(range(h)) + x_list = np.array(range(w)) + yv, xv = np.meshgrid(y_list, x_list) + yv, xv = yv.T, xv.T + cur_x = xv + flow_fw[:, :, 0] + cur_y = yv + flow_fw[:, :, 1] + + # 超出边界不跟踪 + not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h) + flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]] + not_track += (np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) + + np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])) >= check_thres + track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track] + + is_track[cur_y[~not_track], cur_x[~not_track]] = 1 + + not_flow = np.all( + np.abs(flow_fw) == 0, axis=-1) * np.all( + np.abs(flow_bw) == 0, axis=-1) + dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05 + return track_cfd, is_track, dl_weights + + +def human_seg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track): + """光流追踪图和人像分割结构融合 + 输入参数: + track_cfd: 光流追踪图 + dl_cfd: 当前帧分割结果 + dl_weights: 融合权重图 + is_track: 光流点匹配二值图 + 返回 + cur_cfd: 光流跟踪图和人像分割结果融合图 + """ + fusion_cfd = dl_cfd.copy() + is_track = is_track.astype(np.bool) + fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + ( + 1 - dl_weights[is_track]) * track_cfd[is_track] + # 确定区域 + index_certain = 
((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track + index_less01 = (dl_weights < 0.1) * index_certain + fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[ + index_less01] + index_larger09 = (dl_weights >= 0.1) * index_certain + fusion_cfd[index_larger09] = 0.4 * dl_cfd[index_larger09] + 0.6 * track_cfd[ + index_larger09] + return fusion_cfd + + +def threshold_mask(img, thresh_bg, thresh_fg): + dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg) + dst[np.where(dst > 1)] = 1 + dst[np.where(dst < 0)] = 0 + return dst.astype(np.float32) + + +def postprocess_v(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init): + """光流优化 + Args: + cur_gray : 当前帧灰度图 + pre_gray : 前一帧灰度图 + pre_cfd :前一帧融合结果 + scoremap : 当前帧分割结果 + difflow : 光流 + is_init : 是否第一帧 + Returns: + fusion_cfd : 光流追踪图和预测结果融合图 + """ + h, w = scoremap.shape + cur_cfd = scoremap.copy() + + if is_init: + if h <= 64 or w <= 64: + disflow.setFinestScale(1) + elif h <= 160 or w <= 160: + disflow.setFinestScale(2) + else: + disflow.setFinestScale(3) + fusion_cfd = cur_cfd + else: + weights = np.ones((h, w), np.float32) * 0.3 + track_cfd, is_track, weights = human_seg_tracking( + prev_gray, cur_gray, pre_cfd, weights, disflow) + fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track) + + return fusion_cfd diff --git a/hub_module/modules/image/humanseg/humanseg_server/processor.py b/hub_module/modules/image/humanseg/humanseg_server/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..cc6924d77738ebc8ac74b46f8820979c87cbb179 --- /dev/null +++ b/hub_module/modules/image/humanseg/humanseg_server/processor.py @@ -0,0 +1,78 @@ +# -*- coding:utf-8 -*- +import os +import time + +import base64 +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, + visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + Returns: + result (dict): The data of processed image. + """ + result = dict() + for logit in data_out: + logit = (logit * 255).astype(np.uint8) + logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0])) + rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, rgba) + result['save_path'] = save_im_path + result['data'] = rgba[:, :, 3] + else: + result['data'] = rgba[:, :, 3] + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join( + output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_lite/README.md b/hub_module/modules/image/semantic_segmentation/humanseg_lite/README.md new file mode 100644 index 0000000000000000000000000000000000000000..727a918ea63525225fc8ce7d8b78ea26a4fda8c9 --- /dev/null +++ b/hub_module/modules/image/semantic_segmentation/humanseg_lite/README.md @@ -0,0 +1,122 @@ +## 模型概述 + +HUmanSeg_lite是在ShuffleNetV2网络结构的基础上进行优化,进一步减小了网络规模,网络大小只有541K,量化后只有187K,适用于手机自拍人像分割,且能在移动端进行实时分割。 + + +## 命令行预测 + +``` +hub run humanseg_lite --input_path "/PATH/TO/IMAGE" + +``` + + +## API + +```python +def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=True, + output_dir='humanseg_output') +``` + +预测API,用于人像分割。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* batch\_size (int): batch 的大小; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + +```python +def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +```python +import cv2 +import paddlehub as hub + +human_seg = hub.Module('humanseg_lite') +im = cv2.imread('/PATH/TO/IMAGE') +res = human_seg.segment(images=[im],visualization=True) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个人像分割的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m humanseg_lite +``` + +这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread('/PATH/TO/IMAGE'))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/humanseg_lite" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(base64_to_cv2(r.json()["results"][0]['data'])) +``` + + + + +### 依赖 + +paddlepaddle >= 1.8.1 + +paddlehub >= 1.7.1 diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_lite/__init__.py b/hub_module/modules/image/semantic_segmentation/humanseg_lite/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 
diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_lite/data_feed.py b/hub_module/modules/image/semantic_segmentation/humanseg_lite/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..6034a41dceed76ef666cd7b9c3456a6a05074153 --- /dev/null +++ b/hub_module/modules/image/semantic_segmentation/humanseg_lite/data_feed.py @@ -0,0 +1,55 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) + #print(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.resize(img, (192, 192)).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + element['image'] = img + yield element diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_lite/module.py b/hub_module/modules/image/semantic_segmentation/humanseg_lite/module.py new file mode 100644 index 0000000000000000000000000000000000000000..810af5a938baef10509655cbe83c703c23e5047f --- /dev/null +++ b/hub_module/modules/image/semantic_segmentation/humanseg_lite/module.py @@ -0,0 +1,257 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from humanseg_lite.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from humanseg_lite.data_feed import reader + + +@moduleinfo( + name="humanseg_lite", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="humanseg_lite is a semantic segmentation model.", + version="1.0.0") +class ShufflenetHumanSeg(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "humanseg_lite_inference") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = os.path.join(self.default_pretrained_model_path, + '__model__') + self.params_file_path = os.path.join(self.default_pretrained_model_path, + '__params__') + cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path, + self.params_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def segment(self, + images=None, + paths=None, + data=None, + batch_size=1, + use_gpu=False, + visualization=True, + output_dir='humanseg_output'): + """ + API for human segmentation. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + data (dict): key is 'image', the corresponding value is the path to image. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + if data and 'image' in data: + if paths is None: + paths = list() + paths += data['image'] + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + output = self.gpu_predictor.run([ + batch_image + ]) if use_gpu else self.cpu_predictor.run([batch_image]) + output = np.expand_dims(output[0].as_ndarray(), axis=1) + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + data_out=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname, + model_filename=None, + params_filename=None, + combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.segment(images=images_decode, **kwargs) + results = [{ + 'data': cv2_to_base64(result['data']) + } for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.segment( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='humanseg_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', + type=str, + default='humanseg_model', + help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', + type=ast.literal_eval, + default=True, + help="whether to save output as images.") + self.arg_config_group.add_argument( + '--batch_size', + type=ast.literal_eval, + default=1, + help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + m = ShufflenetHumanSeg() + import cv2 + img = cv2.imread('./meditation.jpg') + res = m.segment(images=[img]) diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_lite/processor.py b/hub_module/modules/image/semantic_segmentation/humanseg_lite/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..b3714d04ff45e22edbacfe5100d2a13adcb7a73a --- /dev/null +++ b/hub_module/modules/image/semantic_segmentation/humanseg_lite/processor.py @@ -0,0 +1,85 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, + org_im, + org_im_shape, + org_im_path, + output_dir, + visualization, + thresh=120): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + thresh (float): threshold. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for logit in data_out: + logit = np.squeeze(logit * 255, axis=2).astype(np.uint8) + logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0])) + rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, rgba) + result['save_path'] = save_im_path + result['data'] = logit + else: + result['data'] = logit + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join( + output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_mobile/README.md b/hub_module/modules/image/semantic_segmentation/humanseg_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d39e1d2a540776b1f279e24a47860d99a8c5236a --- /dev/null +++ b/hub_module/modules/image/semantic_segmentation/humanseg_mobile/README.md @@ -0,0 +1,120 @@ +## 模型概述 + +HumanSeg-mobile是基于HRNet(Deep High-Resolution Representation Learning for Visual Recognition)的人像分割网络。HRNet在特征提取过程中保持了高分辨率的信息,保持了物体的细节信息,并可通过控制每个分支的通道数调整模型的大小。HumanSeg-mobile采用了HRNet_w18_small_v1的网络结构,模型大小只有5.8M, 适用于移动端或服务端CPU的前置摄像头场景。 + +## 命令行预测 + +``` +hub run humanseg_mobile --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=True, + output_dir='humanseg_output') +``` + +预测API,用于人像分割。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* batch\_size (int): batch 的大小; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + +```python +def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +```python +import cv2 +import paddlehub as hub + +human_seg = hub.Module('humanseg_mobile') +im = cv2.imread('/PATH/TO/IMAGE') +res = human_seg.segment(images=[im],visualization=True) +``` + +## 服务部署 + +PaddleHub Serving可以部署一个人像分割的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m humanseg_mobile +``` + +这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +data = {'images':[cv2_to_base64(cv2.imread('/PATH/TO/IMAGE'))]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/humanseg_mobile" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 打印预测结果 +print(base64_to_cv2(r.json()["results"][0]['data'])) +``` + + + +### 依赖 + +paddlepaddle >= 1.8.1 + +paddlehub >= 1.7.1 diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_mobile/__init__.py 
b/hub_module/modules/image/semantic_segmentation/humanseg_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_mobile/data_feed.py b/hub_module/modules/image/semantic_segmentation/humanseg_mobile/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..34b8b1dab50e7aa0ae733d4cfab1ac5f1f8e876b --- /dev/null +++ b/hub_module/modules/image/semantic_segmentation/humanseg_mobile/data_feed.py @@ -0,0 +1,54 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) + #print(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.resize(img, (192, 192)).astype(np.float32) + img_mean = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img_std = np.array([0.5, 0.5, 0.5]).reshape((3, 1, 1)) + img = img.transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + element['image'] = img + yield element diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_mobile/module.py b/hub_module/modules/image/semantic_segmentation/humanseg_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..8af7b153b02c690f5bfc87059964b92db0dc518b --- /dev/null +++ b/hub_module/modules/image/semantic_segmentation/humanseg_mobile/module.py @@ -0,0 +1,258 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from humanseg_mobile.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from humanseg_mobile.data_feed import reader + + +@moduleinfo( + name="humanseg_mobile", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="HRNet_w18_samll_v1 is a semantic segmentation model.", + version="1.0.0") +class HRNetw18samllv1humanseg(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "humanseg_mobile_inference") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = os.path.join(self.default_pretrained_model_path, + '__model__') + self.params_file_path = os.path.join(self.default_pretrained_model_path, + '__params__') + cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path, + self.params_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def segment(self, + images=None, + paths=None, + data=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_output'): + """ + API for human segmentation. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + data (dict): key is 'image', the corresponding value is the path to image. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + # compatibility with older versions + + if data and 'image' in data: + if paths is None: + paths = list() + paths += data['image'] + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + batch_image = PaddleTensor(batch_image.copy()) + output = self.gpu_predictor.run([ + batch_image + ]) if use_gpu else self.cpu_predictor.run([batch_image]) + output = np.expand_dims(output[0].as_ndarray(), axis=1) + # postprocess one by one + for i in range(len(batch_data)): + out = postprocess( + data_out=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname, + model_filename=None, + params_filename=None, + combined=True): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.segment(images=images_decode, **kwargs) + results = [{ + 'data': cv2_to_base64(result['data']) + } for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.segment( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='humanseg_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', + type=str, + default='humanseg_model', + help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") + self.arg_config_group.add_argument( + '--batch_size', + type=ast.literal_eval, + default=1, + help="batch size.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + m = HRNetw18samllv1humanseg() + import cv2 + img = cv2.imread('./meditation.jpg') + res = m.segment(images=[img], visualization=True) + print(res[0]['data']) diff --git a/hub_module/modules/image/semantic_segmentation/humanseg_mobile/processor.py b/hub_module/modules/image/semantic_segmentation/humanseg_mobile/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..b3714d04ff45e22edbacfe5100d2a13adcb7a73a --- /dev/null +++ b/hub_module/modules/image/semantic_segmentation/humanseg_mobile/processor.py @@ -0,0 +1,85 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, + org_im, + org_im_shape, + org_im_path, + output_dir, + visualization, + thresh=120): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + thresh (float): threshold. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for logit in data_out: + logit = np.squeeze(logit * 255, axis=2).astype(np.uint8) + logit = cv2.resize(logit, (org_im_shape[1], org_im_shape[0])) + rgba = np.concatenate((org_im, np.expand_dims(logit, axis=2)), axis=2) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, rgba) + result['save_path'] = save_im_path + result['data'] = logit + else: + result['data'] = logit + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join( + output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/hub_module/modules/image/super_resolution/dcscn/README.md b/hub_module/modules/image/super_resolution/dcscn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e9099f288b7eb846cbf88616efc915b31ff58fc8 --- /dev/null +++ b/hub_module/modules/image/super_resolution/dcscn/README.md @@ -0,0 +1,134 @@ +## 模型概述 + +DCSCN是基于Fast and Accurate Image Super Resolution by Deep CNN with Skip Connection and Network in Network设计的轻量化超分辨模型。该模型使用残差结构和跳连的方式构建网络来提取局部和全局特征,同时使用并行1*1的卷积网络学习细节特征提升模型性能。该模型提供的超分倍数为2倍。 + +## 命令行预测 + +``` +$ hub run dcscn --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="dcscn_output") +``` + +预测API,用于图像超分辨率。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 + +```python +def save_inference_model(self, + dirname='dcscn_save_model', + model_filename=None, + params_filename=None, + combined=False) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +```python +import cv2 +import paddlehub as hub + +sr_model = hub.Module('dcscn') +im = cv2.imread('/PATH/TO/IMAGE').astype('float32') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = sr_model.reconstruct(images=[im], visualization=True) +print(res[0]['data']) +sr_model.save_inference_model() +``` + +## 服务部署 + +PaddleHub Serving可以部署一个图像超分的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m dcscn +``` + +这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 + +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/dcscn" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +sr = np.expand_dims(cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY), axis=2) +shape =sr.shape +org_im = cv2.cvtColor(org_im, cv2.COLOR_BGR2YUV) +uv = cv2.resize(org_im[...,1:], (shape[1], shape[0]), interpolation=cv2.INTER_CUBIC) +combine_im = cv2.cvtColor(np.concatenate((sr, uv), 
axis=2), cv2.COLOR_YUV2BGR) +cv2.imwrite('dcscn_X2.png', combine_im) +print("save image as dcscn_X2.png") + +``` +### 查看代码 + +https://github.com/jiny2001/dcscn-super-resolution + + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/hub_module/modules/image/super_resolution/dcscn/data_feed.py b/hub_module/modules/image/super_resolution/dcscn/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..194b320c53979e151792edc5f4426a1176ec4938 --- /dev/null +++ b/hub_module/modules/image/super_resolution/dcscn/data_feed.py @@ -0,0 +1,67 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + im = im.astype(np.float32) + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV) + shape = img.shape + img_x = np.expand_dims(img[:, :, 0], axis=2) + img_x2 = np.expand_dims( + cv2.resize( + img_x, (shape[1] * 2, shape[0] * 2), + interpolation=cv2.INTER_CUBIC), + axis=2) + img_x = img_x.transpose((2, 0, 1)) / 255 + img_x2 = img_x2.transpose(2, 0, 1) / 255 + img_x = img_x.astype(np.float32) + img_x2 = img_x2.astype(np.float32) + element['img_x'] = img_x + element['img_x2'] = img_x2 + yield element + + +if __name__ == "__main__": + path = ['photo.jpg'] + reader(paths=path) diff --git a/hub_module/modules/image/super_resolution/dcscn/module.py b/hub_module/modules/image/super_resolution/dcscn/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2d44d7eda3f1238dd9689fb22856b528a65e8c6b --- /dev/null +++ b/hub_module/modules/image/super_resolution/dcscn/module.py @@ -0,0 +1,231 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
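+
+# Predictor setup note: _set_config below always builds a CPU predictor and only
+# builds a GPU predictor when CUDA_VISIBLE_DEVICES names a valid device id;
+# calling reconstruct(use_gpu=True) without that variable set raises a RuntimeError.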
+ +import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from dcscn.data_feed import reader +from dcscn.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir + + +@moduleinfo( + name="dcscn", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="dcscn is a super resolution model.", + version="1.0.0") +class Dcscn(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "dcscn_model") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = self.default_pretrained_model_path + cpu_config = AnalysisConfig(self.model_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="dcscn_output"): + """ + API for super resolution. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + res = list() + + for i in range(total_num): + image_x = np.array([all_data[i]['img_x']]) + image_x2 = np.array([all_data[i]['img_x2']]) + dropout = np.array([0]) + image_x = PaddleTensor(image_x.copy()) + image_x2 = PaddleTensor(image_x2.copy()) + drop_out = PaddleTensor(dropout.copy()) + output = self.gpu_predictor.run([ + image_x, image_x2 + ]) if use_gpu else self.cpu_predictor.run([image_x, image_x2]) + + output = np.expand_dims(output[0].as_ndarray(), axis=1) + + out = postprocess( + data_out=output, + org_im=all_data[i]['org_im'], + org_im_shape=all_data[i]['org_im_shape'], + org_im_path=all_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname='dcscn_save_model', + model_filename=None, + params_filename=None, + combined=False): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.reconstruct(images=images_decode, **kwargs) + results = [{ + 'data': cv2_to_base64(result['data']) + } for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.reconstruct( + paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='dcscn_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', + type=str, + default='dcscn_save_model', + help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', + type=ast.literal_eval, + default=True, + help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
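+
+        Only '--input_path' is registered here; '--use_gpu', '--output_dir',
+        '--save_dir' and '--visualization' are registered in add_module_config_arg.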
+ """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + module = Dcscn() + #module.reconstruct(paths=["BSD100_001.png","BSD100_002.png"]) + import cv2 + img = cv2.imread("BSD100_001.png").astype('float32') + res = module.reconstruct(images=[img]) + module.save_inference_model() diff --git a/hub_module/modules/image/super_resolution/dcscn/processor.py b/hub_module/modules/image/super_resolution/dcscn/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..1c92beaacff794ecb1ee2af235aa194a86ed4556 --- /dev/null +++ b/hub_module/modules/image/super_resolution/dcscn/processor.py @@ -0,0 +1,87 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, + visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for sr in data_out: + sr = np.squeeze(sr, 0) + sr = np.clip(sr * 255, 0, 255) + sr = sr.astype(np.uint8) + shape = sr.shape + if visualization: + org_im = cv2.cvtColor(org_im, cv2.COLOR_BGR2YUV) + uv = cv2.resize( + org_im[..., 1:], (shape[1], shape[0]), + interpolation=cv2.INTER_CUBIC) + combine_im = cv2.cvtColor( + np.concatenate((sr, uv), axis=2), cv2.COLOR_YUV2BGR) + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, combine_im) + print("save image at: ", save_im_path) + result['save_path'] = save_im_path + result['data'] = sr + else: + result['data'] = sr + + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join( + output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/hub_module/modules/image/super_resolution/falsr_a/README.md b/hub_module/modules/image/super_resolution/falsr_a/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9cd4bf2f7b9f11731bb6e049ec62341a65fba728 --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_a/README.md @@ -0,0 +1,126 @@ +## 模型概述 + +falsr_a是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 + +## 命令行预测 + +``` +$ hub run falsr_a --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_a_output") +``` + +预测API,用于图像超分辨率。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 + +```python +def save_inference_model(self, + dirname='falsr_a_save_model', + model_filename=None, + params_filename=None, + combined=False) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +```python +import cv2 +import paddlehub as hub + +sr_model = hub.Module('falsr_a') +im = cv2.imread('/PATH/TO/IMAGE').astype('float32') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = sr_model.reconstruct(images=[im], visualization=True) +print(res[0]['data']) +sr_model.save_inference_model() +``` + +## 服务部署 + +PaddleHub Serving可以部署一个图像超分的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m falsr_a +``` + +这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/falsr_a" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +sr = base64_to_cv2(r.json()["results"][0]['data']) +cv2.imwrite('falsr_a_X2.png', sr) +print("save image as falsr_a_X2.png") +``` +### 查看代码 + +https://github.com/xiaomi-automl/FALSR + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/hub_module/modules/image/super_resolution/falsr_a/data_feed.py 
b/hub_module/modules/image/super_resolution/falsr_a/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..1d4bdd158e81ce5a740652e5e7097ff2267216a5 --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_a/data_feed.py @@ -0,0 +1,63 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + im = im.astype(np.float32) + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV) + shape = img.shape + img_scale = cv2.resize( + img, (shape[1] * 2, shape[0] * 2), interpolation=cv2.INTER_CUBIC) + img_y = np.expand_dims(img[:, :, 0], axis=2) + img_scale_pbpr = img_scale[..., 1:] + img_y = img_y.transpose((2, 0, 1)) / 255 + img_scale_pbpr = img_scale_pbpr.transpose(2, 0, 1) / 255 + element['img_y'] = img_y + element['img_scale_pbpr'] = img_scale_pbpr + yield element + + +if __name__ == "__main__": + path = ['BSD100_001.png'] + reader(paths=path) diff --git a/hub_module/modules/image/super_resolution/falsr_a/module.py b/hub_module/modules/image/super_resolution/falsr_a/module.py new file mode 100644 index 0000000000000000000000000000000000000000..40a2aab340d626c40a93559856c65d916650380f --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_a/module.py @@ -0,0 +1,226 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
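+
+# Input layout note: data_feed.reader prepares two tensors per image, the
+# normalized Y channel of the source image and the bicubically 2x-upscaled
+# Pb/Pr channels; processor.postprocess converts the network output from RGB
+# to BGR before it is saved or returned.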
+ +import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from falsr_a.data_feed import reader +from falsr_a.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir + + +@moduleinfo( + name="falsr_a", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="falsr_a is a super resolution model.", + version="1.0.0") +class Falsr_A(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "falsr_a_model") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = self.default_pretrained_model_path + cpu_config = AnalysisConfig(self.model_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_a_output"): + """ + API for super resolution. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + res = list() + + for i in range(total_num): + image_y = np.array([all_data[i]['img_y']]) + image_scale_pbpr = np.array([all_data[i]['img_scale_pbpr']]) + image_y = PaddleTensor(image_y.copy()) + image_scale_pbpr = PaddleTensor(image_scale_pbpr.copy()) + output = self.gpu_predictor.run([ + image_y, image_scale_pbpr + ]) if use_gpu else self.cpu_predictor.run( + [image_y, image_scale_pbpr]) + output = np.expand_dims(output[0].as_ndarray(), axis=1) + out = postprocess( + data_out=output, + org_im=all_data[i]['org_im'], + org_im_shape=all_data[i]['org_im_shape'], + org_im_path=all_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname='falsr_a_save_model', + model_filename=None, + params_filename=None, + combined=False): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.reconstruct(images=images_decode, **kwargs) + results = [{ + 'data': cv2_to_base64(result['data']) + } for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.reconstruct( + paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='falsr_a_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', + type=str, + default='falsr_a_save_model', + help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', + type=ast.literal_eval, + default=True, + help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + module = Falsr_A() + module.reconstruct( + paths=["BSD100_001.png", "BSD100_002.png", "Set5_003.png"]) + module.save_inference_model() diff --git a/hub_module/modules/image/super_resolution/falsr_a/processor.py b/hub_module/modules/image/super_resolution/falsr_a/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..875208608403963f2e72e778d3c581f45fb57a93 --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_a/processor.py @@ -0,0 +1,82 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, + visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for sr in data_out: + sr = np.squeeze(sr, 0) + sr = np.clip(sr * 255, 0, 255) + sr = sr.astype(np.uint8) + sr = cv2.cvtColor(sr, cv2.COLOR_RGB2BGR) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, sr) + print("save image at: ", save_im_path) + result['save_path'] = save_im_path + result['data'] = sr + else: + result['data'] = sr + + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
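+
+    Returns:
+        save_im_path (str): '<output_dir>/<prefix>.png', with a 'time=<timestamp>' suffix if that name is already taken.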
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join( + output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/hub_module/modules/image/super_resolution/falsr_b/README.md b/hub_module/modules/image/super_resolution/falsr_b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c66d25e0dd5cab44b32115edb55d61441026d625 --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_b/README.md @@ -0,0 +1,126 @@ +## 模型概述 + +falsr_b是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。falsr_b较falsr_a更轻量化。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 + +## 命令行预测 + +``` +$ hub run falsr_b --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=True, + output_dir="falsr_b_output") +``` + +预测API,用于图像超分辨率。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 + +```python +def save_inference_model(self, + dirname='falsr_b_save_model', + model_filename=None, + params_filename=None, + combined=False) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +```python +import cv2 +import paddlehub as hub + +sr_model = hub.Module('falsr_b') +im = cv2.imread('/PATH/TO/IMAGE').astype('float32') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = sr_model.reconstruct(images=[im], visualization=True) +print(res[0]['data']) +sr_model.save_inference_model() +``` + +## 服务部署 + +PaddleHub Serving可以部署一个图像超分的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m falsr_b +``` + +这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/falsr_b" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +sr = base64_to_cv2(r.json()["results"][0]['data']) +cv2.imwrite('falsr_b_X2.png', sr) +print("save image as falsr_b_X2.png") +``` + +### 查看代码 + +https://github.com/xiaomi-automl/FALSR + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git 
a/hub_module/modules/image/super_resolution/falsr_b/data_feed.py b/hub_module/modules/image/super_resolution/falsr_b/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..1d4bdd158e81ce5a740652e5e7097ff2267216a5 --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_b/data_feed.py @@ -0,0 +1,63 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + im = im.astype(np.float32) + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV) + shape = img.shape + img_scale = cv2.resize( + img, (shape[1] * 2, shape[0] * 2), interpolation=cv2.INTER_CUBIC) + img_y = np.expand_dims(img[:, :, 0], axis=2) + img_scale_pbpr = img_scale[..., 1:] + img_y = img_y.transpose((2, 0, 1)) / 255 + img_scale_pbpr = img_scale_pbpr.transpose(2, 0, 1) / 255 + element['img_y'] = img_y + element['img_scale_pbpr'] = img_scale_pbpr + yield element + + +if __name__ == "__main__": + path = ['BSD100_001.png'] + reader(paths=path) diff --git a/hub_module/modules/image/super_resolution/falsr_b/module.py b/hub_module/modules/image/super_resolution/falsr_b/module.py new file mode 100644 index 0000000000000000000000000000000000000000..77aca9f37ec0f51f480d035f2ef9de13c8cf0c17 --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_b/module.py @@ -0,0 +1,226 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
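+
+# Serving note: serving_method (decorated with @serving) accepts base64-encoded
+# images, runs reconstruct on the decoded arrays, and returns each result's
+# 'data' field re-encoded as a base64 JPEG string.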
+ +import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from falsr_b.data_feed import reader +from falsr_b.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir + + +@moduleinfo( + name="falsr_b", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="falsr_b is a super resolution model.", + version="1.0.0") +class Falsr_B(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "falsr_b_model") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = self.default_pretrained_model_path + cpu_config = AnalysisConfig(self.model_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_b_output"): + """ + API for super resolution. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + res = list() + + for i in range(total_num): + image_y = np.array([all_data[i]['img_y']]) + image_scale_pbpr = np.array([all_data[i]['img_scale_pbpr']]) + image_y = PaddleTensor(image_y.copy()) + image_scale_pbpr = PaddleTensor(image_scale_pbpr.copy()) + output = self.gpu_predictor.run([ + image_y, image_scale_pbpr + ]) if use_gpu else self.cpu_predictor.run( + [image_y, image_scale_pbpr]) + output = np.expand_dims(output[0].as_ndarray(), axis=1) + out = postprocess( + data_out=output, + org_im=all_data[i]['org_im'], + org_im_shape=all_data[i]['org_im_shape'], + org_im_path=all_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname='falsr_b_save_model', + model_filename=None, + params_filename=None, + combined=False): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.reconstruct(images=images_decode, **kwargs) + results = [{ + 'data': cv2_to_base64(result['data']) + } for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.reconstruct( + paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='falsr_b_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', + type=str, + default='falsr_b_save_model', + help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', + type=ast.literal_eval, + default=True, + help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + module = Falsr_B() + module.reconstruct( + paths=["BSD100_001.png", "BSD100_002.png", "Set5_003.png"]) + module.save_inference_model() diff --git a/hub_module/modules/image/super_resolution/falsr_b/processor.py b/hub_module/modules/image/super_resolution/falsr_b/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..875208608403963f2e72e778d3c581f45fb57a93 --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_b/processor.py @@ -0,0 +1,82 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, + visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for sr in data_out: + sr = np.squeeze(sr, 0) + sr = np.clip(sr * 255, 0, 255) + sr = sr.astype(np.uint8) + sr = cv2.cvtColor(sr, cv2.COLOR_RGB2BGR) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, sr) + print("save image at: ", save_im_path) + result['save_path'] = save_im_path + result['data'] = sr + else: + result['data'] = sr + + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. 
+ """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join( + output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/hub_module/modules/image/super_resolution/falsr_c/README.md b/hub_module/modules/image/super_resolution/falsr_c/README.md new file mode 100644 index 0000000000000000000000000000000000000000..41a5a3dd3b19231acd77364c55b720f7eebb1d7e --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_c/README.md @@ -0,0 +1,127 @@ +## 模型概述 + +falsr_c是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 + +## 命令行预测 + +``` +$ hub run falsr_c --input_path "/PATH/TO/IMAGE" + +``` + +## API + +```python +def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_c_output") +``` + +预测API,用于图像超分辨率。 + +**参数** + +* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; +* paths (list\[str\]): 图片的路径; +* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; +* visualization (bool): 是否将识别结果保存为图片文件; +* output\_dir (str): 图片的保存路径。 + +**返回** + +* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 + +```python +def save_inference_model(self, + dirname='falsr_c_save_model', + model_filename=None, + params_filename=None, + combined=False) +``` + +将模型保存到指定路径。 + +**参数** + +* dirname: 存在模型的目录名称 +* model\_filename: 模型文件名称,默认为\_\_model\_\_ +* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) +* combined: 是否将参数保存到统一的一个文件中 + +## 代码示例 + +```python +import cv2 +import paddlehub as hub + +sr_model = hub.Module('falsr_c') +im = cv2.imread('/PATH/TO/IMAGE').astype('float32') +#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 +res = sr_model.reconstruct(images=[im], visualization=True) +print(res[0]['data']) +sr_model.save_inference_model() +``` + +## 服务部署 + +PaddleHub Serving可以部署一个图像超分的在线服务。 + +## 第一步:启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m falsr_c +``` + +这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +## 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import base64 + +import cv2 +import numpy as np + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/falsr_c" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +sr = base64_to_cv2(r.json()["results"][0]['data']) +cv2.imwrite('falsr_c_X2.png', sr) +print("save image as falsr_c_X2.png") +``` + +### 查看代码 + +https://github.com/xiaomi-automl/FALSR + + +### 依赖 + +paddlepaddle >= 1.8.0 + +paddlehub >= 1.7.1 diff --git a/hub_module/modules/image/super_resolution/falsr_c/data_feed.py 
b/hub_module/modules/image/super_resolution/falsr_c/data_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..1d4bdd158e81ce5a740652e5e7097ff2267216a5 --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_c/data_feed.py @@ -0,0 +1,63 @@ +# -*- coding:utf-8 -*- +import os +import time +from collections import OrderedDict + +import cv2 +import numpy as np +from PIL import Image + +__all__ = ['reader'] + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C] + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) + im = cv2.imread(im_path).astype('float32') + each['org_im'] = im + each['org_im_path'] = im_path + each['org_im_shape'] = im.shape + component.append(each) + if images is not None: + assert type(images) is list, "images should be a list." + for im in images: + im = im.astype(np.float32) + each = OrderedDict() + each['org_im'] = im + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) + each['org_im_shape'] = im.shape + component.append(each) + + for element in component: + img = element['org_im'].copy() + img = cv2.cvtColor(img, cv2.COLOR_BGR2YUV) + shape = img.shape + img_scale = cv2.resize( + img, (shape[1] * 2, shape[0] * 2), interpolation=cv2.INTER_CUBIC) + img_y = np.expand_dims(img[:, :, 0], axis=2) + img_scale_pbpr = img_scale[..., 1:] + img_y = img_y.transpose((2, 0, 1)) / 255 + img_scale_pbpr = img_scale_pbpr.transpose(2, 0, 1) / 255 + element['img_y'] = img_y + element['img_scale_pbpr'] = img_scale_pbpr + yield element + + +if __name__ == "__main__": + path = ['BSD100_001.png'] + reader(paths=path) diff --git a/hub_module/modules/image/super_resolution/falsr_c/module.py b/hub_module/modules/image/super_resolution/falsr_c/module.py new file mode 100644 index 0000000000000000000000000000000000000000..69a1be406ad7b283795ecbbc6a77596d7f171e6c --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_c/module.py @@ -0,0 +1,228 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
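+
+# Command-line note: run_cmd (decorated with @runnable) backs `hub run falsr_c`;
+# it runs reconstruct on --input_path and, because --save_dir defaults to
+# 'falsr_c_save_model', also exports the inference model after prediction.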
+ +import ast +import os +import argparse + +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub +from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddlehub.module.module import moduleinfo, runnable, serving + +from falsr_c.data_feed import reader +from falsr_c.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir + + +@moduleinfo( + name="falsr_c", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="falsr_c is a super resolution model.", + version="1.0.0") +class Falsr_C(hub.Module): + def _initialize(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "falsr_c_model") + self._set_config() + + def _set_config(self): + """ + predictor config setting + """ + self.model_file_path = self.default_pretrained_model_path + cpu_config = AnalysisConfig(self.model_file_path) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_paddle_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = AnalysisConfig(self.model_file_path) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_paddle_predictor(gpu_config) + + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_c_output"): + """ + API for super resolution. + + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C], the color space is BGR. + paths (list[str]): The paths of images. + use_gpu (bool): Whether to use gpu. + visualization (bool): Whether to save image or not. + output_dir (str): The path to store output images. + + Returns: + res (list[dict]): each element in the list is a dict, the keys and values are: + save_path (str, optional): the path to save images. (Exists only if visualization is True) + data (numpy.ndarray): data of post processed image. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + res = list() + + for i in range(total_num): + image_y = np.array([all_data[i]['img_y']]) + image_scale_pbpr = np.array([all_data[i]['img_scale_pbpr']]) + image_y = PaddleTensor(image_y.copy()) + image_scale_pbpr = PaddleTensor(image_scale_pbpr.copy()) + output = self.gpu_predictor.run([ + image_y, image_scale_pbpr + ]) if use_gpu else self.cpu_predictor.run( + [image_y, image_scale_pbpr]) + output = np.expand_dims(output[0].as_ndarray(), axis=1) + out = postprocess( + data_out=output, + org_im=all_data[i]['org_im'], + org_im_shape=all_data[i]['org_im_shape'], + org_im_path=all_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) + res.append(out) + return res + + def save_inference_model(self, + dirname='falsr_c_save_model', + model_filename=None, + params_filename=None, + combined=False): + if combined: + model_filename = "__model__" if not model_filename else model_filename + params_filename = "__params__" if not params_filename else params_filename + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + program, feeded_var_names, target_vars = fluid.io.load_inference_model( + dirname=self.default_pretrained_model_path, executor=exe) + + fluid.io.save_inference_model( + dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.reconstruct(images=images_decode, **kwargs) + results = [{ + 'data': cv2_to_base64(result['data']) + } for result in results] + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.reconstruct( + paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + if args.save_dir is not None: + check_dir(args.save_dir) + self.save_inference_model(args.save_dir) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument( + '--output_dir', + type=str, + default='falsr_c_output', + help="The directory to save output images.") + self.arg_config_group.add_argument( + '--save_dir', + type=str, + default='falsr_c_save_model', + help="The directory to save model.") + self.arg_config_group.add_argument( + '--visualization', + type=ast.literal_eval, + default=True, + help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") + + +if __name__ == "__main__": + module = Falsr_C() + #module.reconstruct(paths=["BSD100_001.png","BSD100_002.png", "Set5_003.png"]) + import cv2 + img = cv2.imread("BSD100_001.png").astype('float32') + res = module.reconstruct(images=[img]) + module.save_inference_model() diff --git a/hub_module/modules/image/super_resolution/falsr_c/processor.py b/hub_module/modules/image/super_resolution/falsr_c/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..d447efaad7f4017c6fb433572ed346652f77a19c --- /dev/null +++ b/hub_module/modules/image/super_resolution/falsr_c/processor.py @@ -0,0 +1,83 @@ +# -*- coding:utf-8 -*- +import os +import time +import base64 + +import cv2 +import numpy as np + +__all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, + visualization): + """ + Postprocess output of network. one image at a time. + + Args: + data_out (numpy.ndarray): output of network. + org_im (numpy.ndarray): original image. + org_im_shape (list): shape pf original image. + org_im_path (list): path of riginal image. + output_dir (str): output directory to store image. + visualization (bool): whether to save image or not. + + Returns: + result (dict): The data of processed image. + """ + result = dict() + for sr in data_out: + sr = np.squeeze(sr, 0) + sr = np.clip(sr * 255, 0, 255) + sr = sr.astype(np.uint8) + sr = cv2.cvtColor(sr, cv2.COLOR_RGB2BGR) + + if visualization: + check_dir(output_dir) + save_im_path = get_save_image_name(org_im, org_im_path, output_dir) + cv2.imwrite(save_im_path, sr) + print("save image at: ", save_im_path) + result['save_path'] = save_im_path + result['data'] = sr + else: + result['data'] = sr + print("result['data'] shape", result['data'].shape) + + return result + + +def check_dir(dir_path): + if not os.path.exists(dir_path): + os.makedirs(dir_path) + elif os.path.isfile(dir_path): + os.remove(dir_path) + os.makedirs(dir_path) + + +def get_save_image_name(org_im, org_im_path, output_dir): + """ + Get save image name from source image path. + """ + # name prefix of orginal image + org_im_name = os.path.split(org_im_path)[-1] + im_prefix = os.path.splitext(org_im_name)[0] + ext = '.png' + # save image path + save_im_path = os.path.join(output_dir, im_prefix + ext) + if os.path.exists(save_im_path): + save_im_path = os.path.join( + output_dir, im_prefix + 'time={}'.format(int(time.time())) + ext) + + return save_im_path diff --git a/hub_module/scripts/configs/dcscn.yml b/hub_module/scripts/configs/dcscn.yml new file mode 100644 index 0000000000000000000000000000000000000000..46bfe93eeb479ed389de8156fc9b4d307ef8688e --- /dev/null +++ b/hub_module/scripts/configs/dcscn.yml @@ -0,0 +1,9 @@ +name: dcscn +dir: "modules/image/super_resolution/dcscn" +exclude: + - README.md +resources: + - + url: https://bj.bcebos.com/paddlehub/model/image/image_editing/dcscn_model.tar.gz + dest: . 
+ uncompress: True diff --git a/hub_module/scripts/configs/falsr_a.yml b/hub_module/scripts/configs/falsr_a.yml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hub_module/scripts/configs/falsr_b.yml b/hub_module/scripts/configs/falsr_b.yml new file mode 100644 index 0000000000000000000000000000000000000000..b2f921f4507051ddf78e427d6005ceb94f665192 --- /dev/null +++ b/hub_module/scripts/configs/falsr_b.yml @@ -0,0 +1,9 @@ +name: falsr_b +dir: "modules/image/super_resolution/falsr_b" +exclude: + - README.md +resources: + - + url: https://bj.bcebos.com/paddlehub/model/image/image_editing/falsr_B_model.tar.gz + dest: . + uncompress: True diff --git a/hub_module/scripts/configs/falsr_c.yml b/hub_module/scripts/configs/falsr_c.yml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hub_module/scripts/configs/humanseg_lite.yml b/hub_module/scripts/configs/humanseg_lite.yml new file mode 100644 index 0000000000000000000000000000000000000000..ccb5e744f1a78f41a61e27d4d325fb24831c3ed8 --- /dev/null +++ b/hub_module/scripts/configs/humanseg_lite.yml @@ -0,0 +1,9 @@ +name: humanseg_lite +dir: "modules/image/semantic_segmentation/humanseg_lite" +exclude: + - README.md +resources: + - + url: https://bj.bcebos.com/paddlehub/model/image/semantic_segmentation/humanseg_lite_inference.tar.gz + dest: . + uncompress: True diff --git a/hub_module/scripts/configs/humanseg_mobile.yml b/hub_module/scripts/configs/humanseg_mobile.yml new file mode 100644 index 0000000000000000000000000000000000000000..f07130a00428d20607173e42739a4dbac4d8f71c --- /dev/null +++ b/hub_module/scripts/configs/humanseg_mobile.yml @@ -0,0 +1,9 @@ +name: humanseg_mobile +dir: "modules/image/semantic_segmentation/humanseg_mobile" +exclude: + - README.md +resources: + - + url: https://bj.bcebos.com/paddlehub/model/image/semantic_segmentation/humanseg_mobile_inference.tar.gz + dest: . 
+ uncompress: True diff --git a/hub_module/tests/image_dataset/human_segmentation/image/ache-adult-depression-expression-41253.jpg b/hub_module/tests/image_dataset/human_segmentation/image/ache-adult-depression-expression-41253.jpg new file mode 100755 index 0000000000000000000000000000000000000000..503931e9f5577f36747dc1abbc51e6779da44902 Binary files /dev/null and b/hub_module/tests/image_dataset/human_segmentation/image/ache-adult-depression-expression-41253.jpg differ diff --git a/hub_module/tests/image_dataset/human_segmentation/image/allergy-cold-disease-flu-41284.jpg b/hub_module/tests/image_dataset/human_segmentation/image/allergy-cold-disease-flu-41284.jpg new file mode 100755 index 0000000000000000000000000000000000000000..bf91253e6ea5bccb3c34aa618d9860a4821c1d2c Binary files /dev/null and b/hub_module/tests/image_dataset/human_segmentation/image/allergy-cold-disease-flu-41284.jpg differ diff --git a/hub_module/tests/image_dataset/human_segmentation/image/bored-female-girl-people-41321.jpg b/hub_module/tests/image_dataset/human_segmentation/image/bored-female-girl-people-41321.jpg new file mode 100755 index 0000000000000000000000000000000000000000..553dbee5491b786fcc5defab2b6ff04e6fcb7cee Binary files /dev/null and b/hub_module/tests/image_dataset/human_segmentation/image/bored-female-girl-people-41321.jpg differ diff --git a/hub_module/tests/image_dataset/human_segmentation/image/colors-hairdresser-cutting-colorimetry-159780.jpg b/hub_module/tests/image_dataset/human_segmentation/image/colors-hairdresser-cutting-colorimetry-159780.jpg new file mode 100755 index 0000000000000000000000000000000000000000..55bd588aefcffaff1ffcb8f10b300ceb5017e953 Binary files /dev/null and b/hub_module/tests/image_dataset/human_segmentation/image/colors-hairdresser-cutting-colorimetry-159780.jpg differ diff --git a/hub_module/tests/image_dataset/human_segmentation/image/pexels-photo-206339.jpg b/hub_module/tests/image_dataset/human_segmentation/image/pexels-photo-206339.jpg new file mode 100755 index 0000000000000000000000000000000000000000..501bd472f16909fee13fbd3a79882e653f39cf8f Binary files /dev/null and b/hub_module/tests/image_dataset/human_segmentation/image/pexels-photo-206339.jpg differ diff --git a/hub_module/tests/image_dataset/human_segmentation/path.txt b/hub_module/tests/image_dataset/human_segmentation/path.txt new file mode 100644 index 0000000000000000000000000000000000000000..0203cac11956fd9aebcfee0a9ad05674704f5f0c --- /dev/null +++ b/hub_module/tests/image_dataset/human_segmentation/path.txt @@ -0,0 +1,5 @@ +../image_dataset/human_segmentation/image/ache-adult-depression-expression-41253.jpg +../image_dataset/human_segmentation/image/allergy-cold-disease-flu-41284.jpg +../image_dataset/human_segmentation/image/bored-female-girl-people-41321.jpg +../image_dataset/human_segmentation/image/colors-hairdresser-cutting-colorimetry-159780.jpg +../image_dataset/human_segmentation/image/pexels-photo-206339.jpg diff --git a/hub_module/tests/image_dataset/super_resolution/BSD100_001.png b/hub_module/tests/image_dataset/super_resolution/BSD100_001.png new file mode 100644 index 0000000000000000000000000000000000000000..1033205eadd43d70efd9343553afa611006c0d71 Binary files /dev/null and b/hub_module/tests/image_dataset/super_resolution/BSD100_001.png differ diff --git a/hub_module/tests/image_dataset/super_resolution/BSD100_002.png b/hub_module/tests/image_dataset/super_resolution/BSD100_002.png new file mode 100644 index 
0000000000000000000000000000000000000000..ac557bec5a452c83f394fea80a22d0349d903fda Binary files /dev/null and b/hub_module/tests/image_dataset/super_resolution/BSD100_002.png differ diff --git a/hub_module/tests/image_dataset/super_resolution/BSD100_003.png b/hub_module/tests/image_dataset/super_resolution/BSD100_003.png new file mode 100644 index 0000000000000000000000000000000000000000..5d5946b272589f5f179eaf72ab0521042c940e3b Binary files /dev/null and b/hub_module/tests/image_dataset/super_resolution/BSD100_003.png differ diff --git a/hub_module/tests/unittests/test_dcscn.py b/hub_module/tests/unittests/test_dcscn.py new file mode 100644 index 0000000000000000000000000000000000000000..44e61c2270ecc93414a41cead7a918516fc3d1dc --- /dev/null +++ b/hub_module/tests/unittests/test_dcscn.py @@ -0,0 +1,71 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import unittest + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + +imgpath = [ + '../image_dataset/super_resolution/BSD100_001.png', + '../image_dataset/super_resolution/BSD100_002.png', + '../image_dataset/super_resolution/BSD100_003.png', +] + + +class TestHumanSeg(unittest.TestCase): + @classmethod + def setUpClass(self): + """Prepare the environment once before execution of all tests.\n""" + self.sr_model = hub.Module(name="dcscn") + + @classmethod + def tearDownClass(self): + """clean up the environment after the execution of all tests.\n""" + self.sr_model = None + + def setUp(self): + "Call setUp() to prepare environment\n" + self.test_prog = fluid.Program() + + def tearDown(self): + "Call tearDown to restore environment.\n" + self.test_prog = None + + def test_single_pic(self): + with fluid.program_guard(self.test_prog): + img = cv2.imread(imgpath[0]) + result = self.sr_model.super_resolution( + images=[img], use_gpu=False, visualization=True) + print(result[0]['data']) + + def test_ndarray(self): + with fluid.program_guard(self.test_prog): + + for pic_path in imgpath: + img = cv2.imread(pic_path) + result = self.sr_model.super_resolution( + images=[img], + output_dir='test_dcscn_model_output', + use_gpu=False, + visualization=True) + + def test_save_inference_model(self): + with fluid.program_guard(self.test_prog): + self.sr_model.save_inference_model( + dirname='test_dcscn_model', combined=True) + + +if __name__ == "__main__": + suite = unittest.TestSuite() + suite.addTest(TestHumanSeg('test_single_pic')) + suite.addTest(TestHumanSeg('test_ndarray')) + suite.addTest(TestHumanSeg('test_save_inference_model')) + runner = unittest.TextTestRunner(verbosity=2) + runner.run(suite) diff --git a/hub_module/tests/unittests/test_falsr_A.py b/hub_module/tests/unittests/test_falsr_A.py new file mode 100644 index 0000000000000000000000000000000000000000..20edcf6b5147a101bba3419d5d2e4cfbe753e836 --- /dev/null +++ b/hub_module/tests/unittests/test_falsr_A.py @@ -0,0 +1,71 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import unittest + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + +imgpath = [ + '../image_dataset/super_resolution/BSD100_001.png', + '../image_dataset/super_resolution/BSD100_002.png', + '../image_dataset/super_resolution/BSD100_003.png', +] + + +class TestHumanSeg(unittest.TestCase): + @classmethod + def setUpClass(self): + """Prepare 
the environment once before execution of all tests.\n""" + self.sr_model = hub.Module(name="falsr_A") + + @classmethod + def tearDownClass(self): + """clean up the environment after the execution of all tests.\n""" + self.sr_model = None + + def setUp(self): + "Call setUp() to prepare environment\n" + self.test_prog = fluid.Program() + + def tearDown(self): + "Call tearDown to restore environment.\n" + self.test_prog = None + + def test_single_pic(self): + with fluid.program_guard(self.test_prog): + img = cv2.imread(imgpath[0]) + result = self.sr_model.super_resolution( + images=[img], use_gpu=False, visualization=True) + print(result[0]['data']) + + def test_ndarray(self): + with fluid.program_guard(self.test_prog): + + for pic_path in imgpath: + img = cv2.imread(pic_path) + result = self.sr_model.super_resolution( + images=[img], + output_dir='test_falsr_A_model_output', + use_gpu=False, + visualization=True) + + def test_save_inference_model(self): + with fluid.program_guard(self.test_prog): + self.sr_model.save_inference_model( + dirname='test_falsr_A_model', combined=True) + + +if __name__ == "__main__": + suite = unittest.TestSuite() + suite.addTest(TestHumanSeg('test_single_pic')) + suite.addTest(TestHumanSeg('test_ndarray')) + suite.addTest(TestHumanSeg('test_save_inference_model')) + runner = unittest.TextTestRunner(verbosity=2) + runner.run(suite) diff --git a/hub_module/tests/unittests/test_falsr_B.py b/hub_module/tests/unittests/test_falsr_B.py new file mode 100644 index 0000000000000000000000000000000000000000..ffee6c0bbce536a261fa2ce467eb1f9f5be47873 --- /dev/null +++ b/hub_module/tests/unittests/test_falsr_B.py @@ -0,0 +1,71 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import unittest + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + +imgpath = [ + '../image_dataset/super_resolution/BSD100_001.png', + '../image_dataset/super_resolution/BSD100_002.png', + '../image_dataset/super_resolution/BSD100_003.png', +] + + +class TestHumanSeg(unittest.TestCase): + @classmethod + def setUpClass(self): + """Prepare the environment once before execution of all tests.\n""" + self.sr_model = hub.Module(name="falsr_B") + + @classmethod + def tearDownClass(self): + """clean up the environment after the execution of all tests.\n""" + self.sr_model = None + + def setUp(self): + "Call setUp() to prepare environment\n" + self.test_prog = fluid.Program() + + def tearDown(self): + "Call tearDown to restore environment.\n" + self.test_prog = None + + def test_single_pic(self): + with fluid.program_guard(self.test_prog): + img = cv2.imread(imgpath[0]) + result = self.sr_model.super_resolution( + images=[img], use_gpu=False, visualization=True) + print(result[0]['data']) + + def test_ndarray(self): + with fluid.program_guard(self.test_prog): + + for pic_path in imgpath: + img = cv2.imread(pic_path) + result = self.sr_model.super_resolution( + images=[img], + output_dir='test_falsr_B_model_output', + use_gpu=False, + visualization=True) + + def test_save_inference_model(self): + with fluid.program_guard(self.test_prog): + self.sr_model.save_inference_model( + dirname='test_falsr_B_model', combined=True) + + +if __name__ == "__main__": + suite = unittest.TestSuite() + suite.addTest(TestHumanSeg('test_single_pic')) + suite.addTest(TestHumanSeg('test_ndarray')) + suite.addTest(TestHumanSeg('test_save_inference_model')) + runner = 
unittest.TextTestRunner(verbosity=2) + runner.run(suite) diff --git a/hub_module/tests/unittests/test_falsr_C.py b/hub_module/tests/unittests/test_falsr_C.py new file mode 100644 index 0000000000000000000000000000000000000000..c0e60d9258ecd7e2921d321df40a3853fc4270d9 --- /dev/null +++ b/hub_module/tests/unittests/test_falsr_C.py @@ -0,0 +1,71 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import unittest + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + +imgpath = [ + '../image_dataset/super_resolution/BSD100_001.png', + '../image_dataset/super_resolution/BSD100_002.png', + '../image_dataset/super_resolution/BSD100_003.png', +] + + +class TestHumanSeg(unittest.TestCase): + @classmethod + def setUpClass(self): + """Prepare the environment once before execution of all tests.\n""" + self.sr_model = hub.Module(name="falsr_C") + + @classmethod + def tearDownClass(self): + """clean up the environment after the execution of all tests.\n""" + self.sr_model = None + + def setUp(self): + "Call setUp() to prepare environment\n" + self.test_prog = fluid.Program() + + def tearDown(self): + "Call tearDown to restore environment.\n" + self.test_prog = None + + def test_single_pic(self): + with fluid.program_guard(self.test_prog): + img = cv2.imread(imgpath[0]) + result = self.sr_model.super_resolution( + images=[img], use_gpu=False, visualization=True) + print(result[0]['data']) + + def test_ndarray(self): + with fluid.program_guard(self.test_prog): + + for pic_path in imgpath: + img = cv2.imread(pic_path) + result = self.sr_model.super_resolution( + images=[img], + output_dir='test_falsr_C_model_output', + use_gpu=False, + visualization=True) + + def test_save_inference_model(self): + with fluid.program_guard(self.test_prog): + self.sr_model.save_inference_model( + dirname='test_falsr_C_model', combined=True) + + +if __name__ == "__main__": + suite = unittest.TestSuite() + suite.addTest(TestHumanSeg('test_single_pic')) + suite.addTest(TestHumanSeg('test_ndarray')) + suite.addTest(TestHumanSeg('test_save_inference_model')) + runner = unittest.TextTestRunner(verbosity=2) + runner.run(suite) diff --git a/hub_module/tests/unittests/test_humanseg_lite.py b/hub_module/tests/unittests/test_humanseg_lite.py new file mode 100644 index 0000000000000000000000000000000000000000..5f78384892989c725befc69d29b82e5f5521301c --- /dev/null +++ b/hub_module/tests/unittests/test_humanseg_lite.py @@ -0,0 +1,95 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import unittest + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + +pic_dir = '../image_dataset/human_segmentation/image/' + +imgpath = [ + '../image_dataset/human_segmentation/image/ache-adult-depression-expression-41253.jpg', + '../image_dataset/human_segmentation/image/allergy-cold-disease-flu-41284.jpg', + '../image_dataset/human_segmentation/image/bored-female-girl-people-41321.jpg', + '../image_dataset/human_segmentation/image/colors-hairdresser-cutting-colorimetry-159780.jpg', + '../image_dataset/human_segmentation/image/pexels-photo-206339.jpg' +] + + +class TestHumanSeg(unittest.TestCase): + @classmethod + def setUpClass(self): + """Prepare the environment once before execution of all tests.\n""" + self.human_seg = hub.Module(name="humanseg_lite") + + @classmethod + 
def tearDownClass(self): + """clean up the environment after the execution of all tests.\n""" + self.human_seg = None + + def setUp(self): + "Call setUp() to prepare environment\n" + self.test_prog = fluid.Program() + + def tearDown(self): + "Call tearDown to restore environment.\n" + self.test_prog = None + + def test_single_pic(self): + with fluid.program_guard(self.test_prog): + pics_path_list = [ + os.path.join(pic_dir, f) for f in os.listdir(pic_dir) + ] + img = cv2.imread(pics_path_list[0]) + result = self.human_seg.segment( + images=[img], use_gpu=False, visualization=True) + print(result[0]['data']) + + def test_batch(self): + with fluid.program_guard(self.test_prog): + pics_path_list = [ + os.path.join(pic_dir, f) for f in os.listdir(pic_dir) + ] + result = self.human_seg.segment( + paths=imgpath, + batch_size=2, + output_dir='batch_output_shuffle', + use_gpu=False, + visualization=True) + print(result) + + def test_ndarray(self): + with fluid.program_guard(self.test_prog): + pics_path_list = [ + os.path.join(pic_dir, f) for f in os.listdir(pic_dir) + ] + pics_ndarray = list() + for pic_path in pics_path_list: + img = cv2.imread(pic_path) + result = self.human_seg.segment( + images=[img], + output_dir='ndarray_output_shuffle', + use_gpu=False, + visualization=True) + + def test_save_inference_model(self): + with fluid.program_guard(self.test_prog): + self.human_seg.save_inference_model( + dirname='humanseg_lite', combined=True) + + +if __name__ == "__main__": + suite = unittest.TestSuite() + suite.addTest(TestHumanSeg('test_single_pic')) + suite.addTest(TestHumanSeg('test_batch')) + suite.addTest(TestHumanSeg('test_ndarray')) + suite.addTest(TestHumanSeg('test_save_inference_model')) + runner = unittest.TextTestRunner(verbosity=2) + runner.run(suite) diff --git a/hub_module/tests/unittests/test_humanseg_mobile.py b/hub_module/tests/unittests/test_humanseg_mobile.py new file mode 100644 index 0000000000000000000000000000000000000000..edb785c639dfb9836562612601c635e9e7e43a29 --- /dev/null +++ b/hub_module/tests/unittests/test_humanseg_mobile.py @@ -0,0 +1,92 @@ +# coding=utf-8 +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import unittest + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddlehub as hub + +pic_dir = '../image_dataset/human_segmentation/image/' + +imgpath = [ + '../image_dataset/human_segmentation/image/ache-adult-depression-expression-41253.jpg', + '../image_dataset/human_segmentation/image/allergy-cold-disease-flu-41284.jpg', + '../image_dataset/human_segmentation/image/bored-female-girl-people-41321.jpg', + '../image_dataset/human_segmentation/image/colors-hairdresser-cutting-colorimetry-159780.jpg', + '../image_dataset/human_segmentation/image/pexels-photo-206339.jpg' +] + + +class TestHumanSeg(unittest.TestCase): + @classmethod + def setUpClass(self): + """Prepare the environment once before execution of all tests.\n""" + self.human_seg = hub.Module(name="humanseg_mobile") + + @classmethod + def tearDownClass(self): + """clean up the environment after the execution of all tests.\n""" + self.human_seg = None + + def setUp(self): + "Call setUp() to prepare environment\n" + self.test_prog = fluid.Program() + + def tearDown(self): + "Call tearDown to restore environment.\n" + self.test_prog = None + + def test_single_pic(self): + with fluid.program_guard(self.test_prog): + pics_path_list = [ + os.path.join(pic_dir, f) for f in os.listdir(pic_dir) + ] + 
+ img = cv2.imread(pics_path_list[0]) + result = self.human_seg.segment( + images=[img], use_gpu=False, visualization=True) + print(result[0]['data']) + + def test_batch(self): + with fluid.program_guard(self.test_prog): + result = self.human_seg.segment( + paths=imgpath, + batch_size=2, + output_dir='batch_output_hrnet', + use_gpu=False, + visualization=True) + print(result) + + def test_ndarray(self): + with fluid.program_guard(self.test_prog): + pics_path_list = [ + os.path.join(pic_dir, f) for f in os.listdir(pic_dir) + ] + pics_ndarray = list() + for pic_path in pics_path_list: + result = self.human_seg.segment( + images=[cv2.imread(pic_path)], + output_dir='ndarray_output_hrnet', + use_gpu=False, + visualization=True) + + def test_save_inference_model(self): + with fluid.program_guard(self.test_prog): + self.human_seg.save_inference_model( + dirname='humanseg_mobile', combined=True) + + +if __name__ == "__main__": + suite = unittest.TestSuite() + suite.addTest(TestHumanSeg('test_single_pic')) + suite.addTest(TestHumanSeg('test_batch')) + suite.addTest(TestHumanSeg('test_ndarray')) + suite.addTest(TestHumanSeg('test_save_inference_model')) + runner = unittest.TextTestRunner(verbosity=2) + runner.run(suite) diff --git a/paddlehub/commands/convert.py b/paddlehub/commands/convert.py index 5ed2fad58c4b2fe1ac13f099235fb56622e372c2..6fed432a4eb501673f8c39c004326b47485bb333 100644 --- a/paddlehub/commands/convert.py +++ b/paddlehub/commands/convert.py @@ -43,8 +43,10 @@ class ConvertCommand(BaseCommand): add_help=True) self.parser.add_argument('command') self.parser.add_argument('--module_name', '-n') - self.parser.add_argument( - '--module_version', '-v', nargs='?', default='1.0.0') + self.parser.add_argument('--module_version', + '-v', + nargs='?', + default='1.0.0') self.parser.add_argument('--model_dir', '-d') self.parser.add_argument('--output_dir', '-o') @@ -60,27 +62,27 @@ class ConvertCommand(BaseCommand): arcname = os.path.join(self.module, 'assets', file) tfp.add(fullpath, arcname=arcname) - tfp.add( - self.model_file, arcname=os.path.join(self.module, MODULE_FILE)) - tfp.add( - self.serving_file, - arcname=os.path.join(self.module, SERVING_FILE)) - tfp.add( - self.init_file, arcname=os.path.join(self.module, INIT_FILE)) + tfp.add(self.model_file, + arcname=os.path.join(self.module, MODULE_FILE)) + tfp.add(self.serving_file, + arcname=os.path.join(self.module, SERVING_FILE)) + tfp.add(self.init_file, + arcname=os.path.join(self.module, INIT_FILE)) def create_module_py(self): - template_file = open(os.path.join(TMPL_DIR, 'x_model.tmpl'), 'r') + template_file = open(os.path.join(TMPL_DIR, 'x_model.tmpl'), + 'r', + encoding='utf-8') tmpl = Template(template_file.read()) lines = [] lines.append( - tmpl.substitute( - NAME="'{}'".format(self.module), - TYPE="'CV'", - AUTHOR="'Baidu'", - SUMMARY="''", - VERSION="'{}'".format(self.version), - EMAIL="''")) + tmpl.substitute(NAME="'{}'".format(self.module), + TYPE="'CV'", + AUTHOR="'Baidu'", + SUMMARY="''", + VERSION="'{}'".format(self.version), + EMAIL="''")) # self.model_file = os.path.join(self.dest, MODULE_FILE) self.model_file = os.path.join(self._tmp_dir, MODULE_FILE) if os.path.exists(self.model_file): @@ -88,7 +90,7 @@ class ConvertCommand(BaseCommand): 'File `{MODULE_FILE}` is already exists in src dir.'.format( MODULE_FILE)) - with open(self.model_file, 'w') as fp: + with open(self.model_file, 'w', encoding='utf-8') as fp: fp.writelines(lines) def create_init_py(self): @@ -99,7 +101,9 @@ class ConvertCommand(BaseCommand): 
shutil.copyfile(os.path.join(TMPL_DIR, 'init_py.tmpl'), self.init_file) def create_serving_demo_py(self): - template_file = open(os.path.join(TMPL_DIR, 'serving_demo.tmpl'), 'r') + template_file = open(os.path.join(TMPL_DIR, 'serving_demo.tmpl'), + 'r', + encoding='utf-8') tmpl = Template(template_file.read()) lines = [] @@ -110,7 +114,7 @@ class ConvertCommand(BaseCommand): raise RuntimeError( 'File `{}` is already exists in src dir.'.format(SERVING_FILE)) - with open(self.serving_file, 'w') as fp: + with open(self.serving_file, 'w', encoding='utf-8') as fp: fp.writelines(lines) @staticmethod diff --git a/paddlehub/serving/app_single.py b/paddlehub/serving/app_single.py index 0d288de7253978a047d8ce8ea606d5aec32cff62..fb2ce403e12938a64e99b6bbbbd2bdf152c701c1 100644 --- a/paddlehub/serving/app_single.py +++ b/paddlehub/serving/app_single.py @@ -317,7 +317,9 @@ def create_app(init_flag=False, configs=None): @app_instance.route("/", methods=["GET", "POST"]) def index(): - return render_template("main.html") + return '暂不提供可视化界面,请直接使用脚本进行请求。
No visual ' \ + 'interface is provided for the time being, please use the' \ + ' python script to make a request directly.' @app_instance.before_request def before_request(): diff --git a/paddlehub/tokenizer/tokenizer.py b/paddlehub/tokenizer/tokenizer.py index 4ad6db48cf0f9da4da9b8c7285cad96d0d2bf974..f2f9111a1fbd974cb6ac1801a604d766228f29b0 100644 --- a/paddlehub/tokenizer/tokenizer.py +++ b/paddlehub/tokenizer/tokenizer.py @@ -78,7 +78,11 @@ class CustomTokenizer(object): def _convert_token_to_id(self, token): """ Converts a token (str) in an id using the vocab. """ - return self.vocab.get(token, None) + v = self.vocab.get(token, None) + if v: + return v + else: + return 0 def _convert_id_to_token(self, index): """Converts an index (integer) in a token (str) using the vocab.""" @@ -123,8 +127,8 @@ class CustomTokenizer(object): ids = [] for token in tokens: wid = self._convert_token_to_id(token) - if wid: - ids.append(self._convert_token_to_id(token)) + if wid is not None: + ids.append(wid) return ids def tokenize(self, text): @@ -204,14 +208,14 @@ class CustomTokenizer(object): if isinstance(text, str): tokens = self.tokenize(text) ids = self.convert_tokens_to_ids(tokens) - return self.convert_tokens_to_ids(tokens) + return ids elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance( - text[0], str): + text[0], str): return self.convert_tokens_to_ids(text) elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance( - text[0], int): + text[0], int): return text else: raise ValueError( @@ -350,7 +354,7 @@ class CustomTokenizer(object): """ out_string = (out_string.replace(" .", ".").replace(" ?", "?").replace( " !", "!").replace(" ,", ",").replace(" ' ", "'").replace( - " n't", - "n't").replace(" 'm", "'m").replace(" 's", "'s").replace( - " 've", "'ve").replace(" 're", "'re")) + " n't", + "n't").replace(" 'm", "'m").replace(" 's", "'s").replace( + " 've", "'ve").replace(" 're", "'re")) return out_string
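
The final tokenizer hunk fixes a classic truthiness pitfall: `if wid:` silently drops any token whose id happens to be `0`, while `if wid is not None:` keeps it. Below is a minimal, self-contained sketch of that pitfall; the toy vocabulary and helper names are illustrative only and are not PaddleHub code.

```python
# Toy reproduction of the truthiness bug addressed in CustomTokenizer.
# A padding token mapped to id 0 is a perfectly valid vocabulary entry.
vocab = {"[PAD]": 0, "今天": 1, "天气": 2}


def token_to_id(token):
    # Mirrors the patched behaviour: unknown tokens fall back to id 0.
    return vocab.get(token, 0)


def to_ids_buggy(tokens):
    ids = []
    for token in tokens:
        wid = token_to_id(token)
        if wid:  # bug: id 0 is falsy, so "[PAD]" is dropped
            ids.append(wid)
    return ids


def to_ids_fixed(tokens):
    ids = []
    for token in tokens:
        wid = token_to_id(token)
        if wid is not None:  # keeps id 0
            ids.append(wid)
    return ids


tokens = ["[PAD]", "今天", "天气"]
print(to_ids_buggy(tokens))  # [1, 2]  -- the padding token vanished
print(to_ids_fixed(tokens))  # [0, 1, 2]
```

Since the patched `_convert_token_to_id` now always returns an integer, the `is not None` guard is effectively a no-op here, but it keeps `convert_tokens_to_ids` safe should a later change reintroduce `None` for out-of-vocabulary tokens.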