diff --git a/README.md b/README.md index 55176ddb10cad97c4dc88a8a2f0d0b43cf367162..a2456c926b3ec846239d62bb23a59363e06f9e76 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ PaddleSeg具备高性能、丰富的数据增强、工业级部署、全流程 基于百度视觉技术部的实际业务经验,内置10+种数据增强策略,可结合实际业务场景进行定制组合,提升模型泛化能力和鲁棒性。 -- **主流模型覆盖** +- **模块化设计** -支持U-Net, DeepLabv3+, ICNet三类主流分割网络,结合预训练模型和可调节的骨干网络,满足不同性能和精度的要求。 +支持U-Net, DeepLabv3+, ICNet, PSPNet四种主流分割网络,结合预训练模型和可调节的骨干网络,满足不同性能和精度的要求;选择不同的损失函数如Dice Loss, BCE Loss等方式可以强化小目标和不均衡样本场景下的分割精度。 - **高性能** @@ -29,6 +29,20 @@ PaddleSeg支持多进程IO、多卡并行、跨卡Batch Norm同步等训练加
+## 环境依赖 + +* PaddlePaddle >= 1.6.1 +* Python 2.7 or 3.5+ + +通过以下命令安装python包依赖,请确保在该分支上至少执行过一次以下命令 +```shell +$ pip install -r requirements.txt +``` + +其他如CUDA版本、cuDNN版本等兼容信息请查看[PaddlePaddle安装](https://www.paddlepaddle.org.cn/install/doc/index) + +
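To make the new version floor concrete, here is an illustrative check (a hedged sketch, not part of this patch or of `requirements.txt`) that the installed PaddlePaddle meets the `>= 1.6.1` requirement; it assumes only that `paddle.__version__` is a dotted version string.

```python
# Hedged sketch: confirm the PaddlePaddle >= 1.6.1 floor stated above
# before running `pip install -r requirements.txt`.
import sys

try:
    import paddle
except ImportError:
    sys.exit("PaddlePaddle not found; see the install link above.")

# paddle.__version__ is a dotted string such as "1.6.1"
installed = tuple(int(x) for x in paddle.__version__.split(".")[:3])
if installed < (1, 6, 1):
    sys.exit("PaddleSeg needs PaddlePaddle >= 1.6.1, got %s" % paddle.__version__)
print("PaddlePaddle %s OK" % paddle.__version__)
```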
+ ## 使用教程 我们提供了一系列的使用教程,来说明如何使用PaddleSeg完成语义分割模型的训练、评估、部署。 @@ -114,10 +128,19 @@ PaddleSeg在AI Studio平台上提供了在线体验的教程,欢迎体验: ## 更新日志 +* 2019.11.04 + + **`v0.2.0`** + * 新增PSPNet分割网络,提供基于COCO和cityscapes数据集的[预训练模型](./docs/model_zoo.md)4个 + * 新增Dice Loss、BCE Loss以及组合Loss配置,支持样本不均衡场景下的[模型优化](./docs/loss_select.md) + * 支持[FP16混合精度训练](./docs/multiple_gpus_train_and_mixed_precision_train.md)以及动态Loss Scaling,在不损耗精度的情况下,训练速度提升30%+ + * 支持[PaddlePaddle多卡多进程训练](./docs/multiple_gpus_train_and_mixed_precision_train.md),多卡训练时训练速度提升15%+ + * 发布基于UNet的[工业标记表盘分割模型](./contrib#%E5%B7%A5%E4%B8%9A%E7%94%A8%E8%A1%A8%E5%88%86%E5%89%B2) + * 2019.09.10 **`v0.1.0`** - * PaddleSeg分割库初始版本发布,包含DeepLabv3+, U-Net, ICNet三类分割模型, 其中DeepLabv3+支持Xception, MobileNet两种可调节的骨干网络。 + * PaddleSeg分割库初始版本发布,包含DeepLabv3+, U-Net, ICNet三类分割模型, 其中DeepLabv3+支持Xception, MobileNet v2两种可调节的骨干网络。 * CVPR19 LIP人体部件分割比赛冠军预测模型发布[ACE2P](./contrib/ACE2P) * 预置基于DeepLabv3+网络的[人像分割](./contrib/HumanSeg/)和[车道线分割](./contrib/RoadLine)预测模型发布 diff --git a/configs/deepglobe_road_extraction.yaml b/configs/deepglobe_road_extraction.yaml index c06dd8c9a42ff67a85c5c2e1decdc085faa6a57e..6dcea9f7fcee8fa4c41f57dd846e40d972c254be 100644 --- a/configs/deepglobe_road_extraction.yaml +++ b/configs/deepglobe_road_extraction.yaml @@ -1,6 +1,6 @@ EVAL_CROP_SIZE: (1025, 1025) # (width, height), for unpadding rangescaling and stepscaling TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling -AUG: +AUG: AUG_METHOD: u"stepscaling" # choice unpadding rangescaling and stepscaling FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding INF_RESIZE_VALUE: 500 # for rangescaling @@ -9,8 +9,8 @@ AUG: MAX_SCALE_FACTOR: 2.0 # for stepscaling MIN_SCALE_FACTOR: 0.5 # for stepscaling SCALE_STEP_SIZE: 0.25 # for stepscaling -BATCH_SIZE: 8 -DATASET: +BATCH_SIZE: 8 +DATASET: DATA_DIR: "./dataset/MiniDeepGlobeRoadExtraction/" IMAGE_TYPE: "rgb" # choice rgb or rgba NUM_CLASSES: 2 @@ -19,19 +19,19 @@ DATASET: VAL_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt" IGNORE_INDEX: 255 SEPARATOR: '|' -FREEZE: +FREEZE: MODEL_FILENAME: "model" PARAMS_FILENAME: "params" SAVE_DIR: "freeze_model" -MODEL: +MODEL: DEFAULT_NORM_TYPE: "bn" MODEL_NAME: "deeplabv3p" DEEPLAB: - BACKBONE: "mobilenet" + BACKBONE: "mobilenetv2" DEPTH_MULTIPLIER: 1.0 ENCODER_WITH_ASPP: False ENABLE_DECODER: False -TEST: +TEST: TEST_MODEL: "./saved_model/deeplabv3p_mobilenetv2-1-0_bn_deepglobe_road_extraction/final" TRAIN: MODEL_SAVE_DIR: "./saved_model/deeplabv3p_mobilenetv2-1-0_bn_deepglobe_road_extraction/" diff --git a/configs/deeplabv3p_mobilenet-1-0_pet.yaml b/configs/deeplabv3p_mobilenet-1-0_pet.yaml index ae7ec59adc83ba87127847e295a0811d2964edb5..7578034ddcfd559947818852cb46b972d4d513ee 100644 --- a/configs/deeplabv3p_mobilenet-1-0_pet.yaml +++ b/configs/deeplabv3p_mobilenet-1-0_pet.yaml @@ -30,7 +30,7 @@ MODEL: MODEL_NAME: "deeplabv3p" DEFAULT_NORM_TYPE: "bn" DEEPLAB: - BACKBONE: "mobilenet" + BACKBONE: "mobilenetv2" DEPTH_MULTIPLIER: 1.0 ENCODER_WITH_ASPP: False ENABLE_DECODER: False diff --git a/docs/annotation/cityscapes_demo/cityscapes_demo_dataset.yaml b/docs/annotation/cityscapes_demo/cityscapes_demo_dataset.yaml new file mode 100755 index 0000000000000000000000000000000000000000..69b43e7ad42150eb57aec055294fc0fd45ff6e49 --- /dev/null +++ b/docs/annotation/cityscapes_demo/cityscapes_demo_dataset.yaml @@ -0,0 +1,9 @@ +DATASET: + DATA_DIR: "./docs/annotation/cityscapes_demo/" + IMAGE_TYPE: "rgb" # choice rgb or rgba + NUM_CLASSES: 19 + TEST_FILE_LIST: 
"docs/annotation/cityscapes_demo/val.list" + TRAIN_FILE_LIST: "docs/annotation/cityscapes_demo/train.list" + VAL_FILE_LIST: "docs/annotation/cityscapes_demo/val.list" + SEPARATOR: " " + IGNORE_INDEX: 255 diff --git a/docs/annotation/cityscapes_demo/gtFine/train/stuttgart/stuttgart_000021_000019_gtFine_labelTrainIds.png b/docs/annotation/cityscapes_demo/gtFine/train/stuttgart/stuttgart_000021_000019_gtFine_labelTrainIds.png new file mode 100755 index 0000000000000000000000000000000000000000..ea126e4fb7f8a7e4dbfc4aeb20e7fcdf934e249e Binary files /dev/null and b/docs/annotation/cityscapes_demo/gtFine/train/stuttgart/stuttgart_000021_000019_gtFine_labelTrainIds.png differ diff --git a/docs/annotation/cityscapes_demo/gtFine/train/stuttgart/stuttgart_000072_000019_gtFine_labelTrainIds.png b/docs/annotation/cityscapes_demo/gtFine/train/stuttgart/stuttgart_000072_000019_gtFine_labelTrainIds.png new file mode 100755 index 0000000000000000000000000000000000000000..b8ee390e4cb1175525ba1b4606b6e5653643a671 Binary files /dev/null and b/docs/annotation/cityscapes_demo/gtFine/train/stuttgart/stuttgart_000072_000019_gtFine_labelTrainIds.png differ diff --git a/docs/annotation/cityscapes_demo/gtFine/val/frankfurt/frankfurt_000001_062250_gtFine_labelTrainIds.png b/docs/annotation/cityscapes_demo/gtFine/val/frankfurt/frankfurt_000001_062250_gtFine_labelTrainIds.png new file mode 100755 index 0000000000000000000000000000000000000000..ea6f7a16033fc2ee99debe4002e775c8f1c72d92 Binary files /dev/null and b/docs/annotation/cityscapes_demo/gtFine/val/frankfurt/frankfurt_000001_062250_gtFine_labelTrainIds.png differ diff --git a/docs/annotation/cityscapes_demo/gtFine/val/frankfurt/frankfurt_000001_063045_gtFine_labelTrainIds.png b/docs/annotation/cityscapes_demo/gtFine/val/frankfurt/frankfurt_000001_063045_gtFine_labelTrainIds.png new file mode 100755 index 0000000000000000000000000000000000000000..3d7f93a4e91beb9819324dca91f74478e7af247b Binary files /dev/null and b/docs/annotation/cityscapes_demo/gtFine/val/frankfurt/frankfurt_000001_063045_gtFine_labelTrainIds.png differ diff --git a/docs/annotation/cityscapes_demo/leftImg8bit/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png b/docs/annotation/cityscapes_demo/leftImg8bit/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png new file mode 100755 index 0000000000000000000000000000000000000000..3994dd3da339c1fce21e48318e5866772dfb9451 Binary files /dev/null and b/docs/annotation/cityscapes_demo/leftImg8bit/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png differ diff --git a/docs/annotation/cityscapes_demo/leftImg8bit/train/stuttgart/stuttgart_000072_000019_leftImg8bit.png b/docs/annotation/cityscapes_demo/leftImg8bit/train/stuttgart/stuttgart_000072_000019_leftImg8bit.png new file mode 100755 index 0000000000000000000000000000000000000000..f2c18c128f3ba8b190dc9b645acd035dce6eba61 Binary files /dev/null and b/docs/annotation/cityscapes_demo/leftImg8bit/train/stuttgart/stuttgart_000072_000019_leftImg8bit.png differ diff --git a/docs/annotation/cityscapes_demo/leftImg8bit/val/frankfurt/frankfurt_000001_062250_leftImg8bit.png b/docs/annotation/cityscapes_demo/leftImg8bit/val/frankfurt/frankfurt_000001_062250_leftImg8bit.png new file mode 100755 index 0000000000000000000000000000000000000000..de0b21d160edc2dd4c7c8d553a5a5f090b4bfd5b Binary files /dev/null and b/docs/annotation/cityscapes_demo/leftImg8bit/val/frankfurt/frankfurt_000001_062250_leftImg8bit.png differ diff --git 
a/docs/annotation/cityscapes_demo/leftImg8bit/val/frankfurt/frankfurt_000001_063045_leftImg8bit.png b/docs/annotation/cityscapes_demo/leftImg8bit/val/frankfurt/frankfurt_000001_063045_leftImg8bit.png new file mode 100755 index 0000000000000000000000000000000000000000..f89272d2cb300f2070d9771d5181c8894d08d479 Binary files /dev/null and b/docs/annotation/cityscapes_demo/leftImg8bit/val/frankfurt/frankfurt_000001_063045_leftImg8bit.png differ diff --git a/docs/annotation/cityscapes_demo/train_list.txt b/docs/annotation/cityscapes_demo/train_list.txt new file mode 100755 index 0000000000000000000000000000000000000000..02a66f470664a7c598f0d28b8470195d9ef62b0d --- /dev/null +++ b/docs/annotation/cityscapes_demo/train_list.txt @@ -0,0 +1,2 @@ +leftImg8bit/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png gtFine/train/stuttgart/stuttgart_000021_000019_gtFine_labelTrainIds.png +leftImg8bit/train/stuttgart/stuttgart_000072_000019_leftImg8bit.png gtFine/train/stuttgart/stuttgart_000072_000019_gtFine_labelTrainIds.png diff --git a/docs/annotation/cityscapes_demo/val_list.txt b/docs/annotation/cityscapes_demo/val_list.txt new file mode 100755 index 0000000000000000000000000000000000000000..609f78bc50cec0917d659f4f95411c7d8fef2e16 --- /dev/null +++ b/docs/annotation/cityscapes_demo/val_list.txt @@ -0,0 +1,2 @@ +leftImg8bit/val/frankfurt/frankfurt_000001_062250_leftImg8bit.png gtFine/val/frankfurt/frankfurt_000001_062250_gtFine_labelTrainIds.png +leftImg8bit/val/frankfurt/frankfurt_000001_063045_leftImg8bit.png gtFine/val/frankfurt/frankfurt_000001_063045_gtFine_labelTrainIds.png diff --git a/docs/annotation/data_annotated/aa63d7e6db0d03137883772c246c6761fc201059.json b/docs/annotation/data_annotated/aa63d7e6db0d03137883772c246c6761fc201059.json deleted file mode 100644 index 34bb4674f0d0f16f257e2cc0abb7292f1dc4566d..0000000000000000000000000000000000000000 --- a/docs/annotation/data_annotated/aa63d7e6db0d03137883772c246c6761fc201059.json +++ /dev/null @@ -1 +0,0 @@ -{"path":"/Users/chulutao/dataset/humanseg/aa63d7e6db0d03137883772c246c6761fc201059.jpg","outputs":{"object":[{"name":"person","polygon":{"x1":321.99,"y1":63,"x2":293,"y2":98.00999999999999,"x3":245.01,"y3":141.01,"x4":221,"y4":194,"x5":231.99,"y5":237,"x6":231.99,"y6":348.01,"x7":191,"y7":429,"x8":197,"y8":465.01,"x9":193,"y9":586,"x10":151,"y10":618.01,"x11":124,"y11":622,"x12":100,"y12":703,"x13":121.99,"y13":744,"x14":141.99,"y14":724,"x15":163,"y15":658.01,"x16":238.01,"y16":646,"x17":259,"y17":627,"x18":313,"y18":618.01,"x19":416,"y19":639,"x20":464,"y20":606,"x21":454,"y21":555.01,"x22":404,"y22":508.01,"x23":430,"y23":489,"x24":407,"y24":464,"x25":397,"y25":365.01,"x26":407,"y26":290,"x27":361.99,"y27":252,"x28":376,"y28":215.01,"x29":391.99,"y29":189,"x30":388.01,"y30":135.01,"x31":340,"y31":120,"x32":313,"y32":161.01,"x33":307,"y33":188.01,"x34":311,"y34":207,"x35":277,"y35":186,"x36":293,"y36":137,"x37":308.01,"y37":117,"x38":361,"y38":93}}]},"time_labeled":1568101256852,"labeled":true,"size":{"width":706,"height":1000,"depth":3}} \ No newline at end of file diff --git a/docs/annotation/jingling2seg.md b/docs/annotation/jingling2seg.md index 4e124962c3ae6ca4293ce0f0474591f3d7deec21..2637df5146e6bd5027600a26a42d5c2a6d3ece80 100644 --- a/docs/annotation/jingling2seg.md +++ b/docs/annotation/jingling2seg.md @@ -44,7 +44,7 @@ **注意:导出的标注文件位于`保存位置`下的`outputs`目录。** -精灵标注产出的真值文件可参考我们给出的文件夹`data_annotated`。 +精灵标注产出的真值文件可参考我们给出的文件夹`docs/annotation/jingling_demo`。
@@ -55,21 +55,24 @@ ## 3 数据格式转换 -* 我们用于完成语义分割的数据集目录结构如下: +* 经过数据格式转换后的数据集目录结构如下: ``` my_dataset # 根目录 - |-- JPEGImages # 数据集图片 - |-- SegmentationClassPNG # 数据集真值 - | |-- xxx.png # 像素级别的真值信息 - | |... - |-- class_names.txt # 数据集的类别名称 + |-- outputs # 标注工具导出目录 + | |-- annotations # 数据集真值 + | |-- xxx.png # 像素级别的真值信息 + | |... + | |-- class_names.txt # 数据集的类别名称 + | |-- xxx.json # 标注json文件 + |-- xxx.jpg(png or other) # 数据集原图 + |-- ... ```
-图5 训练所需的数据集目录的结构示意图
+图5 格式转换后的数据集目录的结构示意图
* 运行转换脚本需要依赖labelme和pillow,如未安装,请先安装。Labelme的具体安装流程请参见[官方安装指南](https://github.com/wkentaro/labelme)。Pillow的安装: @@ -81,16 +84,15 @@ pip install pillow * 运行以下代码,将标注后的数据转换成满足以上格式的数据集: ``` - python jingling2seg.py + python pdseg/tools/jingling2seg.py ``` -其中,``为精灵标注产出的json文件所在文件夹的目录,一般为精灵工具使用(3)中`保存位置`下的`outputs`目录。``为转换后的数据集所在文件夹的目录。 +其中,``为精灵标注产出的json文件所在文件夹的目录,一般为精灵工具使用(3)中`保存位置`下的`outputs`目录。 -**注意:``不用预先创建,脚本运行时会自动创建,否则会报错。** -转换得到的数据集可参考我们给出的文件夹`my_dataset`。其中,文件`class_names.txt`是数据集中所有标注类别的名称,包含背景类;文件夹`JPEGImages`保存的是数据集的图片;文件夹`SegmentationClassPNG`保存的是各图片的像素级别的真值信息,背景类`_background_`对应为0,其它目标类别从1开始递增,至多为255。 +转换得到的数据集可参考我们给出的文件夹`docs/annotation/jingling_demo`。其中,文件`class_names.txt`是数据集中所有标注类别的名称,包含背景类;文件夹`annotations`保存的是各图片的像素级别的真值信息,背景类`_background_`对应为0,其它目标类别从1开始递增,至多为255。
-图6 训练所需的数据集各目录的内容示意图
+图6 格式转换后的数据集各目录的内容示意图
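To clarify what the conversion command above produces, the following hedged sketch (an illustration, not the actual `pdseg/tools/jingling2seg.py`) rasterizes one exported polygon JSON of the shape shown in `jingling_demo/outputs` into a label PNG, with `_background_` as 0 and object classes numbered from 1; the paths and class list mirror the demo files and are otherwise assumptions.

```python
# Hedged sketch of the conversion that pdseg/tools/jingling2seg.py performs;
# paths and the class list are illustrative, not taken from the script.
import json
from PIL import Image, ImageDraw

def convert_one(json_path, out_png, class_names):
    with open(json_path) as f:
        anno = json.load(f)
    w, h = anno["size"]["width"], anno["size"]["height"]
    label = Image.new("L", (w, h), 0)            # 0 == _background_
    draw = ImageDraw.Draw(label)
    for obj in anno["outputs"]["object"]:
        poly = obj["polygon"]                    # keys x1,y1,x2,y2,...
        n = len(poly) // 2
        pts = [(poly["x%d" % i], poly["y%d" % i]) for i in range(1, n + 1)]
        draw.polygon(pts, fill=class_names.index(obj["name"]))
    label.save(out_png)                          # pixel value == class id

convert_one("outputs/aa63d7e6db0d03137883772c246c6761fc201059.json",
            "outputs/annotations/aa63d7e6db0d03137883772c246c6761fc201059.png",
            ["_background_", "person"])
```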
diff --git a/docs/annotation/data_annotated/aa63d7e6db0d03137883772c246c6761fc201059.jpg b/docs/annotation/jingling_demo/aa63d7e6db0d03137883772c246c6761fc201059.jpg similarity index 100% rename from docs/annotation/data_annotated/aa63d7e6db0d03137883772c246c6761fc201059.jpg rename to docs/annotation/jingling_demo/aa63d7e6db0d03137883772c246c6761fc201059.jpg diff --git a/docs/annotation/jingling_demo/outputs/aa63d7e6db0d03137883772c246c6761fc201059.json b/docs/annotation/jingling_demo/outputs/aa63d7e6db0d03137883772c246c6761fc201059.json new file mode 100644 index 0000000000000000000000000000000000000000..69d80205de92afc9cffa304b32ff0e3e95502687 --- /dev/null +++ b/docs/annotation/jingling_demo/outputs/aa63d7e6db0d03137883772c246c6761fc201059.json @@ -0,0 +1 @@ +{"path":"/Users/dataset/aa63d7e6db0d03137883772c246c6761fc201059.jpg","outputs":{"object":[{"name":"person","polygon":{"x1":321.99,"y1":63,"x2":293,"y2":98.00999999999999,"x3":245.01,"y3":141.01,"x4":221,"y4":194,"x5":231.99,"y5":237,"x6":231.99,"y6":348.01,"x7":191,"y7":429,"x8":197,"y8":465.01,"x9":193,"y9":586,"x10":151,"y10":618.01,"x11":124,"y11":622,"x12":100,"y12":703,"x13":121.99,"y13":744,"x14":141.99,"y14":724,"x15":163,"y15":658.01,"x16":238.01,"y16":646,"x17":259,"y17":627,"x18":313,"y18":618.01,"x19":416,"y19":639,"x20":464,"y20":606,"x21":454,"y21":555.01,"x22":404,"y22":508.01,"x23":430,"y23":489,"x24":407,"y24":464,"x25":397,"y25":365.01,"x26":407,"y26":290,"x27":361.99,"y27":252,"x28":376,"y28":215.01,"x29":391.99,"y29":189,"x30":388.01,"y30":135.01,"x31":340,"y31":120,"x32":313,"y32":161.01,"x33":307,"y33":188.01,"x34":311,"y34":207,"x35":277,"y35":186,"x36":293,"y36":137,"x37":308.01,"y37":117,"x38":361,"y38":93}}]},"time_labeled":1568101256852,"labeled":true,"size":{"width":706,"height":1000,"depth":3}} \ No newline at end of file diff --git a/docs/annotation/jingling_demo/outputs/annotations/aa63d7e6db0d03137883772c246c6761fc201059.png b/docs/annotation/jingling_demo/outputs/annotations/aa63d7e6db0d03137883772c246c6761fc201059.png new file mode 100644 index 0000000000000000000000000000000000000000..8dfbff7b73bcfff7ef79b904667241731641d4a4 Binary files /dev/null and b/docs/annotation/jingling_demo/outputs/annotations/aa63d7e6db0d03137883772c246c6761fc201059.png differ diff --git a/docs/annotation/jingling_demo/outputs/class_names.txt b/docs/annotation/jingling_demo/outputs/class_names.txt new file mode 100644 index 0000000000000000000000000000000000000000..dec69bf78fbb98cde124d68fd9babfe184714a51 --- /dev/null +++ b/docs/annotation/jingling_demo/outputs/class_names.txt @@ -0,0 +1,2 @@ +_background_ +person \ No newline at end of file diff --git a/docs/annotation/labelme2seg.md b/docs/annotation/labelme2seg.md index e501b3706b59b73b83ea5b651c3cb63cd6f52051..a270591d06131ec48f4ebb0d25ec206031956a24 100644 --- a/docs/annotation/labelme2seg.md +++ b/docs/annotation/labelme2seg.md @@ -47,7 +47,7 @@ git clone https://github.com/wkentaro/labelme ​ (3) 图片中所有目标的标注都完成后,点击`Save`保存json文件,**请将json文件和图片放在同一个文件夹里**,点击`Next Image`标注下一张图片。 -LableMe产出的真值文件可参考我们给出的文件夹`data_annotated`。 +LableMe产出的真值文件可参考我们给出的文件夹`docs/annotation/labelme_demo`。
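Since the step above requires each json to sit beside its image, here is a small hedged sketch (illustrative only, with an assumed folder path) that flags missing pairs before conversion:

```python
# Hedged sketch: verify each LabelMe json in a folder sits next to its
# image, as required above (json and image must share one directory).
import glob
import os

input_dir = "docs/annotation/labelme_demo"   # illustrative path
for json_path in glob.glob(os.path.join(input_dir, "*.json")):
    stem = os.path.splitext(json_path)[0]
    if not any(os.path.exists(stem + ext) for ext in (".jpg", ".png")):
        print("missing image for", json_path)
```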
@@ -65,21 +65,24 @@ LableMe产出的真值文件可参考我们给出的文件夹`data_annotated`。 ## 3 数据格式转换 -* 我们用于完成语义分割的数据集目录结构如下: +* 经过数据格式转换后的数据集目录结构如下: ``` my_dataset # 根目录 - |-- JPEGImages # 数据集图片 - |-- SegmentationClassPNG # 数据集真值 + |-- annotations # 数据集真值 | |-- xxx.png # 像素级别的真值信息 | |... |-- class_names.txt # 数据集的类别名称 + |-- xxx.jpg(png or other) # 数据集原图 + |-- ... + |-- xxx.json # 标注json文件 + |-- ... ```
-图7 训练所需的数据集目录的结构示意图
+图7 格式转换后的数据集目录的结构示意图
* 运行转换脚本需要依赖labelme和pillow,如未安装,请先安装。Labelme的具体安装流程请参见[官方安装指南](https://github.com/wkentaro/labelme)。Pillow的安装: @@ -91,14 +94,14 @@ pip install pillow * 运行以下代码,将标注后的数据转换成满足以上格式的数据集: ``` - python labelme2seg.py + python pdseg/tools/labelme2seg.py ``` -其中,``为图片以及LabelMe产出的json文件所在文件夹的目录,``为转换后的数据集所在文件夹的目录。**需注意的是:``不用预先创建,脚本运行时会自动创建,否则会报错。** +其中,``为图片以及LabelMe产出的json文件所在文件夹的目录,同时也是转换后的标注集所在文件夹的目录。 -转换得到的数据集可参考我们给出的文件夹`my_dataset`。其中,文件`class_names.txt`是数据集中所有标注类别的名称,包含背景类;文件夹`JPEGImages`保存的是数据集的图片;文件夹`SegmentationClassPNG`保存的是各图片的像素级别的真值信息,背景类`_background_`对应为0,其它目标类别从1开始递增,至多为255。 +转换得到的数据集可参考我们给出的文件夹`docs/annotation/labelme_demo`。其中,文件`class_names.txt`是数据集中所有标注类别的名称,包含背景类;文件夹`annotations`保存的是各图片的像素级别的真值信息,背景类`_background_`对应为0,其它目标类别从1开始递增,至多为255。
-图8 训练所需的数据集各目录的内容示意图
+图8 格式转换后的数据集各目录的内容示意图
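As a complement to the description above, a hedged sanity-check sketch: it reads a converted label PNG and confirms every pixel value has an entry in `class_names.txt` (`_background_` as 0, other classes from 1, at most 255), using the `labelme_demo` files added in this diff.

```python
# Hedged sketch: check a converted label PNG against class_names.txt
# (_background_ == 0, other classes count up from 1, at most 255).
import numpy as np
from PIL import Image

with open("docs/annotation/labelme_demo/class_names.txt") as f:
    class_names = [line.strip() for line in f if line.strip()]

label = np.asarray(
    Image.open("docs/annotation/labelme_demo/annotations/2011_000025.png"))
for value in np.unique(label):
    assert value < len(class_names), "pixel value %d has no class" % value
    print("%d -> %s" % (value, class_names[value]))
```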
diff --git a/docs/annotation/data_annotated/2011_000025.jpg b/docs/annotation/labelme_demo/2011_000025.jpg similarity index 100% rename from docs/annotation/data_annotated/2011_000025.jpg rename to docs/annotation/labelme_demo/2011_000025.jpg diff --git a/docs/annotation/data_annotated/2011_000025.json b/docs/annotation/labelme_demo/2011_000025.json similarity index 100% rename from docs/annotation/data_annotated/2011_000025.json rename to docs/annotation/labelme_demo/2011_000025.json diff --git a/docs/annotation/labelme_demo/annotations/2011_000025.png b/docs/annotation/labelme_demo/annotations/2011_000025.png new file mode 100644 index 0000000000000000000000000000000000000000..dcf7c96517d4870f6e83293cef62e3285e5b37e3 Binary files /dev/null and b/docs/annotation/labelme_demo/annotations/2011_000025.png differ diff --git a/docs/annotation/my_dataset/class_names.txt b/docs/annotation/labelme_demo/class_names.txt similarity index 100% rename from docs/annotation/my_dataset/class_names.txt rename to docs/annotation/labelme_demo/class_names.txt diff --git a/docs/data_prepare.md b/docs/data_prepare.md index 87d0ced32811c3772d1e7cafdee00fcb2e1a148d..50864a730a534c4a0e5eba84fb11dfb1bb9c542d 100644 --- a/docs/data_prepare.md +++ b/docs/data_prepare.md @@ -75,7 +75,7 @@ PaddleSeg采用通用的文件列表方式组织训练集、验证集和测试 不可在`DATASET.TRAIN_FILE_LIST`和`DATASET.VAL_FILE_LIST`配置项中使用。 -完整的配置信息可以参考[`./dataset/cityscapes_demo`](../dataset/cityscapes_demo/)目录下的yaml和文件列表。 +完整的配置信息可以参考[`./docs/annotation/cityscapes_demo`](../docs/annotation/cityscapes_demo/)目录下的yaml和文件列表。 ### 文件列表生成 PaddleSeg提供了生成文件列表的使用脚本,可适用于自定义数据集或cityscapes数据集,并支持通过不同的Flags来开启特定功能。 diff --git a/docs/imgs/annotation/image-5.png b/docs/imgs/annotation/image-5.png index 055431d3f86b21c8e07270220821954bb9ad9aba..a6adf36bde6c390c774020045a771a1d5416320d 100644 Binary files a/docs/imgs/annotation/image-5.png and b/docs/imgs/annotation/image-5.png differ diff --git a/docs/imgs/annotation/image-6-2.png b/docs/imgs/annotation/image-6-2.png new file mode 100644 index 0000000000000000000000000000000000000000..3906c420691bc443eec228a94c0e19be6883564a Binary files /dev/null and b/docs/imgs/annotation/image-6-2.png differ diff --git a/docs/imgs/annotation/image-6.png b/docs/imgs/annotation/image-6.png index 7f0552aa182d0d30eb9f747e358cf1dc47249c8c..8660ff8449a890e0c0e740593abe11d15f816b39 100644 Binary files a/docs/imgs/annotation/image-6.png and b/docs/imgs/annotation/image-6.png differ diff --git a/docs/imgs/annotation/image-7.png b/docs/imgs/annotation/image-7.png index 2bc219690f2aa1f00cf748db5ac1b1d7df5855c4..b65d56e92b2b5c1633f5c3168eee2971b476e8f3 100644 Binary files a/docs/imgs/annotation/image-7.png and b/docs/imgs/annotation/image-7.png differ diff --git a/docs/imgs/annotation/jingling-5.png b/docs/imgs/annotation/jingling-5.png index 54bfb1e03f89078873cae11704d6d6cc6eb7cd2c..59a15567a3e25df338a3577fe9a9035c5bd0c719 100644 Binary files a/docs/imgs/annotation/jingling-5.png and b/docs/imgs/annotation/jingling-5.png differ diff --git a/docs/installation.md b/docs/installation.md index 9f2bbb54e2e2c9fd2184a996ca7ca0fb146e1185..80cc341bb8764065dc7fd871e81fdb31225d636a 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -3,9 +3,11 @@ ## 1. 安装PaddlePaddle 版本要求 -* PaddlePaddle >= 1.5.2 +* PaddlePaddle >= 1.6.1 * Python 2.7 or 3.5+ +更多详细安装信息如CUDA版本、cuDNN版本等兼容信息请查看[PaddlePaddle安装](https://www.paddlepaddle.org.cn/install/doc/index) + ### pip安装 由于图像分割模型计算开销大,推荐在GPU版本的PaddlePaddle下使用PaddleSeg. 
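Tying together the file-list format referenced above (the `cityscapes_demo` lists and the `SEPARATOR` setting), here is a hedged sketch of generating `train_list.txt`/`val_list.txt`; PaddleSeg ships its own list-generation script, so this is only an illustration whose suffix rules are inferred from the Cityscapes naming shown above.

```python
# Hedged sketch (PaddleSeg provides its own script for this): build a
# "image<SEP>label" file list from the Cityscapes-style layout shown above.
import os

def write_file_list(data_dir, img_dir, label_dir, out_path, separator=" "):
    lines = []
    for root, _, files in os.walk(os.path.join(data_dir, img_dir)):
        for name in sorted(files):
            if not name.endswith("_leftImg8bit.png"):
                continue
            img_rel = os.path.relpath(os.path.join(root, name), data_dir)
            label_rel = img_rel.replace(img_dir, label_dir, 1).replace(
                "_leftImg8bit.png", "_gtFine_labelTrainIds.png")
            lines.append(img_rel + separator + label_rel)
    with open(out_path, "w") as f:
        f.write("\n".join(lines) + "\n")

base = "docs/annotation/cityscapes_demo"
write_file_list(base, "leftImg8bit/train", "gtFine/train",
                os.path.join(base, "train_list.txt"))
write_file_list(base, "leftImg8bit/val", "gtFine/val",
                os.path.join(base, "val_list.txt"))
```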
diff --git a/docs/multiple_gpus_train_and_mixed_precision_train.md b/docs/multiple_gpus_train_and_mixed_precision_train.md index b551e75bfc278a38710de0e0a85a06da69a2bce2..7826d88171bec71cba7ae2db9327ce3dfd47efd9 100644 --- a/docs/multiple_gpus_train_and_mixed_precision_train.md +++ b/docs/multiple_gpus_train_and_mixed_precision_train.md @@ -1,7 +1,7 @@ # PaddleSeg 多进程训练和混合精度训练 ### 环境要求 -* PaddlePaddle >= 1.6.0 +* PaddlePaddle >= 1.6.1 * NVIDIA NCCL >= 2.4.7 环境配置,数据,预训练模型准备等工作请参考[安装说明](./installation.md),[PaddleSeg使用说明](./usage.md) diff --git a/docs/usage.md b/docs/usage.md index f3c2bd297b768a070ee56ec330ea6e21d5405dcb..e38d16e047b4b97a71278b1ba17682d20c4586ee 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -101,7 +101,7 @@ tensorboard --logdir train_log --host {$HOST_IP} --port {$PORT} ``` NOTE: -1. 上述示例中,$HOST_IP为机器IP地址,请替换为实际IP,$PORT请替换为可访问的端口 +1. 上述示例中,$HOST\_IP为机器IP地址,请替换为实际IP,$PORT请替换为可访问的端口 2. 数据量较大时,前端加载速度会比较慢,请耐心等待 启动TensorBoard命令后,我们可以在浏览器中查看对应的训练数据 diff --git a/inference/demo.cpp b/inference/demo.cpp index 657d4f4244069d0a59c4dee7827d047a375f2741..2202b31b739bf18682fdf468b36ffe4e9e434726 100644 --- a/inference/demo.cpp +++ b/inference/demo.cpp @@ -1,3 +1,17 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include #include #include @@ -9,7 +23,8 @@ int main(int argc, char** argv) { // 0. parse args google::ParseCommandLineFlags(&argc, &argv, true); if (FLAGS_conf.empty() || FLAGS_input_dir.empty()) { - std::cout << "Usage: ./predictor --conf=/config/path/to/your/model --input_dir=/directory/of/your/input/images"; + std::cout << "Usage: ./predictor --conf=/config/path/to/your/model " + << "--input_dir=/directory/of/your/input/images"; return -1; } // 1. create a predictor and init it with conf @@ -20,7 +35,8 @@ int main(int argc, char** argv) { } // 2. get all the images with extension '.jpeg' at input_dir - auto imgs = PaddleSolution::utils::get_directory_images(FLAGS_input_dir, ".jpeg|.jpg"); + auto imgs = PaddleSolution::utils::get_directory_images(FLAGS_input_dir, + ".jpeg|.jpg"); // 3. 
predict predictor.predict(imgs); return 0; diff --git a/inference/docs/linux_build.md b/inference/docs/linux_build.md index 613317c5279a142df3990acb20c60fb424be046f..75a16bbea7499eaa007884b0b1f16126eacca56a 100644 --- a/inference/docs/linux_build.md +++ b/inference/docs/linux_build.md @@ -6,7 +6,8 @@ ## 前置条件 * G++ 4.8.2 ~ 4.9.4 * CMake 3.0+ -* CUDA 8.0 / CUDA 9.0 / CUDA 10.0, cudnn 7+ (仅在使用GPU版本的预测库时需要) +* CUDA 9.0 / CUDA 10.0, cudnn 7+ (仅在使用GPU版本的预测库时需要) +* CentOS 7.6, Ubuntu 16.04, Ubuntu 18.04 (均在以上系统验证过) 请确保系统已经安装好上述基本软件,**下面所有示例以工作目录为 `/root/projects/`演示**。 @@ -20,17 +21,16 @@ ### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference -PaddlePaddle C++ 预测库主要分为CPU版本和GPU版本。其中,针对不同的CUDA版本,GPU版本预测库又分为三个版本预测库:CUDA 8、CUDA 9和CUDA 10版本预测库。以下为各版本C++预测库的下载链接: +PaddlePaddle C++ 预测库主要分为CPU版本和GPU版本。其中,针对不同的CUDA版本,GPU版本预测库又分为两个版本预测库:CUDA 9.0和CUDA 10.0版本预测库。以下为各版本C++预测库的下载链接: | 版本 | 链接 | | ---- | ---- | -| CPU版本 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/latest-cpu-avx-mkl/fluid_inference.tgz) | -| CUDA 8版本 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/latest-gpu-cuda8-cudnn7-avx-mkl/fluid_inference.tgz) | -| CUDA 9版本 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/latest-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) | -| CUDA 10版本 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/latest-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz) | +| CPU版本 | [fluid_inference.tgz](https://bj.bcebos.com/paddlehub/paddle_inference_lib/fluid_inference_linux_cpu_1.6.1.tgz) | +| CUDA 9.0版本 | [fluid_inference.tgz](https://bj.bcebos.com/paddlehub/paddle_inference_lib/fluid_inference_linux_cuda97_1.6.1.tgz) | +| CUDA 10.0版本 | [fluid_inference.tgz](https://bj.bcebos.com/paddlehub/paddle_inference_lib/fluid_inference_linux_cuda10_1.6.1.tgz) | -针对不同的CPU类型、不同的指令集,官方提供更多可用的预测库版本,目前已经推出1.6版本的预测库,具体请参考以下链接:[C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/build_and_install_lib_cn.html) +针对不同的CPU类型、不同的指令集,官方提供更多可用的预测库版本,目前已经推出1.6版本的预测库。其余版本具体请参考以下链接:[C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/build_and_install_lib_cn.html) 下载并解压后`/root/projects/fluid_inference`目录包含内容为: @@ -63,7 +63,7 @@ make install ### Step4: 编译 -`CMake`编译时,涉及到四个编译参数用于指定核心依赖库的路径, 他们的定义如下:(带*表示仅在使用**GPU版本**预测库时指定) +`CMake`编译时,涉及到四个编译参数用于指定核心依赖库的路径, 他们的定义如下:(带*表示仅在使用**GPU版本**预测库时指定,其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1版本CUDA库**) | 参数名 | 含义 | | ---- | ---- | @@ -84,6 +84,7 @@ make 在使用**CPU版本**预测库进行编译时,可执行下列操作。 ```shell cd /root/projects/PaddleSeg/inference + mkdir build && cd build cmake .. 
-DWITH_GPU=OFF -DPADDLE_DIR=/root/projects/fluid_inference -DOPENCV_DIR=/root/projects/opencv3/ -DWITH_STATIC_LIB=OFF make @@ -97,4 +98,4 @@ make ./demo --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory ``` -更详细说明请参考README文档: [预测和可视化部分](../README.md) +更详细说明请参考README文档: [预测和可视化部分](../README.md) \ No newline at end of file diff --git a/inference/docs/windows_vs2015_build.md b/inference/docs/windows_vs2015_build.md index 360e70558d06765863bab0d0ae652847befbabd4..f0c96a18b1204b434653be8cd29dce57d229d10c 100644 --- a/inference/docs/windows_vs2015_build.md +++ b/inference/docs/windows_vs2015_build.md @@ -5,7 +5,7 @@ ## 前置条件 * Visual Studio 2015 -* CUDA 8.0/ CUDA 9.0/ CUDA 10.0,cudnn 7+ (仅在使用GPU版本的预测库时需要) +* CUDA 9.0 / CUDA 10.0,cudnn 7+ (仅在使用GPU版本的预测库时需要) * CMake 3.0+ 请确保系统已经安装好上述基本软件,**下面所有示例以工作目录为 `D:\projects`演示**。 @@ -20,14 +20,13 @@ ### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference -PaddlePaddle C++ 预测库主要分为两大版本:CPU版本和GPU版本。其中,针对不同的CUDA版本,GPU版本预测库又分为三个版本预测库:CUDA 8、CUDA 9和CUDA 10版本预测库。根据Windows环境,下载相应版本的PaddlePaddle预测库,并解压到`D:\projects\`目录。以下为各版本C++预测库(CUDA 8版本基于1.5版本的预测库,其余均基于1.6版本的预测库)的下载链接: +PaddlePaddle C++ 预测库主要分为两大版本:CPU版本和GPU版本。其中,针对不同的CUDA版本,GPU版本预测库又分为两个版本预测库:CUDA 9.0和CUDA 10.0版本预测库。根据Windows环境,下载相应版本的PaddlePaddle预测库,并解压到`D:\projects\`目录。以下为各版本C++预测库的下载链接: | 版本 | 链接 | | ---- | ---- | -| CPU版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.0/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | -| CUDA 8版本 | [fluid_inference_install_dir.zip](https://paddle-inference-lib.bj.bcebos.com/1.5.1-win/gpu_mkl_avx_8.0/fluid_inference_install_dir.zip) | -| CUDA 9版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.0/win-infer/mkl/post97/fluid_inference_install_dir.zip) | -| CUDA 10版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.0/win-infer/mkl/post107/fluid_inference_install_dir.zip) | +| CPU版本 | [fluid_inference_install_dir.zip](https://bj.bcebos.com/paddlehub/paddle_inference_lib/fluid_install_dir_win_cpu_1.6.zip) | +| CUDA 9.0版本 | [fluid_inference_install_dir.zip](https://bj.bcebos.com/paddlehub/paddle_inference_lib/fluid_inference_install_dir_win_cuda9_1.6.1.zip) | +| CUDA 10.0版本 | [fluid_inference_install_dir.zip](https://bj.bcebos.com/paddlehub/paddle_inference_lib/fluid_inference_install_dir_win_cuda10_1.6.1.zip) | 解压后`D:\projects\fluid_inference`目录包含内容为: ``` @@ -59,31 +58,36 @@ fluid_inference call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 ``` -* CMAKE编译工程 (带*表示仅在使用**GPU版本**预测库时指定) - * PADDLE_DIR: fluid_inference预测库路径 - * *CUDA_LIB: CUDA动态库目录, 请根据实际安装情况调整 - * OPENCV_DIR: OpenCV解压目录 +三个编译参数的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**): -在使用**GPU版本**预测库进行编译时,可执行下列操作。 -``` +| 参数名 | 含义 | +| ---- | ---- | +| *CUDA_LIB | CUDA的库路径 | +| OPENCV_DIR | OpenCV的安装路径 | +| PADDLE_DIR | Paddle预测库的路径 | + +在使用**GPU版本**预测库进行编译时,可执行下列操作。**注意**把对应的参数改为你的上述依赖库实际路径: + +```bash # 切换到预测库所在目录 cd /d D:\projects\PaddleSeg\inference\ # 创建构建目录, 重新构建只需要删除该目录即可 mkdir build cd build # cmake构建VS项目 -D:\projects\PaddleSeg\inference\build> cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DPADDLE_DIR=D:\projects\fluid_inference -DCUDA_LIB=D:\projects\cudalib\v8.0\lib\x64 -DOPENCV_DIR=D:\projects\opencv -T host=x64 +D:\projects\PaddleSeg\inference\build> cmake .. 
-G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DPADDLE_DIR=D:\projects\fluid_inference -DCUDA_LIB=D:\projects\cudalib\v9.0\lib\x64 -DOPENCV_DIR=D:\projects\opencv -T host=x64 ``` 在使用**CPU版本**预测库进行编译时,可执行下列操作。 -``` + +```bash # 切换到预测库所在目录 cd /d D:\projects\PaddleSeg\inference\ # 创建构建目录, 重新构建只需要删除该目录即可 mkdir build cd build # cmake构建VS项目 -D:\projects\PaddleSeg\inference\build> cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DPADDLE_DIR=D:\projects\fluid_inference -DOPENCV_DIR=D:\projects\opencv -T host=x64 +D:\projects\PaddleSeg\inference\build> cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_GPU=OFF -DPADDLE_DIR=D:\projects\fluid_inference -DOPENCV_DIR=D:\projects\opencv -T host=x64 ``` 这里的`cmake`参数`-G`, 表示生成对应的VS版本的工程,可以根据自己的`VS`版本调整,具体请参考[cmake文档](https://cmake.org/cmake/help/v3.15/manual/cmake-generators.7.html) diff --git a/inference/docs/windows_vs2019_build.md b/inference/docs/windows_vs2019_build.md index d7bf5d022d1c166aac58f0f6fb305964d0efb4d0..890844674db848177c4f859f4a8b8ef8d7360fa7 100644 --- a/inference/docs/windows_vs2019_build.md +++ b/inference/docs/windows_vs2019_build.md @@ -6,7 +6,7 @@ Windows 平台下,我们使用`Visual Studio 2015` 和 `Visual Studio 2019 Com ## 前置条件 * Visual Studio 2019 -* CUDA 8.0/ CUDA 9.0/ CUDA 10.0,cudnn 7+ (仅在使用GPU版本的预测库时需要) +* CUDA 9.0/ CUDA 10.0,cudnn 7+ (仅在使用GPU版本的预测库时需要) * CMake 3.0+ 请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。 @@ -15,7 +15,7 @@ Windows 平台下,我们使用`Visual Studio 2015` 和 `Visual Studio 2019 Com ### Step1: 下载代码 -1. 点击下载源代码:[下载地址](https://github.com/PaddlePaddle/PaddleSeg/archive/master.zip) +1. 点击下载源代码:[下载地址](https://github.com/PaddlePaddle/PaddleSeg/archive/release/v0.2.0.zip) 2. 解压,解压后目录重命名为`PaddleSeg` 以下代码目录路径为`D:\projects\PaddleSeg` 为例。 @@ -23,14 +23,13 @@ Windows 平台下,我们使用`Visual Studio 2015` 和 `Visual Studio 2019 Com ### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference -PaddlePaddle C++ 预测库主要分为两大版本:CPU版本和GPU版本。其中,针对不同的CUDA版本,GPU版本预测库又分为三个版本预测库:CUDA 8、CUDA 9和CUDA 10版本预测库。根据Windows环境,下载相应版本的PaddlePaddle预测库,并解压到`D:\projects\`目录。以下为各版本C++预测库(CUDA 8版本基于1.5版本的预测库,其余均基于1.6版本的预测库)的下载链接: +PaddlePaddle C++ 预测库主要分为两大版本:CPU版本和GPU版本。其中,针对不同的CUDA版本,GPU版本预测库又分为三个版本预测库:CUDA 9.0和CUDA 10.0版本预测库。根据Windows环境,下载相应版本的PaddlePaddle预测库,并解压到`D:\projects\`目录。以下为各版本C++预测库的下载链接: | 版本 | 链接 | | ---- | ---- | -| CPU版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.0/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | -| CUDA 8版本 | [fluid_inference_install_dir.zip](https://paddle-inference-lib.bj.bcebos.com/1.5.1-win/gpu_mkl_avx_8.0/fluid_inference_install_dir.zip) | -| CUDA 9版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.0/win-infer/mkl/post97/fluid_inference_install_dir.zip) | -| CUDA 10版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.0/win-infer/mkl/post107/fluid_inference_install_dir.zip) | +| CPU版本 | [fluid_inference_install_dir.zip](https://bj.bcebos.com/paddlehub/paddle_inference_lib/fluid_install_dir_win_cpu_1.6.zip) | +| CUDA 9.0版本 | [fluid_inference_install_dir.zip](https://bj.bcebos.com/paddlehub/paddle_inference_lib/fluid_inference_install_dir_win_cuda9_1.6.1.zip) | +| CUDA 10.0版本 | [fluid_inference_install_dir.zip](https://bj.bcebos.com/paddlehub/paddle_inference_lib/fluid_inference_install_dir_win_cuda10_1.6.1.zip) | 解压后`D:\projects\fluid_inference`目录包含内容为: ``` @@ -68,12 +67,12 @@ fluid_inference 4. 
点击`浏览`,分别设置编译选项指定`CUDA`、`OpenCV`、`Paddle预测库`的路径 -三个编译参数的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定): +三个编译参数的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**): | 参数名 | 含义 | | ---- | ---- | -| *CUDA_LIB | cuda的库路径 | -| OPENCV_DIR | OpenCV的安装路径, | +| *CUDA_LIB | CUDA的库路径 | +| OPENCV_DIR | OpenCV的安装路径 | | PADDLE_DIR | Paddle预测库的路径 | **注意**在使用CPU版本预测库时,需要把CUDA_LIB的勾去掉。 ![step4](https://paddleseg.bj.bcebos.com/inference/vs2019_step5.png) @@ -90,7 +89,7 @@ fluid_inference 上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: ``` -cd /d D:\projects\PaddleSeg\inference\out\x64-Release +cd /d D:\projects\PaddleSeg\inference\out\build\x64-Release ``` 之后执行命令: diff --git a/inference/predictor/seg_predictor.cpp b/inference/predictor/seg_predictor.cpp index 5488ec946c96aaed543221a9f5d64be77092fba2..ca7673edb13afcbbb0fe35ed154729009993c01b 100644 --- a/inference/predictor/seg_predictor.cpp +++ b/inference/predictor/seg_predictor.cpp @@ -1,275 +1,317 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "seg_predictor.h" #include #undef min namespace PaddleSolution { + using std::chrono::duration_cast; + int Predictor::init(const std::string& conf) { + if (!_model_config.load_config(conf)) { + LOG(FATAL) << "Fail to load config file: [" << conf << "]"; + return -1; + } + _preprocessor = PaddleSolution::create_processor(conf); + if (_preprocessor == nullptr) { + LOG(FATAL) << "Failed to create_processor"; + return -1; + } - int Predictor::init(const std::string& conf) { - if (!_model_config.load_config(conf)) { - LOG(FATAL) << "Fail to load config file: [" << conf << "]"; - return -1; - } - _preprocessor = PaddleSolution::create_processor(conf); - if (_preprocessor == nullptr) { - LOG(FATAL) << "Failed to create_processor"; - return -1; - } - - _mask.resize(_model_config._resize[0] * _model_config._resize[1]); - _scoremap.resize(_model_config._resize[0] * _model_config._resize[1]); + int res_size = _model_config._resize[0] * _model_config._resize[1]; + _mask.resize(res_size); + _scoremap.resize(res_size); - bool use_gpu = _model_config._use_gpu; - const auto& model_dir = _model_config._model_path; - const auto& model_filename = _model_config._model_file_name; - const auto& params_filename = _model_config._param_file_name; + bool use_gpu = _model_config._use_gpu; + const auto& model_dir = _model_config._model_path; + const auto& model_filename = _model_config._model_file_name; + const auto& params_filename = _model_config._param_file_name; - // load paddle model file - if (_model_config._predictor_mode == "NATIVE") { - paddle::NativeConfig config; - auto prog_file = utils::path_join(model_dir, model_filename); - auto param_file = utils::path_join(model_dir, params_filename); - config.prog_file = prog_file; - config.param_file = param_file; - config.fraction_of_gpu_memory = 0; - config.use_gpu = use_gpu; - config.device = 0; - _main_predictor = paddle::CreatePaddlePredictor(config); 
- } - else if (_model_config._predictor_mode == "ANALYSIS") { - paddle::AnalysisConfig config; - if (use_gpu) { - config.EnableUseGpu(100, 0); - } - auto prog_file = utils::path_join(model_dir, model_filename); - auto param_file = utils::path_join(model_dir, params_filename); - config.SetModel(prog_file, param_file); - config.SwitchUseFeedFetchOps(false); - config.SwitchSpecifyInputNames(true); - config.EnableMemoryOptim(); - _main_predictor = paddle::CreatePaddlePredictor(config); + // load paddle model file + if (_model_config._predictor_mode == "NATIVE") { + paddle::NativeConfig config; + auto prog_file = utils::path_join(model_dir, model_filename); + auto param_file = utils::path_join(model_dir, params_filename); + config.prog_file = prog_file; + config.param_file = param_file; + config.fraction_of_gpu_memory = 0; + config.use_gpu = use_gpu; + config.device = 0; + _main_predictor = paddle::CreatePaddlePredictor(config); + } else if (_model_config._predictor_mode == "ANALYSIS") { + paddle::AnalysisConfig config; + if (use_gpu) { + config.EnableUseGpu(100, 0); } - else { - return -1; - } - return 0; + auto prog_file = utils::path_join(model_dir, model_filename); + auto param_file = utils::path_join(model_dir, params_filename); + config.SetModel(prog_file, param_file); + config.SwitchUseFeedFetchOps(false); + config.SwitchSpecifyInputNames(true); + config.EnableMemoryOptim(); + _main_predictor = paddle::CreatePaddlePredictor(config); + } else { + return -1; + } + return 0; + } + int Predictor::predict(const std::vector& imgs) { + if (_model_config._predictor_mode == "NATIVE") { + return native_predict(imgs); + } else if (_model_config._predictor_mode == "ANALYSIS") { + return analysis_predict(imgs); } + return -1; + } - int Predictor::predict(const std::vector& imgs) { - if (_model_config._predictor_mode == "NATIVE") { - return native_predict(imgs); - } - else if (_model_config._predictor_mode == "ANALYSIS") { - return analysis_predict(imgs); - } + int Predictor::output_mask(const std::string& fname, float* p_out, + int length, int* height, int* width) { + int eval_width = _model_config._resize[0]; + int eval_height = _model_config._resize[1]; + int eval_num_class = _model_config._class_num; + + int blob_out_len = length; + int seg_out_len = eval_height * eval_width * eval_num_class; + + if (blob_out_len != seg_out_len) { + LOG(ERROR) << " [FATAL] unequal: input vs output [" << + seg_out_len << "|" << blob_out_len << "]" << std::endl; return -1; } + // post process + _mask.clear(); + _scoremap.clear(); + std::vector out_shape{eval_num_class, eval_height, eval_width}; + utils::argmax(p_out, out_shape, _mask, _scoremap); + cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1); + mask_png.data = _mask.data(); + std::string nname(fname); + auto pos = fname.find("."); + nname[pos] = '_'; + std::string mask_save_name = nname + ".png"; + cv::imwrite(mask_save_name, mask_png); + cv::Mat scoremap_png = cv::Mat(eval_height, eval_width, CV_8UC1); + scoremap_png.data = _scoremap.data(); + std::string scoremap_save_name = nname + + std::string("_scoremap.png"); + cv::imwrite(scoremap_save_name, scoremap_png); + std::cout << "save mask of [" << fname << "] done" << std::endl; - int Predictor::output_mask(const std::string& fname, float* p_out, int length, int* height, int* width) { - int eval_width = _model_config._resize[0]; - int eval_height = _model_config._resize[1]; - int eval_num_class = _model_config._class_num; + if (height && width) { + int recover_height = *height; + int recover_width 
= *width; + cv::Mat recover_png = cv::Mat(recover_height, + recover_width, CV_8UC1); + cv::resize(scoremap_png, recover_png, + cv::Size(recover_width, recover_height), + 0, 0, cv::INTER_CUBIC); + std::string recover_name = nname + std::string("_recover.png"); + cv::imwrite(recover_name, recover_png); + } + return 0; + } - int blob_out_len = length; - int seg_out_len = eval_height * eval_width * eval_num_class; + int Predictor::native_predict(const std::vector& imgs) { + if (imgs.size() == 0) { + LOG(ERROR) << "No image found"; + return -1; + } + int config_batch_size = _model_config._batch_size; - if (blob_out_len != seg_out_len) { - LOG(ERROR) << " [FATAL] unequal: input vs output [" << - seg_out_len << "|" << blob_out_len << "]" << std::endl; - return -1; - } + int channels = _model_config._channels; + int eval_width = _model_config._resize[0]; + int eval_height = _model_config._resize[1]; + std::size_t total_size = imgs.size(); + int default_batch_size = std::min(config_batch_size, + static_cast(total_size)); + int batch = total_size / default_batch_size + + ((total_size % default_batch_size) != 0); + int batch_buffer_size = default_batch_size * channels + * eval_width * eval_height; - //post process - _mask.clear(); - _scoremap.clear(); - std::vector out_shape{eval_num_class, eval_height, eval_width}; - utils::argmax(p_out, out_shape, _mask, _scoremap); - cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1); - mask_png.data = _mask.data(); - std::string nname(fname); - auto pos = fname.find("."); - nname[pos] = '_'; - std::string mask_save_name = nname + ".png"; - cv::imwrite(mask_save_name, mask_png); - cv::Mat scoremap_png = cv::Mat(eval_height, eval_width, CV_8UC1); - scoremap_png.data = _scoremap.data(); - std::string scoremap_save_name = nname + std::string("_scoremap.png"); - cv::imwrite(scoremap_save_name, scoremap_png); - std::cout << "save mask of [" << fname << "] done" << std::endl; + auto& input_buffer = _buffer; + auto& org_width = _org_width; + auto& org_height = _org_height; + auto& imgs_batch = _imgs_batch; - if (height && width) { - int recover_height = *height; - int recover_width = *width; - cv::Mat recover_png = cv::Mat(recover_height, recover_width, CV_8UC1); - cv::resize(scoremap_png, recover_png, cv::Size(recover_width, recover_height), - 0, 0, cv::INTER_CUBIC); - std::string recover_name = nname + std::string("_recover.png"); - cv::imwrite(recover_name, recover_png); + input_buffer.resize(batch_buffer_size); + org_width.resize(default_batch_size); + org_height.resize(default_batch_size); + for (int u = 0; u < batch; ++u) { + int batch_size = default_batch_size; + if (u == (batch - 1) && (total_size % default_batch_size)) { + batch_size = total_size % default_batch_size; } - return 0; - } - int Predictor::native_predict(const std::vector& imgs) - { - if (imgs.size() == 0) { - LOG(ERROR) << "No image found"; + int real_buffer_size = batch_size * channels + * eval_width * eval_height; + std::vector feeds; + input_buffer.resize(real_buffer_size); + org_height.resize(batch_size); + org_width.resize(batch_size); + for (int i = 0; i < batch_size; ++i) { + org_width[i] = org_height[i] = 0; + } + imgs_batch.clear(); + for (int i = 0; i < batch_size; ++i) { + int idx = u * default_batch_size + i; + imgs_batch.push_back(imgs[idx]); + } + if (!_preprocessor->batch_process(imgs_batch, + input_buffer.data(), + org_width.data(), + org_height.data())) { return -1; } - int config_batch_size = _model_config._batch_size; - - int channels = _model_config._channels; - int 
eval_width = _model_config._resize[0]; - int eval_height = _model_config._resize[1]; - std::size_t total_size = imgs.size(); - int default_batch_size = std::min(config_batch_size, (int)total_size); - int batch = total_size / default_batch_size + ((total_size % default_batch_size) != 0); - int batch_buffer_size = default_batch_size * channels * eval_width * eval_height; - - auto& input_buffer = _buffer; - auto& org_width = _org_width; - auto& org_height = _org_height; - auto& imgs_batch = _imgs_batch; - - input_buffer.resize(batch_buffer_size); - org_width.resize(default_batch_size); - org_height.resize(default_batch_size); - for (int u = 0; u < batch; ++u) { - int batch_size = default_batch_size; - if (u == (batch - 1) && (total_size % default_batch_size)) { - batch_size = total_size % default_batch_size; - } - - int real_buffer_size = batch_size * channels * eval_width * eval_height; - std::vector feeds; - input_buffer.resize(real_buffer_size); - org_height.resize(batch_size); - org_width.resize(batch_size); - for (int i = 0; i < batch_size; ++i) { - org_width[i] = org_height[i] = 0; - } - imgs_batch.clear(); - for (int i = 0; i < batch_size; ++i) { - int idx = u * default_batch_size + i; - imgs_batch.push_back(imgs[idx]); - } - if (!_preprocessor->batch_process(imgs_batch, input_buffer.data(), org_width.data(), org_height.data())) { - return -1; - } - paddle::PaddleTensor im_tensor; - im_tensor.name = "image"; - im_tensor.shape = std::vector({ batch_size, channels, eval_height, eval_width }); - im_tensor.data.Reset(input_buffer.data(), real_buffer_size * sizeof(float)); - im_tensor.dtype = paddle::PaddleDType::FLOAT32; - feeds.push_back(im_tensor); - _outputs.clear(); - auto t1 = std::chrono::high_resolution_clock::now(); - if (!_main_predictor->Run(feeds, &_outputs, batch_size)) { - LOG(ERROR) << "Failed: NativePredictor->Run() return false at batch: " << u; - continue; - } - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "runtime = " << duration << std::endl; - int out_num = 1; - // print shape of first output tensor for debugging - std::cout << "size of outputs[" << 0 << "]: ("; - for (int j = 0; j < _outputs[0].shape.size(); ++j) { - out_num *= _outputs[0].shape[j]; - std::cout << _outputs[0].shape[j] << ","; - } - std::cout << ")" << std::endl; - const size_t nums = _outputs.front().data.length() / sizeof(float); - if (out_num % batch_size != 0 || out_num != nums) { - LOG(ERROR) << "outputs data size mismatch with shape size."; - return -1; - } - - for (int i = 0; i < batch_size; ++i) { - float* output_addr = (float*)(_outputs[0].data.data()) + i * (out_num / batch_size); - output_mask(imgs_batch[i], output_addr, out_num / batch_size, &org_height[i], &org_width[i]); - } + paddle::PaddleTensor im_tensor; + im_tensor.name = "image"; + im_tensor.shape = std::vector{ batch_size, channels, + eval_height, eval_width }; + im_tensor.data.Reset(input_buffer.data(), + real_buffer_size * sizeof(float)); + im_tensor.dtype = paddle::PaddleDType::FLOAT32; + feeds.push_back(im_tensor); + _outputs.clear(); + auto t1 = std::chrono::high_resolution_clock::now(); + if (!_main_predictor->Run(feeds, &_outputs, batch_size)) { + LOG(ERROR) << + "Failed: NativePredictor->Run() return false at batch: " + << u; + continue; + } + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = duration_cast + (t2 - t1).count(); + std::cout << "runtime = " << duration << std::endl; + int out_num = 1; + // print shape of 
first output tensor for debugging + std::cout << "size of outputs[" << 0 << "]: ("; + for (int j = 0; j < _outputs[0].shape.size(); ++j) { + out_num *= _outputs[0].shape[j]; + std::cout << _outputs[0].shape[j] << ","; + } + std::cout << ")" << std::endl; + const size_t nums = _outputs.front().data.length() + / sizeof(float); + if (out_num % batch_size != 0 || out_num != nums) { + LOG(ERROR) << "outputs data size mismatch with shape size."; + return -1; } - return 0; + for (int i = 0; i < batch_size; ++i) { + float* output_addr = reinterpret_cast( + _outputs[0].data.data()) + + i * (out_num / batch_size); + output_mask(imgs_batch[i], output_addr, + out_num / batch_size, + &org_height[i], + &org_width[i]); + } } - int Predictor::analysis_predict(const std::vector& imgs) { + return 0; + } - if (imgs.size() == 0) { - LOG(ERROR) << "No image found"; - return -1; - } + int Predictor::analysis_predict(const std::vector& imgs) { + if (imgs.size() == 0) { + LOG(ERROR) << "No image found"; + return -1; + } - int config_batch_size = _model_config._batch_size; - int channels = _model_config._channels; - int eval_width = _model_config._resize[0]; - int eval_height = _model_config._resize[1]; - auto total_size = imgs.size(); - int default_batch_size = std::min(config_batch_size, (int)total_size); - int batch = total_size / default_batch_size + ((total_size % default_batch_size) != 0); - int batch_buffer_size = default_batch_size * channels * eval_width * eval_height; + int config_batch_size = _model_config._batch_size; + int channels = _model_config._channels; + int eval_width = _model_config._resize[0]; + int eval_height = _model_config._resize[1]; + auto total_size = imgs.size(); + int default_batch_size = std::min(config_batch_size, + static_cast(total_size)); + int batch = total_size / default_batch_size + + ((total_size % default_batch_size) != 0); + int batch_buffer_size = default_batch_size * channels + * eval_width * eval_height; - auto& input_buffer = _buffer; - auto& org_width = _org_width; - auto& org_height = _org_height; - auto& imgs_batch = _imgs_batch; + auto& input_buffer = _buffer; + auto& org_width = _org_width; + auto& org_height = _org_height; + auto& imgs_batch = _imgs_batch; - input_buffer.resize(batch_buffer_size); - org_width.resize(default_batch_size); - org_height.resize(default_batch_size); + input_buffer.resize(batch_buffer_size); + org_width.resize(default_batch_size); + org_height.resize(default_batch_size); - for (int u = 0; u < batch; ++u) { - int batch_size = default_batch_size; - if (u == (batch - 1) && (total_size % default_batch_size)) { - batch_size = total_size % default_batch_size; - } + for (int u = 0; u < batch; ++u) { + int batch_size = default_batch_size; + if (u == (batch - 1) && (total_size % default_batch_size)) { + batch_size = total_size % default_batch_size; + } - int real_buffer_size = batch_size * channels * eval_width * eval_height; - std::vector feeds; - input_buffer.resize(real_buffer_size); - org_height.resize(batch_size); - org_width.resize(batch_size); - for (int i = 0; i < batch_size; ++i) { - org_width[i] = org_height[i] = 0; - } - imgs_batch.clear(); - for (int i = 0; i < batch_size; ++i) { - int idx = u * default_batch_size + i; - imgs_batch.push_back(imgs[idx]); - } + int real_buffer_size = batch_size * channels + * eval_width * eval_height; + std::vector feeds; + input_buffer.resize(real_buffer_size); + org_height.resize(batch_size); + org_width.resize(batch_size); + for (int i = 0; i < batch_size; ++i) { + org_width[i] = org_height[i] = 0; 
+ } + imgs_batch.clear(); + for (int i = 0; i < batch_size; ++i) { + int idx = u * default_batch_size + i; + imgs_batch.push_back(imgs[idx]); + } - if (!_preprocessor->batch_process(imgs_batch, input_buffer.data(), org_width.data(), org_height.data())) { - return -1; - } - auto im_tensor = _main_predictor->GetInputTensor("image"); - im_tensor->Reshape({ batch_size, channels, eval_height, eval_width }); - im_tensor->copy_from_cpu(input_buffer.data()); + if (!_preprocessor->batch_process(imgs_batch, + input_buffer.data(), + org_width.data(), + org_height.data())) { + return -1; + } + auto im_tensor = _main_predictor->GetInputTensor("image"); + im_tensor->Reshape({ batch_size, channels, + eval_height, eval_width }); + im_tensor->copy_from_cpu(input_buffer.data()); - auto t1 = std::chrono::high_resolution_clock::now(); - _main_predictor->ZeroCopyRun(); - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "runtime = " << duration << std::endl; + auto t1 = std::chrono::high_resolution_clock::now(); + _main_predictor->ZeroCopyRun(); + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = duration_cast + (t2 - t1).count(); + std::cout << "runtime = " << duration << std::endl; - auto output_names = _main_predictor->GetOutputNames(); - auto output_t = _main_predictor->GetOutputTensor(output_names[0]); - std::vector out_data; - std::vector output_shape = output_t->shape(); + auto output_names = _main_predictor->GetOutputNames(); + auto output_t = _main_predictor->GetOutputTensor( + output_names[0]); + std::vector out_data; + std::vector output_shape = output_t->shape(); - int out_num = 1; - std::cout << "size of outputs[" << 0 << "]: ("; - for (int j = 0; j < output_shape.size(); ++j) { - out_num *= output_shape[j]; - std::cout << output_shape[j] << ","; - } - std::cout << ")" << std::endl; + int out_num = 1; + std::cout << "size of outputs[" << 0 << "]: ("; + for (int j = 0; j < output_shape.size(); ++j) { + out_num *= output_shape[j]; + std::cout << output_shape[j] << ","; + } + std::cout << ")" << std::endl; - out_data.resize(out_num); - output_t->copy_to_cpu(out_data.data()); - for (int i = 0; i < batch_size; ++i) { - float* out_addr = out_data.data() + (out_num / batch_size) * i; - output_mask(imgs_batch[i], out_addr, out_num / batch_size, &org_height[i], &org_width[i]); - } + out_data.resize(out_num); + output_t->copy_to_cpu(out_data.data()); + for (int i = 0; i < batch_size; ++i) { + float* out_addr = out_data.data() + + (out_num / batch_size) * i; + output_mask(imgs_batch[i], out_addr, out_num / batch_size, + &org_height[i], &org_width[i]); } - return 0; } -} + return 0; + } +} // namespace PaddleSolution diff --git a/inference/predictor/seg_predictor.h b/inference/predictor/seg_predictor.h index 0217115b81391a9a4d8daa9471f2b50f677fcbe1..ade8c0478830971e0f02f633fef508aefc46bb79 100644 --- a/inference/predictor/seg_predictor.h +++ b/inference/predictor/seg_predictor.h @@ -1,49 +1,59 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #pragma once +#include +#include + #include #include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include "utils/seg_conf_parser.h" +#include "utils/utils.h" +#include "preprocessor/preprocessor.h" namespace PaddleSolution { - class Predictor { - public: - // init a predictor with a yaml config file - int init(const std::string& conf); - // predict api - int predict(const std::vector& imgs); - - private: - int output_mask( - const std::string& fname, - float* p_out, - int length, - int* height = NULL, - int* width = NULL); - int native_predict(const std::vector& imgs); - int analysis_predict(const std::vector& imgs); - private: - std::vector _buffer; - std::vector _org_width; - std::vector _org_height; - std::vector _imgs_batch; - std::vector _outputs; +class Predictor { + public: + // init a predictor with a yaml config file + int init(const std::string& conf); + // predict api + int predict(const std::vector& imgs); + private: + int output_mask(const std::string& fname, float* p_out, int length, + int* height = NULL, int* width = NULL); + int native_predict(const std::vector& imgs); + int analysis_predict(const std::vector& imgs); + private: + std::vector _buffer; + std::vector _org_width; + std::vector _org_height; + std::vector _imgs_batch; + std::vector _outputs; - std::vector _mask; - std::vector _scoremap; + std::vector _mask; + std::vector _scoremap; - PaddleSolution::PaddleSegModelConfigPaser _model_config; - std::shared_ptr _preprocessor; - std::unique_ptr _main_predictor; - }; -} + PaddleSolution::PaddleSegModelConfigPaser _model_config; + std::shared_ptr _preprocessor; + std::unique_ptr _main_predictor; +}; +} // namespace PaddleSolution diff --git a/inference/preprocessor/preprocessor.cpp b/inference/preprocessor/preprocessor.cpp index f3ce82dc122308941357e1de13fb156459782bc7..9d6d20b5fb69a8d00596f9125b8d84f541bd279e 100644 --- a/inference/preprocessor/preprocessor.cpp +++ b/inference/preprocessor/preprocessor.cpp @@ -1,3 +1,17 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #include @@ -7,9 +21,10 @@ namespace PaddleSolution { - std::shared_ptr create_processor(const std::string& conf_file) { - - auto config = std::make_shared(); + std::shared_ptr create_processor( + const std::string& conf_file) { + auto config = std::make_shared(); if (!config->load_config(conf_file)) { LOG(FATAL) << "fail to laod conf file [" << conf_file << "]"; return nullptr; @@ -23,9 +38,9 @@ namespace PaddleSolution { return p; } - LOG(FATAL) << "unknown processor_name [" << config->_pre_processor << "]"; + LOG(FATAL) << "unknown processor_name [" << config->_pre_processor + << "]"; return nullptr; } -} - +} // namespace PaddleSolution diff --git a/inference/preprocessor/preprocessor.h b/inference/preprocessor/preprocessor.h index d94809f008be3da4d66dcab948ba2aa6a8f085c0..4a1372de612e851759ea7f384bc555423f1b237d 100644 --- a/inference/preprocessor/preprocessor.h +++ b/inference/preprocessor/preprocessor.h @@ -1,3 +1,17 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #pragma once #include #include @@ -12,18 +26,19 @@ namespace PaddleSolution { class ImagePreProcessor { -protected: - ImagePreProcessor() {}; - -public: + protected: + ImagePreProcessor() {} + public: virtual ~ImagePreProcessor() {} - virtual bool single_process(const std::string& fname, float* data, int* ori_w, int* ori_h) = 0; - - virtual bool batch_process(const std::vector& imgs, float* data, int* ori_w, int* ori_h) = 0; + virtual bool single_process(const std::string& fname, float* data, + int* ori_w, int* ori_h) = 0; -}; // end of class ImagePreProcessor + virtual bool batch_process(const std::vector& imgs, + float* data, int* ori_w, int* ori_h) = 0; +}; // end of class ImagePreProcessor -std::shared_ptr create_processor(const std::string &config_file); +std::shared_ptr create_processor( + const std::string &config_file); -} // end of namespace paddle_solution +} // namespace PaddleSolution diff --git a/inference/preprocessor/preprocessor_seg.cpp b/inference/preprocessor/preprocessor_seg.cpp index c2d056bfd2706ad441b96d76165804c0d81cdfaf..7c74042071143be53e11f1e7915531ea9354f356 100644 --- a/inference/preprocessor/preprocessor_seg.cpp +++ b/inference/preprocessor/preprocessor_seg.cpp @@ -1,18 +1,33 @@ -#include +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "preprocessor_seg.h"
 #include
-#include "preprocessor_seg.h"
+#include
+
 namespace PaddleSolution {

-    bool SegPreProcessor::single_process(const std::string& fname, float* data, int* ori_w, int* ori_h) {
+    bool SegPreProcessor::single_process(const std::string& fname,
+                                         float* data, int* ori_w, int* ori_h) {
         cv::Mat im = cv::imread(fname, -1);
         if (im.data == nullptr || im.empty()) {
             LOG(ERROR) << "Failed to open image: " << fname;
             return false;
         }
-        int channels = im.channels();
         *ori_w = im.cols;
         *ori_h = im.rows;
@@ -36,7 +51,8 @@ namespace PaddleSolution {
         return true;
     }

-    bool SegPreProcessor::batch_process(const std::vector<std::string>& imgs, float* data, int* ori_w, int* ori_h) {
+    bool SegPreProcessor::batch_process(const std::vector<std::string>& imgs,
+                                        float* data, int* ori_w, int* ori_h) {
         auto ic = _config->_channels;
         auto iw = _config->_resize[0];
         auto ih = _config->_resize[1];
@@ -58,9 +74,9 @@ namespace PaddleSolution {
         return true;
     }

-    bool SegPreProcessor::init(std::shared_ptr<PaddleSegModelConfigPaser> config) {
+    bool SegPreProcessor::init(
+        std::shared_ptr<PaddleSegModelConfigPaser> config) {
         _config = config;
         return true;
     }
-
-}
+}  // namespace PaddleSolution
diff --git a/inference/preprocessor/preprocessor_seg.h b/inference/preprocessor/preprocessor_seg.h
index eba904b8949b3c000799ee84541699989fea425a..5eba23e555c9b335ac1ee2cee4e51098546c5fe1 100644
--- a/inference/preprocessor/preprocessor_seg.h
+++ b/inference/preprocessor/preprocessor_seg.h
@@ -1,24 +1,40 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #pragma once
+#include
+#include
+#include

 #include "preprocessor.h"
 #include "utils/utils.h"

 namespace PaddleSolution {

 class SegPreProcessor : public ImagePreProcessor {
+ public:
+    SegPreProcessor() : _config(nullptr) {}

-public:
-    SegPreProcessor() : _config(nullptr){
-    };
-
-    bool init(std::shared_ptr<PaddleSegModelConfigPaser> config);
+    bool init(
+        std::shared_ptr<PaddleSegModelConfigPaser> config);

-    bool single_process(const std::string &fname, float* data, int* ori_w, int* ori_h);
+    bool single_process(const std::string &fname, float* data,
+                        int* ori_w, int* ori_h);

-    bool batch_process(const std::vector<std::string>& imgs, float* data, int* ori_w, int* ori_h);
-
-private:
+    bool batch_process(const std::vector<std::string>& imgs, float* data,
+                       int* ori_w, int* ori_h);
+ private:
     std::shared_ptr<PaddleSegModelConfigPaser> _config;
 };
-
-}
+}  // namespace PaddleSolution
diff --git a/inference/tools/visualize.py b/inference/tools/visualize.py
index 3939342baf907846a45d3bd0e8ab87e6c35699f4..c249a1db73144ba15960a5949589a9e2bc5d9734 100644
--- a/inference/tools/visualize.py
+++ b/inference/tools/visualize.py
@@ -1,27 +1,41 @@
-import cv2
-import sys
-
-# ColorMap for visualization more clearly
-color_map = [[128, 64, 128], [244, 35, 231], [69, 69, 69], [102, 102, 156],
-             [190, 153, 153], [153, 153, 153], [250, 170, 29], [219, 219, 0],
-             [106, 142, 35], [152, 250, 152], [69, 129, 180], [219, 19, 60],
-             [255, 0, 0], [0, 0, 142], [0, 0, 69], [0, 60, 100], [0, 79, 100],
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cv2
+import sys
+
+# ColorMap for clearer visualization
+color_map = [[128, 64, 128], [244, 35, 231], [69, 69, 69], [102, 102, 156],
+             [190, 153, 153], [153, 153, 153], [250, 170, 29], [219, 219, 0],
+             [106, 142, 35], [152, 250, 152], [69, 129, 180], [219, 19, 60],
+             [255, 0, 0], [0, 0, 142], [0, 0, 69], [0, 60, 100], [0, 79, 100],
              [0, 0, 230], [119, 10, 32]]

-# python visualize.py demo1.jpg demo1_jpg.png vis_result.png
-if __name__ == "__main__":
-    if len(sys.argv) != 4:
-        print(
-            "Usage: python visualize.py demo1.jpg demo1_jpg.png vis_result.png")
-    else:
-        ori_im = cv2.imread(sys.argv[1])
-        ori_shape = ori_im.shape
-        print(ori_shape)
-        im = cv2.imread(sys.argv[2])
-        shape = im.shape
-        print("visualizing...")
-        for i in range(0, shape[0]):
-            for j in range(0, shape[1]):
-                im[i, j] = color_map[im[i, j, 0]]
-        im = cv2.resize(im, (ori_shape[1], ori_shape[0]))
-        cv2.imwrite(sys.argv[3], im)
-        print("visualizing done!")
+# python visualize.py demo1.jpg demo1_jpg.png vis_result.png
+if __name__ == "__main__":
+    if len(sys.argv) != 4:
+        print(
+            "Usage: python visualize.py demo1.jpg demo1_jpg.png vis_result.png")
+    else:
+        ori_im = cv2.imread(sys.argv[1])
+        ori_shape = ori_im.shape
+        print(ori_shape)
+        im = cv2.imread(sys.argv[2])
+        shape = im.shape
+        print("visualizing...")
+        for i in range(0, shape[0]):
+            for j in range(0, shape[1]):
+                im[i, j] = color_map[im[i, j, 0]]
+        im = cv2.resize(im, (ori_shape[1], ori_shape[0]))
+        cv2.imwrite(sys.argv[3], im)
+        print("visualizing done!")
diff --git a/inference/utils/seg_conf_parser.h b/inference/utils/seg_conf_parser.h
index 078d04f3eb9dcd1763f69a8eb770c0853f9f1b24..b1e40ff84bf0ac9e4e932875b341ee9741efb4ff 100644
--- a/inference/utils/seg_conf_parser.h
+++ b/inference/utils/seg_conf_parser.h
@@ -1,165 +1,178 @@
-#pragma once
-#include
-#include
-#include
-
-#include
-namespace PaddleSolution {
-
-    class PaddleSegModelConfigPaser {
-    public:
-        PaddleSegModelConfigPaser()
-            :_class_num(0),
-            _channels(0),
-            _use_gpu(0),
-            _batch_size(1),
-            _model_file_name("__model__"),
-            _param_file_name("__params__") {
-        }
-        ~PaddleSegModelConfigPaser() {
-        }
-
-        void reset() {
-            _resize.clear();
-            _mean.clear();
-            _std.clear();
-            _img_type.clear();
-            _class_num = 0;
-            _channels = 0;
-            _use_gpu = 0;
-            _batch_size = 1;
-            _model_file_name.clear();
-            _model_path.clear();
-            _param_file_name.clear();
-        }
-
-        std::string process_parenthesis(const std::string& str) {
-            if (str.size() < 2) {
-                return str;
-            }
-            std::string nstr(str);
-            if (str[0] == '(' && str.back() == ')') {
-                nstr[0] = '[';
-                nstr[str.size() - 1] = ']';
-            }
-            return nstr;
-        }
-
-        template <class T>
-        std::vector<T> parse_str_to_vec(const std::string& str) {
-            std::vector<T> data;
-            auto node = YAML::Load(str);
-            for (const auto& item : node) {
-                data.push_back(item.as<T>());
-            }
-            return data;
-        }
-
-        bool load_config(const std::string& conf_file) {
-
-            reset();
-
-            YAML::Node config = YAML::LoadFile(conf_file);
-            // 1. get resize
-            auto str = config["DEPLOY"]["EVAL_CROP_SIZE"].as<std::string>();
-            _resize = parse_str_to_vec<int>(process_parenthesis(str));
-
-            // 2. get mean
-            for (const auto& item : config["DEPLOY"]["MEAN"]) {
-                _mean.push_back(item.as<float>());
-            }
-
-            // 3. get std
-            for (const auto& item : config["DEPLOY"]["STD"]) {
-                _std.push_back(item.as<float>());
-            }
-
-            // 4. get image type
-            _img_type = config["DEPLOY"]["IMAGE_TYPE"].as<std::string>();
-            // 5. get class number
-            _class_num = config["DEPLOY"]["NUM_CLASSES"].as<int>();
-            // 7. set model path
-            _model_path = config["DEPLOY"]["MODEL_PATH"].as<std::string>();
-            // 8. get model file_name
-            _model_file_name = config["DEPLOY"]["MODEL_FILENAME"].as<std::string>();
-            // 9. get model param file name
-            _param_file_name = config["DEPLOY"]["PARAMS_FILENAME"].as<std::string>();
-            // 10. get pre_processor
-            _pre_processor = config["DEPLOY"]["PRE_PROCESSOR"].as<std::string>();
-            // 11. use_gpu
-            _use_gpu = config["DEPLOY"]["USE_GPU"].as<int>();
-            // 12. predictor_mode
-            _predictor_mode = config["DEPLOY"]["PREDICTOR_MODE"].as<std::string>();
-            // 13. batch_size
-            _batch_size = config["DEPLOY"]["BATCH_SIZE"].as<int>();
-            // 14. channels
-            _channels = config["DEPLOY"]["CHANNELS"].as<int>();
-            return true;
-        }
-
-        void debug() const {
-
-            std::cout << "EVAL_CROP_SIZE: (" << _resize[0] << ", " << _resize[1] << ")" << std::endl;
-
-            std::cout << "MEAN: [";
-            for (int i = 0; i < _mean.size(); ++i) {
-                if (i != _mean.size() - 1) {
-                    std::cout << _mean[i] << ", ";
-                } else {
-                    std::cout << _mean[i];
-                }
-            }
-            std::cout << "]" << std::endl;
-
-            std::cout << "STD: [";
-            for (int i = 0; i < _std.size(); ++i) {
-                if (i != _std.size() - 1) {
-                    std::cout << _std[i] << ", ";
-                }
-                else {
-                    std::cout << _std[i];
-                }
-            }
-            std::cout << "]" << std::endl;
-
-            std::cout << "DEPLOY.IMAGE_TYPE: " << _img_type << std::endl;
-            std::cout << "DEPLOY.NUM_CLASSES: " << _class_num << std::endl;
-            std::cout << "DEPLOY.CHANNELS: " << _channels << std::endl;
-            std::cout << "DEPLOY.MODEL_PATH: " << _model_path << std::endl;
-            std::cout << "DEPLOY.MODEL_FILENAME: " << _model_file_name << std::endl;
-            std::cout << "DEPLOY.PARAMS_FILENAME: " << _param_file_name << std::endl;
-            std::cout << "DEPLOY.PRE_PROCESSOR: " << _pre_processor << std::endl;
-            std::cout << "DEPLOY.USE_GPU: " << _use_gpu << std::endl;
-            std::cout << "DEPLOY.PREDICTOR_MODE: " << _predictor_mode << std::endl;
-            std::cout << "DEPLOY.BATCH_SIZE: " << _batch_size << std::endl;
-        }
-
-        // DEPLOY.EVAL_CROP_SIZE
-        std::vector<int> _resize;
-        // DEPLOY.MEAN
-        std::vector<float> _mean;
-        // DEPLOY.STD
-        std::vector<float> _std;
-        // DEPLOY.IMAGE_TYPE
-        std::string _img_type;
-        // DEPLOY.NUM_CLASSES
-        int _class_num;
-        // DEPLOY.CHANNELS
-        int _channels;
-        // DEPLOY.MODEL_PATH
-        std::string _model_path;
-        // DEPLOY.MODEL_FILENAME
-        std::string _model_file_name;
-        // DEPLOY.PARAMS_FILENAME
-        std::string _param_file_name;
-        // DEPLOY.PRE_PROCESSOR
-        std::string _pre_processor;
-        // DEPLOY.USE_GPU
-        int _use_gpu;
-        // DEPLOY.PREDICTOR_MODE
-        std::string _predictor_mode;
-        // DEPLOY.BATCH_SIZE
-        int _batch_size;
-    };
-
-}
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include
+#include
+#include
+#include
+
+namespace PaddleSolution {
+class PaddleSegModelConfigPaser {
+ public:
+    PaddleSegModelConfigPaser()
+        :_class_num(0),
+        _channels(0),
+        _use_gpu(0),
+        _batch_size(1),
+        _model_file_name("__model__"),
+        _param_file_name("__params__") {
+    }
+    ~PaddleSegModelConfigPaser() {
+    }
+
+    void reset() {
+        _resize.clear();
+        _mean.clear();
+        _std.clear();
+        _img_type.clear();
+        _class_num = 0;
+        _channels = 0;
+        _use_gpu = 0;
+        _batch_size = 1;
+        _model_file_name.clear();
+        _model_path.clear();
+        _param_file_name.clear();
+    }
+
+    std::string process_parenthesis(const std::string& str) {
+        if (str.size() < 2) {
+            return str;
+        }
+        std::string nstr(str);
+        if (str[0] == '(' && str.back() == ')') {
+            nstr[0] = '[';
+            nstr[str.size() - 1] = ']';
+        }
+        return nstr;
+    }
+
+    template <class T>
+    std::vector<T> parse_str_to_vec(const std::string& str) {
+        std::vector<T> data;
+        auto node = YAML::Load(str);
+        for (const auto& item : node) {
+            data.push_back(item.as<T>());
+        }
+        return data;
+    }
+
+    bool load_config(const std::string& conf_file) {
+        reset();
+
+        YAML::Node config = YAML::LoadFile(conf_file);
+        // 1. get resize
+        auto str = config["DEPLOY"]["EVAL_CROP_SIZE"].as<std::string>();
+        _resize = parse_str_to_vec<int>(process_parenthesis(str));
+
+        // 2. get mean
+        for (const auto& item : config["DEPLOY"]["MEAN"]) {
+            _mean.push_back(item.as<float>());
+        }
+
+        // 3. get std
+        for (const auto& item : config["DEPLOY"]["STD"]) {
+            _std.push_back(item.as<float>());
+        }
+
+        // 4. get image type
+        _img_type = config["DEPLOY"]["IMAGE_TYPE"].as<std::string>();
+        // 5. get class number
+        _class_num = config["DEPLOY"]["NUM_CLASSES"].as<int>();
+        // 7. set model path
+        _model_path = config["DEPLOY"]["MODEL_PATH"].as<std::string>();
+        // 8. get model file_name
+        _model_file_name = config["DEPLOY"]["MODEL_FILENAME"].as<std::string>();
+        // 9. get model param file name
+        _param_file_name =
+            config["DEPLOY"]["PARAMS_FILENAME"].as<std::string>();
+        // 10. get pre_processor
+        _pre_processor = config["DEPLOY"]["PRE_PROCESSOR"].as<std::string>();
+        // 11. use_gpu
+        _use_gpu = config["DEPLOY"]["USE_GPU"].as<int>();
+        // 12. predictor_mode
+        _predictor_mode = config["DEPLOY"]["PREDICTOR_MODE"].as<std::string>();
+        // 13. batch_size
+        _batch_size = config["DEPLOY"]["BATCH_SIZE"].as<int>();
+        // 14. channels
+        _channels = config["DEPLOY"]["CHANNELS"].as<int>();
+        return true;
+    }
+
+    void debug() const {
+        std::cout << "EVAL_CROP_SIZE: ("
+                  << _resize[0] << ", " << _resize[1]
+                  << ")" << std::endl;
+        std::cout << "MEAN: [";
+        for (int i = 0; i < _mean.size(); ++i) {
+            if (i != _mean.size() - 1) {
+                std::cout << _mean[i] << ", ";
+            } else {
+                std::cout << _mean[i];
+            }
+        }
+        std::cout << "]" << std::endl;
+
+        std::cout << "STD: [";
+        for (int i = 0; i < _std.size(); ++i) {
+            if (i != _std.size() - 1) {
+                std::cout << _std[i] << ", ";
+            } else {
+                std::cout << _std[i];
+            }
+        }
+        std::cout << "]" << std::endl;
+
+        std::cout << "DEPLOY.IMAGE_TYPE: " << _img_type << std::endl;
+        std::cout << "DEPLOY.NUM_CLASSES: " << _class_num << std::endl;
+        std::cout << "DEPLOY.CHANNELS: " << _channels << std::endl;
+        std::cout << "DEPLOY.MODEL_PATH: " << _model_path << std::endl;
+        std::cout << "DEPLOY.MODEL_FILENAME: " << _model_file_name << std::endl;
+        std::cout << "DEPLOY.PARAMS_FILENAME: "
+                  << _param_file_name << std::endl;
+        std::cout << "DEPLOY.PRE_PROCESSOR: " << _pre_processor << std::endl;
+        std::cout << "DEPLOY.USE_GPU: " << _use_gpu << std::endl;
+        std::cout << "DEPLOY.PREDICTOR_MODE: " << _predictor_mode << std::endl;
+        std::cout << "DEPLOY.BATCH_SIZE: " << _batch_size << std::endl;
+    }
+
+    // DEPLOY.EVAL_CROP_SIZE
+    std::vector<int> _resize;
+    // DEPLOY.MEAN
+    std::vector<float> _mean;
+    // DEPLOY.STD
+    std::vector<float> _std;
+    // DEPLOY.IMAGE_TYPE
+    std::string _img_type;
+    // DEPLOY.NUM_CLASSES
+    int _class_num;
+    // DEPLOY.CHANNELS
+    int _channels;
+    // DEPLOY.MODEL_PATH
+    std::string _model_path;
+    // DEPLOY.MODEL_FILENAME
+    std::string _model_file_name;
+    // DEPLOY.PARAMS_FILENAME
+    std::string _param_file_name;
+    // DEPLOY.PRE_PROCESSOR
+    std::string _pre_processor;
+    // DEPLOY.USE_GPU
+    int _use_gpu;
+    // DEPLOY.PREDICTOR_MODE
+    std::string _predictor_mode;
+    // DEPLOY.BATCH_SIZE
+    int _batch_size;
+};
+
+}  // namespace PaddleSolution
diff --git a/inference/utils/utils.h b/inference/utils/utils.h
index 894636499bb55b9018cd40072455ae5cedd8a63f..7e322daa03c02e704509f032d5709684a341060f 100644
--- a/inference/utils/utils.h
+++ b/inference/utils/utils.h
@@ -1,3 +1,17 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #pragma once

 #include
@@ -16,105 +30,110 @@
 #endif

 namespace PaddleSolution {
-    namespace utils {
-        inline std::string path_join(const std::string& dir, const std::string& path) {
-            std::string seperator = "/";
-            #ifdef _WIN32
-            seperator = "\\";
-            #endif
-            return dir + seperator + path;
-        }
+namespace utils {
+    inline std::string path_join(const std::string& dir,
+                                 const std::string& path) {
+        std::string seperator = "/";
+        #ifdef _WIN32
+        seperator = "\\";
+        #endif
+        return dir + seperator + path;
+    }
-        #ifndef _WIN32
-        // scan a directory and get all files with input extensions
-        inline std::vector<std::string> get_directory_images(const std::string& path, const std::string& exts)
-        {
-            std::vector<std::string> imgs;
-            struct dirent *entry;
-            DIR *dir = opendir(path.c_str());
-            if (dir == NULL) {
-                closedir(dir);
-                return imgs;
-            }
-            while ((entry = readdir(dir)) != NULL) {
-                std::string item = entry->d_name;
-                auto ext = strrchr(entry->d_name, '.');
-                if (!ext || std::string(ext) == "." || std::string(ext) == "..") {
-                    continue;
-                }
-                if (exts.find(ext) != std::string::npos) {
-                    imgs.push_back(path_join(path, entry->d_name));
-                }
-            }
-            return imgs;
-        }
+    #ifndef _WIN32
+    // scan a directory and get all files with input extensions
+    inline std::vector<std::string> get_directory_images(
+        const std::string& path, const std::string& exts) {
+        std::vector<std::string> imgs;
+        struct dirent *entry;
+        DIR *dir = opendir(path.c_str());
+        if (dir == NULL) {
+            // opendir failed, so there is no handle to close
+            return imgs;
+        }
+        while ((entry = readdir(dir)) != NULL) {
+            std::string item = entry->d_name;
+            auto ext = strrchr(entry->d_name, '.');
+            if (!ext || std::string(ext) == "." || std::string(ext) == "..") {
+                continue;
+            }
+            if (exts.find(ext) != std::string::npos) {
+                imgs.push_back(path_join(path, entry->d_name));
+            }
+        }
+        return imgs;
+    }
-        #else
-        // scan a directory and get all files with input extensions
-        inline std::vector<std::string> get_directory_images(const std::string& path, const std::string& exts)
-        {
-            std::vector<std::string> imgs;
-            for (const auto& item : std::experimental::filesystem::directory_iterator(path)) {
-                auto suffix = item.path().extension().string();
-                if (exts.find(suffix) != std::string::npos && suffix.size() > 0) {
-                    auto fullname = path_join(path, item.path().filename().string());
-                    imgs.push_back(item.path().string());
-                }
-            }
-            return imgs;
-        }
-        #endif
+    #else
+    // scan a directory and get all files with input extensions
+    inline std::vector<std::string> get_directory_images(
+        const std::string& path, const std::string& exts) {
+        std::vector<std::string> imgs;
+        for (const auto& item :
+            std::experimental::filesystem::directory_iterator(path)) {
+            auto suffix = item.path().extension().string();
+            if (exts.find(suffix) != std::string::npos && suffix.size() > 0) {
+                auto fullname = path_join(path,
+                                          item.path().filename().string());
+                imgs.push_back(item.path().string());
+            }
+        }
+        return imgs;
+    }
+    #endif

-        // normalize and HWC_BGR -> CHW_RGB
-        inline void normalize(cv::Mat& im, float* data, std::vector<float>& fmean, std::vector<float>& fstd) {
-            int rh = im.rows;
-            int rw = im.cols;
-            int rc = im.channels();
-            double normf = (double)1.0 / 255.0;
-            #pragma omp parallel for
-            for (int h = 0; h < rh; ++h) {
-                const uchar* ptr = im.ptr(h);
-                int im_index = 0;
-                for (int w = 0; w < rw; ++w) {
-                    for (int c = 0; c < rc; ++c) {
-                        int top_index = (c * rh + h) * rw + w;
-                        float pixel = static_cast<float>(ptr[im_index++]);
-                        pixel = (pixel * normf - fmean[c]) / fstd[c];
-                        data[top_index] = pixel;
-                    }
-                }
-            }
-        }
+    // normalize and HWC_BGR -> CHW_RGB
+    inline void normalize(cv::Mat& im, float* data, std::vector<float>& fmean,
+                          std::vector<float>& fstd) {
+        int rh = im.rows;
+        int rw = im.cols;
+        int rc = im.channels();
+        double normf = static_cast<double>(1.0) / 255.0;
+        #pragma omp parallel for
+        for (int h = 0; h < rh; ++h) {
+            const uchar* ptr = im.ptr(h);
+            int im_index = 0;
+            for (int w = 0; w < rw; ++w) {
+                for (int c = 0; c < rc; ++c) {
+                    int top_index = (c * rh + h) * rw + w;
+                    float pixel = static_cast<float>(ptr[im_index++]);
+                    pixel = (pixel * normf - fmean[c]) / fstd[c];
+                    data[top_index] = pixel;
+                }
+            }
+        }
+    }

-        // argmax
-        inline void argmax(float* out, std::vector<int>& shape, std::vector<uchar>& mask, std::vector<uchar>& scoremap) {
-            int out_img_len = shape[1] * shape[2];
-            int blob_out_len = out_img_len * shape[0];
-            /*
-            Eigen::TensorMap<Eigen::Tensor<float, 3>> out_3d(out, shape[0], shape[1], shape[2]);
-            Eigen::Tensor<Eigen::DenseIndex, 2> argmax = out_3d.argmax(0);
-            */
-            float max_value = -1;
-            int label = 0;
-            #pragma omp parallel private(label)
-            for (int i = 0; i < out_img_len; ++i) {
-                max_value = -1;
-                label = 0;
-                #pragma omp for reduction(max : max_value)
-                for (int j = 0; j < shape[0]; ++j) {
-                    int index = i + j * out_img_len;
-                    if (index >= blob_out_len) {
-                        continue;
-                    }
-                    float value = out[index];
-                    if (value > max_value) {
-                        max_value = value;
-                        label = j;
-                    }
-                }
-                if (label == 0) max_value = 0;
-                mask[i] = uchar(label);
-                scoremap[i] = uchar(max_value * 255);
-            }
-        }
-    }
-}
+    // argmax
+    inline void argmax(float* out, std::vector<int>& shape,
+                       std::vector<uchar>& mask, std::vector<uchar>& scoremap) {
+        int out_img_len = shape[1] * shape[2];
+        int blob_out_len = out_img_len * shape[0];
+        /*
+        Eigen::TensorMap<Eigen::Tensor<float, 3>> out_3d(out, shape[0], shape[1], shape[2]);
+        Eigen::Tensor<Eigen::DenseIndex, 2> argmax = out_3d.argmax(0);
+        */
+        float max_value = -1;
+        int label = 0;
+        #pragma omp parallel private(label)
+        for (int i = 0; i < out_img_len; ++i) {
+            max_value = -1;
+            label = 0;
+            #pragma omp for reduction(max : max_value)
+            for (int j = 0; j < shape[0]; ++j) {
+                int index = i + j * out_img_len;
+                if (index >= blob_out_len) {
+                    continue;
+                }
+                float value = out[index];
+                if (value > max_value) {
+                    max_value = value;
+                    label = j;
+                }
+            }
+            if (label == 0) max_value = 0;
+            mask[i] = uchar(label);
+            scoremap[i] = uchar(max_value * 255);
+        }
+    }
+}  // namespace utils
+}  // namespace PaddleSolution
diff --git a/docs/annotation/jingling2seg.py b/pdseg/tools/jingling2seg.py
similarity index 70%
rename from docs/annotation/jingling2seg.py
rename to pdseg/tools/jingling2seg.py
index 18626157738671afc8415471d108e9d6a04f8495..7c9d6322a3db432fe2a231df3a665c43e3950811 100644
--- a/docs/annotation/jingling2seg.py
+++ b/pdseg/tools/jingling2seg.py
@@ -7,7 +7,6 @@ import glob
 import json
 import os
 import os.path as osp
-import sys

 import numpy as np
 import PIL.Image
@@ -15,26 +14,24 @@ import PIL.Image
 import labelme


-def main():
+def parse_args():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
-    parser.add_argument('label_dir', help='input annotated directory')
-    parser.add_argument('output_dir', help='output dataset directory')
-    args = parser.parse_args()
+    parser.add_argument('input_dir',
+                        help='input annotated directory')
+    return parser.parse_args()

-    if osp.exists(args.output_dir):
-        print('Output directory already exists:', args.output_dir)
-        sys.exit(1)
-    os.makedirs(args.output_dir)
-    os.makedirs(osp.join(args.output_dir, 'JPEGImages'))
-    os.makedirs(osp.join(args.output_dir, 'SegmentationClassPNG'))
-    print('Creating dataset:', args.output_dir)

+def main(args):
+    output_dir = osp.join(args.input_dir, 'annotations')
+    if not osp.exists(output_dir):
+        os.makedirs(output_dir)
+        print('Creating annotations directory:', output_dir)
     # get all the class names for the given dataset
     class_names = ['_background_']
-    for label_file in glob.glob(osp.join(args.label_dir, '*.json')):
+    for label_file in glob.glob(osp.join(args.input_dir, '*.json')):
         with open(label_file) as f:
             data = json.load(f)
             if data['outputs']:
@@ -53,19 +50,17 @@
     class_names = tuple(class_names)
     print('class_names:', class_names)

-    out_class_names_file = osp.join(args.output_dir, 'class_names.txt')
+    out_class_names_file = osp.join(args.input_dir, 'class_names.txt')
     with open(out_class_names_file, 'w') as f:
         f.writelines('\n'.join(class_names))
     print('Saved class_names:', out_class_names_file)

-    for label_file in glob.glob(osp.join(args.label_dir, '*.json')):
+    for label_file in glob.glob(osp.join(args.input_dir, '*.json')):
         print('Generating dataset from:', label_file)
         with open(label_file) as f:
             base = osp.splitext(osp.basename(label_file))[0]
-            out_img_file = osp.join(
-                args.output_dir, 'JPEGImages', base + '.jpg')
             out_png_file = osp.join(
-                args.output_dir, 'SegmentationClassPNG', base + '.png')
+                output_dir, base + '.png')

             data = json.load(f)

@@ -83,19 +78,18 @@ def main():
                 shape = {'label': name, 'points': points, 'shape_type': 'polygon'}
                 data_shapes.append(shape)

-            img_file = osp.join(osp.dirname(label_file), data['path'])
-            img = np.asarray(PIL.Image.open(img_file))
-            PIL.Image.fromarray(img).save(out_img_file)
+            data_size = data['size']
+            img_shape = (data_size['height'], data_size['width'], data_size['depth'])

             lbl = labelme.utils.shapes_to_label(
-                img_shape=img.shape,
+                img_shape=img_shape,
                 shapes=data_shapes,
                 label_name_to_value=class_name_to_id,
             )

             if osp.splitext(out_png_file)[1] != '.png':
                 out_png_file += '.png'
-            # Assume label ranses [0, 255] for uint8,
+            # Assume label ranges [0, 255] for uint8,
             if lbl.min() >= 0 and lbl.max() <= 255:
                 lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='L')
                 lbl_pil.save(out_png_file)
@@ -107,4 +101,5 @@ def main():

 if __name__ == '__main__':
-    main()
+    args = parse_args()
+    main(args)
diff --git a/docs/annotation/labelme2seg.py b/pdseg/tools/labelme2seg.py
similarity index 71%
rename from docs/annotation/labelme2seg.py
rename to pdseg/tools/labelme2seg.py
index 96eb2aa8319a26c55aca2fd9eb9c888475d1340c..689ecaedb35d6f181aaa6d8ef66dfc09a764dccd 100755
--- a/docs/annotation/labelme2seg.py
+++ b/pdseg/tools/labelme2seg.py
@@ -7,7 +7,6 @@ import glob
 import json
 import os
 import os.path as osp
-import sys

 import numpy as np
 import PIL.Image
@@ -15,21 +14,20 @@ import PIL.Image
 import labelme


-def main():
+def parse_args():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
-    parser.add_argument('input_dir', help='input annotated directory')
-    parser.add_argument('output_dir', help='output dataset directory')
-    args = parser.parse_args()
-
-    if osp.exists(args.output_dir):
-        print('Output directory already exists:', args.output_dir)
-        sys.exit(1)
-    os.makedirs(args.output_dir)
-    os.makedirs(osp.join(args.output_dir, 'JPEGImages'))
-    os.makedirs(osp.join(args.output_dir, 'SegmentationClassPNG'))
-    print('Creating dataset:', args.output_dir)
+    parser.add_argument('input_dir',
+                        help='input annotated directory')
+    return parser.parse_args()
+
+
+def main(args):
+    output_dir = osp.join(args.input_dir, 'annotations')
+    if not osp.exists(output_dir):
+        os.makedirs(output_dir)
+        print('Creating annotations directory:', output_dir)

     # get all the class names for the given dataset
     class_names = ['_background_']
@@ -45,14 +43,14 @@ def main():
     class_name_to_id = {}
     for i, class_name in enumerate(class_names):
-        class_id = i # starts with 0
+        class_id = i  # starts with 0
         class_name_to_id[class_name] = class_id
         if class_id == 0:
             assert class_name == '_background_'
     class_names = tuple(class_names)
     print('class_names:', class_names)

-    out_class_names_file = osp.join(args.output_dir, 'class_names.txt')
+    out_class_names_file = osp.join(args.input_dir, 'class_names.txt')
     with open(out_class_names_file, 'w') as f:
         f.writelines('\n'.join(class_names))
     print('Saved class_names:', out_class_names_file)
@@ -61,16 +59,13 @@ def main():
         print('Generating dataset from:', label_file)
         with open(label_file) as f:
             base = osp.splitext(osp.basename(label_file))[0]
-            out_img_file = osp.join(
-                args.output_dir, 'JPEGImages', base + '.jpg')
             out_png_file = osp.join(
-                args.output_dir, 'SegmentationClassPNG', base + '.png')
+                output_dir, base + '.png')

             data = json.load(f)

             img_file = osp.join(osp.dirname(label_file), data['imagePath'])
             img = np.asarray(PIL.Image.open(img_file))
-            PIL.Image.fromarray(img).save(out_img_file)

             lbl = labelme.utils.shapes_to_label(
                 img_shape=img.shape,
@@ -80,7 +75,7 @@ def main():

             if osp.splitext(out_png_file)[1] != '.png':
                 out_png_file += '.png'
-            # Assume label ranses [0, 255] for uint8,
+            # Assume label ranges [0, 255] for uint8,
             if lbl.min() >= 0 and lbl.max() <= 255:
                 lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='L')
                 lbl_pil.save(out_png_file)
@@ -90,5 +85,7 @@ def main():
                 'Please consider using the .npy format.' % out_png_file
             )

+
 if __name__ == '__main__':
-    main()
+    args = parse_args()
+    main(args)
diff --git a/pdseg/utils/config.py b/pdseg/utils/config.py
index c14d9df6ea491e84a0f788c30f782714d56db009..5d66c2f076ca964fcdf23d1cfd427e61acf68876 100644
--- a/pdseg/utils/config.py
+++ b/pdseg/utils/config.py
@@ -68,8 +68,8 @@ cfg.DATASET.DATA_DIM = 3
 cfg.DATASET.SEPARATOR = ' '
 # 忽略的像素标签值, 默认为255,一般无需改动
 cfg.DATASET.IGNORE_INDEX = 255
-# 数据增强是图像的padding值
-cfg.DATASET.PADDING_VALUE = [127.5,127.5,127.5]
+# 数据增强时图像的padding值
+cfg.DATASET.PADDING_VALUE = [127.5, 127.5, 127.5]

 ########################### 数据增强配置 ######################################
 # 图像镜像左右翻转
@@ -186,11 +186,11 @@ cfg.MODEL.SCALE_LOSS = "DYNAMIC"
 cfg.MODEL.DEEPLAB.BACKBONE = "xception_65"
 # DeepLab output stride
 cfg.MODEL.DEEPLAB.OUTPUT_STRIDE = 16
-# MobileNet backbone scale 设置
+# MobileNet v2 backbone scale 设置
 cfg.MODEL.DEEPLAB.DEPTH_MULTIPLIER = 1.0
-# MobileNet backbone scale 设置
+# MobileNet v2 backbone scale 设置
 cfg.MODEL.DEEPLAB.ENCODER_WITH_ASPP = True
-# MobileNet backbone scale 设置
+# MobileNet v2 backbone scale 设置
 cfg.MODEL.DEEPLAB.ENABLE_DECODER = True
 # ASPP是否使用可分离卷积
 cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV = True
diff --git a/pdseg/vis.py b/pdseg/vis.py
index 7806ce5d4f3643bb7f4f145f695222e5513bbdbc..81024fcbe4dc87a20fd825da0ffd5df462624197 100644
--- a/pdseg/vis.py
+++ b/pdseg/vis.py
@@ -226,7 +226,7 @@ def visualize(cfg,
             # Use Tensorboard to visualize image
             if log_writer is not None:
                 # Calculate epoch from ckpt_dir folder name
-                epoch = int(ckpt_dir.split(os.path.sep)[-1])
+                epoch = int(os.path.split(ckpt_dir)[-1])
                 print("Tensorboard visualization epoch", epoch)
                 log_writer.add_image(
                     "Predict/{}".format(img_names[i]),
diff --git a/turtorial/finetune_deeplabv3plus.md b/turtorial/finetune_deeplabv3plus.md
index eee70fcea2c8b43f0fedcb7ab9da4755acbf274a..35fb677d9d416512a79ded14bcdcadf516aa6b70 100644
--- a/turtorial/finetune_deeplabv3plus.md
+++ b/turtorial/finetune_deeplabv3plus.md
@@ -115,15 +115,15 @@ python pdseg/eval.py --use_gpu --cfg ./configs/deeplabv3p_xception65_pet.yaml
 |预训练模型名称|BackBone|Norm Type|数据集|配置|
 |-|-|-|-|-|
-|mobilenetv2-2-0_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenet<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 2.0<br>MODEL.DEFAULT_NORM_TYPE: bn|
-|mobilenetv2-1-5_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenet<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.5<br>MODEL.DEFAULT_NORM_TYPE: bn|
-|mobilenetv2-1-0_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenet<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0<br>MODEL.DEFAULT_NORM_TYPE: bn|
-|mobilenetv2-0-5_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenet<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.5<br>MODEL.DEFAULT_NORM_TYPE: bn|
-|mobilenetv2-0-25_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenet<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.25<br>MODEL.DEFAULT_NORM_TYPE: bn|
+|mobilenetv2-2-0_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenetv2<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 2.0<br>MODEL.DEFAULT_NORM_TYPE: bn|
+|mobilenetv2-1-5_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenetv2<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.5<br>MODEL.DEFAULT_NORM_TYPE: bn|
+|mobilenetv2-1-0_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenetv2<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0<br>MODEL.DEFAULT_NORM_TYPE: bn|
+|mobilenetv2-0-5_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenetv2<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.5<br>MODEL.DEFAULT_NORM_TYPE: bn|
+|mobilenetv2-0-25_bn_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenetv2<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.25<br>MODEL.DEFAULT_NORM_TYPE: bn|
 |xception41_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: xception_41<br>MODEL.DEFAULT_NORM_TYPE: bn|
 |xception65_imagenet|-|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: xception_65<br>MODEL.DEFAULT_NORM_TYPE: bn|
-|deeplabv3p_mobilenetv2-1-0_bn_coco|MobileNet V2|bn|COCO|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenet<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0<br>MODEL.DEEPLAB.ENCODER_WITH_ASPP: False<br>MODEL.DEEPLAB.ENABLE_DECODER: False<br>MODEL.DEFAULT_NORM_TYPE: bn|
+|deeplabv3p_mobilenetv2-1-0_bn_coco|MobileNet V2|bn|COCO|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenetv2<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0<br>MODEL.DEEPLAB.ENCODER_WITH_ASPP: False<br>MODEL.DEEPLAB.ENABLE_DECODER: False<br>MODEL.DEFAULT_NORM_TYPE: bn|
 |**deeplabv3p_xception65_bn_coco**|Xception|bn|COCO|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: xception_65<br>MODEL.DEFAULT_NORM_TYPE: bn |
-|deeplabv3p_mobilenetv2-1-0_bn_cityscapes|MobileNet V2|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenet<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0<br>MODEL.DEEPLAB.ENCODER_WITH_ASPP: False<br>MODEL.DEEPLAB.ENABLE_DECODER: False<br>MODEL.DEFAULT_NORM_TYPE: bn|
+|deeplabv3p_mobilenetv2-1-0_bn_cityscapes|MobileNet V2|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: mobilenetv2<br>MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0<br>MODEL.DEEPLAB.ENCODER_WITH_ASPP: False<br>MODEL.DEEPLAB.ENABLE_DECODER: False<br>MODEL.DEFAULT_NORM_TYPE: bn|
 |deeplabv3p_xception65_gn_cityscapes|Xception|gn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: xception_65<br>MODEL.DEFAULT_NORM_TYPE: gn|
 |deeplabv3p_xception65_bn_cityscapes|Xception|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p<br>MODEL.DEEPLAB.BACKBONE: xception_65<br>MODEL.DEFAULT_NORM_TYPE: bn|