“02cc7d90a606875a44d605c18e17855ce8339652”上不存在“...gserver/gradientmachines/RecurrentGradientMachine.h”
提交 cfef6b5b 编写于 作者: P pengmian
......@@ -23,12 +23,10 @@ PaddleSeg是基于[PaddlePaddle](https://www.paddlepaddle.org.cn)开发的语义
- **工业级部署**
- 基于[Paddle Serving](https://github.com/PaddlePaddle/Serving)和PaddlePaddle高性能预测引擎, 结合百度开放的AI能力,轻松搭建人像分割和车道线分割服务。
- 基于[Paddle Serving](https://github.com/PaddlePaddle/Serving)和PaddlePaddle高性能预测引擎,结合百度开放的AI能力,轻松搭建人像分割和车道线分割服务。
更多模型信息与技术细节请查看[模型介绍](./docs/models.md)[预训练模型](./docs/mode_zoo.md)
更多模型信息与技术细节请查看[模型介绍](./docs/models.md)和[预训练模型](./docs/model_zoo.md)
## AI Studio教程
......
......@@ -23,8 +23,6 @@ AUG:
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 4
MEAN: [0.5, 0.5, 0.5]
STD: [0.5, 0.5, 0.5]
DATASET:
DATA_DIR: "./dataset/cityscapes/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
......
EVAL_CROP_SIZE: (513, 513) # (width, height), for unpadding rangescaling and stepscaling
TRAIN_CROP_SIZE: (513, 513) # (width, height), for unpadding rangescaling and stepscaling
AUG:
AUG_METHOD: u"stepscaling" # choice unpadding rangescaling and stepscaling
FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding
INF_RESIZE_VALUE: 500 # for rangescaling
MAX_RESIZE_VALUE: 600 # for rangescaling
MIN_RESIZE_VALUE: 400 # for rangescaling
MAX_SCALE_FACTOR: 2.0 # for stepscaling
MIN_SCALE_FACTOR: 0.5 # for stepscaling
SCALE_STEP_SIZE: 0.25 # for stepscaling
MIRROR: True
RICH_CROP:
ENABLE: False
ASPECT_RATIO: 0.33
BLUR: True
BLUR_RATIO: 0.1
FLIP: True
FLIP_RATIO: 0.2
MAX_ROTATION: 15
MIN_AREA_RATIO: 0.5
BRIGHTNESS_JITTER_RATIO: 0.5
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 8
MEAN: [104.008, 116.669, 122.675]
STD: [1.0, 1.0, 1.0]
DATASET:
DATA_DIR: "./data/COCO2014/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
NUM_CLASSES: 21
TEST_FILE_LIST: "data/COCO2014/VOC_ImageSets/val.txt"
TRAIN_FILE_LIST: "data/COCO2014/ImageSets/train.txt"
VAL_FILE_LIST: "data/COCO2014/VOC_ImageSets/val.txt"
SEPARATOR: " "
IGNORE_INDEX: 255
FREEZE:
MODEL_FILENAME: "model"
PARAMS_FILENAME: "params"
MODEL:
DEFAULT_NORM_TYPE: "bn"
MODEL_NAME: "deeplabv3p"
TEST:
TEST_MODEL: "snapshots/coco_v1/final"
TRAIN:
MODEL_SAVE_DIR: "snapshots/coco_v1/"
PRETRAINED_MODEL: "pretrain/xception65_pretrained/"
RESUME: False
SNAPSHOT_EPOCH: 5
SOLVER:
LR: 0.007
WEIGHT_DECAY: 0.00004
NUM_EPOCHS: 40
LR_POLICY: "poly"
OPTIMIZER: "SGD"
TRAIN_CROP_SIZE: (513, 513) # (width, height), for unpadding rangescaling and stepscaling
EVAL_CROP_SIZE: (513, 513) # (width, height), for unpadding rangescaling and stepscaling
AUG:
AUG_METHOD: u"unpadding" # choice unpadding rangescaling and stepscaling
FIX_RESIZE_SIZE: (513, 513) # (width, height), for unpadding
INF_RESIZE_VALUE: 513 # for rangescaling
MAX_RESIZE_VALUE: 400 # for rangescaling
MIN_RESIZE_VALUE: 513 # for rangescaling
MAX_SCALE_FACTOR: 2.0 # for stepscaling
MIN_SCALE_FACTOR: 0.5 # for stepscaling
SCALE_STEP_SIZE: 0.25 # for stepscaling
MIRROR: True
RICH_CROP:
ENABLE: True
ASPECT_RATIO: 0
BLUR: True
BLUR_RATIO: 0.1
FLIP: True
FLIP_RATIO: 0.2
MAX_ROTATION: 45
MIN_AREA_RATIO: 0
BRIGHTNESS_JITTER_RATIO: 0.5
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 24
MEAN: [104.008, 116.669, 122.675]
STD: [1.0, 1.0, 1.0]
DATASET:
DATA_DIR: u"./data/humanseg/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
NUM_CLASSES: 2
TEST_FILE_LIST: u"data/humanseg/list/val.txt"
TRAIN_FILE_LIST: u"data/humanseg/list/train.txt"
VAL_FILE_LIST: u"data/humanseg/list/val.txt"
IGNORE_INDEX: 255
SEPARATOR: "|"
FREEZE:
MODEL_FILENAME: u"model"
PARAMS_FILENAME: u"params"
SAVE_DIR: u"human_freeze_model"
MODEL:
DEFAULT_NORM_TYPE: u"bn"
MODEL_NAME: "deeplabv3p"
DEEPLAB:
BACKBONE: "xception_65"
TEST:
TEST_MODEL: "snapshots/humanseg/aic_v2/final/"
TRAIN:
MODEL_SAVE_DIR: "snapshots/humanseg/aic_v2/"
PRETRAINED_MODEL: u"pretrain/xception65_pretrained/"
RESUME: False
SNAPSHOT_EPOCH: 5
SOLVER:
LR: 0.1
NUM_EPOCHS: 40
LR_POLICY: "poly"
OPTIMIZER: "sgd"
EVAL_CROP_SIZE: (1536, 576) # (width, height), for unpadding rangescaling and stepscaling
TRAIN_CROP_SIZE: (1536, 576) # (width, height), for unpadding rangescaling and stepscaling
AUG:
AUG_METHOD: u"unpadding" # choice unpadding rangescaling and stepscaling
FIX_RESIZE_SIZE: (1536, 576) # (width, height), for unpadding
INF_RESIZE_VALUE: 1280 # for rangescaling
MAX_RESIZE_VALUE: 1024 # for rangescaling
MIN_RESIZE_VALUE: 1536 # for rangescaling
MAX_SCALE_FACTOR: 2.0 # for stepscaling
MIN_SCALE_FACTOR: 0.5 # for stepscaling
SCALE_STEP_SIZE: 0.25 # for stepscaling
MIRROR: True
RICH_CROP:
ENABLE: False
ASPECT_RATIO: 0.33
BLUR: True
BLUR_RATIO: 0.1
FLIP: True
FLIP_RATIO: 0.2
MAX_ROTATION: 15
MIN_AREA_RATIO: 0.5
BRIGHTNESS_JITTER_RATIO: 0.5
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 1
MEAN: [127.5, 127.5, 127.5]
STD: [127.5, 127.5, 127.5]
DATASET:
DATA_DIR: "./data/line/L4_lane_mask_dataset_app/L4_360_0_2class/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
NUM_CLASSES: 2
TEST_FILE_LIST: "data/line/L4_lane_mask_dataset_app/L4_360_0_2class/val.txt"
TRAIN_FILE_LIST: "data/line/L4_lane_mask_dataset_app/L4_360_0_2class/train.txt"
VAL_FILE_LIST: "data/line/L4_lane_mask_dataset_app/L4_360_0_2class/val.txt"
SEPARATOR: " "
IGNORE_INDEX: 255
FREEZE:
MODEL_FILENAME: "__model__"
PARAMS_FILENAME: "__params__"
SAVE_DIR: "line_freeze_model"
MODEL:
DEFAULT_NORM_TYPE: "bn"
MODEL_NAME: "deeplabv3p"
DEEPLAB:
BACKBONE: "mobilenet"
TEST:
TEST_MODEL: "snapshots/line_v4/final/"
TRAIN:
MODEL_SAVE_DIR: "snapshots/line_v4/"
PRETRAINED_MODEL: u"pretrain/MobileNetV2_pretrained/"
RESUME: False
SNAPSHOT_EPOCH: 10
SOLVER:
LR: 0.01
LR_POLICY: "poly"
OPTIMIZER: "sgd"
NUM_EPOCHS: 40
......@@ -25,8 +25,6 @@ AUG:
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 4
MEAN: [104.008, 116.669, 122.675]
STD: [1.0, 1.0, 1.0]
DATASET:
DATA_DIR: "./dataset/mini_pet/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
......
......@@ -7,7 +7,7 @@
### 1. 模型概述
CVPR 19 Look into Person (LIP) 单人人像分割比赛冠军模型,详见[ACE2P/README](http://gitlab.baidu.com/Paddle/PaddleSeg/tree/master/contrib/ACE2P)
CVPR 19 Look into Person (LIP) 单人人像分割比赛冠军模型,详见[ACE2P](./ACE2P)
### 2. 模型下载
......
......@@ -4,7 +4,7 @@ BASIC Group存放所有通用配置
## `MEAN`
图像预处理减去的均值(格式为*[R, G, B]*
图像预处理减去的均值(格式为 *[R, G, B]*)
### 默认值
......@@ -15,7 +15,7 @@ BASIC Group存放所有通用配置
## `STD`
图像预处理所除的标准差(格式为*[R, G, B]*
图像预处理所除的标准差(格式为 *[R, G, B]*)
### 默认值
......@@ -26,7 +26,7 @@ BASIC Group存放所有通用配置
## `EVAL_CROP_SIZE`
评估时所对图片裁剪的大小(格式为*[宽, 高]*
评估时所对图片裁剪的大小(格式为 *[宽, 高]*)
### 默认值
......@@ -40,7 +40,7 @@ BASIC Group存放所有通用配置
## `TRAIN_CROP_SIZE`
训练时所对图片裁剪的大小(格式为*[宽, 高]*
训练时所对图片裁剪的大小(格式为 *[宽, 高]*)
### 默认值
......@@ -66,4 +66,4 @@ BASIC Group存放所有通用配置
* 增大BATCH_SIZE有利于模型训练时的收敛速度,但是会带来显存的开销。请根据实际情况评估后填写合适的值
<br/>
<br/>
\ No newline at end of file
<br/>
......@@ -13,7 +13,7 @@ DATALOADER Group存放所有与数据加载相关的配置
### 注意事项
* 该选项只在`pdseg/train.py`和`pdseg/eval.py`中使用到
* 当使用多线程时,该字段表示线程量,使用多进程时,该字段表示进程数量。一般该字段使用默认值即可
* 当使用多线程时,该字段表示线程数量,使用多进程时,该字段表示进程数量。一般该字段使用默认值即可
<br/>
<br/>
......@@ -27,4 +27,4 @@ DATALOADER Group存放所有与数据加载相关的配置
256
<br/>
<br/>
\ No newline at end of file
<br/>
......@@ -53,10 +53,6 @@ rich crop是指对图像进行多种变换,保证在训练过程中数据的
该步骤主要是通过crop的方式使得输入到网络中的图像在某一个固定大小,控制该大小的参数为TRAIN_CROP_SIZE,类型为tuple,格式为(width, height). 当输入图像大小小于CROP_SIZE的时候会对输入图像进行padding,padding值为均值。
- preprocess
- 减均值
- 除方差
- 水平翻转
- 输入图片格式
- 原图
- 图片格式:rgb三通道图片和rgba四通道图片两种类型的图片进行训练,但是在一次训练过程只能存在一种格式。
......
......@@ -45,8 +45,7 @@ PaddleSeg采用通用的文件列表方式组织训练集、验证集和测试
```
其中`[SEP]`是文件路径分割库,可以`DATASET.SEPRATOR`配置中进行配置, 默认为空格。
如果文件名中存在**空格**,推荐使用'|'等文件名不可用字符进行切分。
其中`[SEP]`是文件路径分割符,可以在`DATASET.SEPARATOR`配置项中修改, 默认为空格。
**注意事项**
......@@ -55,7 +54,8 @@ PaddleSeg采用通用的文件列表方式组织训练集、验证集和测试
* 文件列表请使用**UTF-8**格式保存, PaddleSeg默认使用UTF-8编码读取file_list文件
如下图所示,左边为原图的图片路径,右边为图片对应的标注路径。
![cityscapes_filelist](./docs/imgs/file_list.png)
![cityscapes_filelist](./imgs/file_list.png)
完整的配置信息可以参考[`./dataset/cityscapes_demo`](../dataset/cityscapes_demo/)目录下的yaml和文件列表。
......@@ -104,4 +104,4 @@ python pdseg/check.py --cfg ${YAML_FILE_PATH}
- 当`AUG.AUG_METHOD`为rangescaling时,`EVAL_CROP_SIZE`的宽高应不小于缩放后图像中最大的宽高。
我们将计算并给出`EVAL_CROP_SIZE`的建议值。
\ No newline at end of file
我们将计算并给出`EVAL_CROP_SIZE`的建议值。
......@@ -2,27 +2,27 @@
PaddleSeg对所有内置的分割模型都提供了公开数据集下的预训练模型,通过加载预训练模型后训练可以在自定义数据集中得到更稳定的效果。
## ImageNet预训练模型
## ImageNet预训练模型
所有ImageNet预训练模型来自于PaddlePaddle图像分类库,想获取更多细节请点击[这里](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification)
| 模型 | 数据集合 | Depth multiplier | 模型加载config设置 | 下载地址 | Accuracy Top1/5 |
|---|---|---|---|---|---|
| MobieNetV2_1.0x | ImageNet | 1.0x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEFAULT_NORM_TYPE: bn| [MobileNetV2_1.0x] (https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% |
| MobieNetV2_0.25x | ImageNet | 0.25x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.25 <br> MODEL.DEFAULT_NORM_TYPE: bn |[MobileNetV2_0.25x] (https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_25_pretrained.tar) | 53.21%/76.52% |
| MobieNetV2_0.5x | ImageNet | 0.5x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.5 <br> MODEL.DEFAULT_NORM_TYPE: bn | [MobileNetV2_0.5x] (https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% |
| MobieNetV2_1.5x | ImageNet | 1.5x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.5 <br> MODEL.DEFAULT_NORM_TYPE: bn| [MobileNetV2_1.5x] (https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12%/91.67% |
| MobieNetV2_2.0x | ImageNet | 2.0x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 2.0 <br> MODEL.DEFAULT_NORM_TYPE: bn | [MobileNetV2_2.0x] (https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23%/92.58% |
| MobileNetV2_1.0x | ImageNet | 1.0x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEFAULT_NORM_TYPE: bn| [MobileNetV2_1.0x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% |
| MobileNetV2_0.25x | ImageNet | 0.25x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.25 <br> MODEL.DEFAULT_NORM_TYPE: bn |[MobileNetV2_0.25x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_25_pretrained.tar) | 53.21%/76.52% |
| MobileNetV2_0.5x | ImageNet | 0.5x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.5 <br> MODEL.DEFAULT_NORM_TYPE: bn | [MobileNetV2_0.5x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% |
| MobileNetV2_1.5x | ImageNet | 1.5x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.5 <br> MODEL.DEFAULT_NORM_TYPE: bn| [MobileNetV2_1.5x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12%/91.67% |
| MobileNetV2_2.0x | ImageNet | 2.0x | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 2.0 <br> MODEL.DEFAULT_NORM_TYPE: bn | [MobileNetV2_2.0x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23%/92.58% |
用户可以结合实际场景的精度和预测性能要求,选取不同`Depth multiplier`参数的MobileNet模型。
| 模型 | 数据集合 | 模型加载config设置 | 下载地址 | Accuracy Top1/5 |
|---|---|---|---|---|
| Xception41 | ImageNet | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_41 <br> MODEL.DEFAULT_NORM_TYPE: bn| [Xception41_pretrained.tgz] (https://paddleseg.bj.bcebos.com/models/Xception41_pretrained.tgz) | 79.5%/94.38% |
| Xception65 | ImageNet | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn| [Xception65_pretrained.tgz] (https://paddleseg.bj.bcebos.com/models/Xception65_pretrained.tgz) | 80.32%/94.47% |
| Xception41 | ImageNet | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_41 <br> MODEL.DEFAULT_NORM_TYPE: bn| [Xception41_pretrained.tgz](https://paddleseg.bj.bcebos.com/models/Xception41_pretrained.tgz) | 79.5%/94.38% |
| Xception65 | ImageNet | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn| [Xception65_pretrained.tgz](https://paddleseg.bj.bcebos.com/models/Xception65_pretrained.tgz) | 80.32%/94.47% |
| Xception71 | ImageNet | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_71 <br> MODEL.DEFAULT_NORM_TYPE: bn| coming soon | -- |
## COCO预训练模型
## COCO预训练模型
train数据集为coco instance分割数据集合转换成的语义分割数据集合
......@@ -30,15 +30,15 @@ train数据集为coco instance分割数据集合转换成的语义分割数据
|---|---|---|---|---|---|---|
| DeepLabv3+/MobileNetv2/bn | COCO | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEFAULT_NORM_TYPE: bn|[deeplabv3plus_coco_bn_init.tgz](https://bj.bcebos.com/v1/paddleseg/deeplabv3plus_coco_bn_init.tgz) | 16 | --| -- |
| DeeplabV3+/Xception65/bn | COCO | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn | [xception65_coco.tgz](https://paddleseg.bj.bcebos.com/models/xception65_coco.tgz)| 16 | -- | -- |
| UNet/bn | COCO | MODEL.MODEL_NEME: unet <br> MODEL.DEFAULT_NORM_TYPE: bn | [unet](https://paddleseg.bj.bcebos.com/models/unet_coco_v2.tgz) | 16 | -- | -- |
| UNet/bn | COCO | MODEL.MODEL_NAME: unet <br> MODEL.DEFAULT_NORM_TYPE: bn | [unet](https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz) | 16 | -- | -- |
## Cityscapes预训练模型
## Cityscapes预训练模型
train数据集合为Cityscapes 训练集合,测试为Cityscapes的验证集合
| 模型 | 数据集合 | 模型加载config设置 | 下载地址 |Output Stride| multi-scale test| mIoU on val|
|---|---|---|---|---|---|---|
| DeepLabv3+/MobileNetv2/bn | Cityscapes |MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEEPLAB.ENCODER_WITH_ASPP: False <br> MODEL.DEEPLAB.ENABLE_DECODER: False <br> MODEL.DEFAULT_NORM_TYPE: bn|[mobilenet_cityscapes.tgz] (https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz) |16|false| 0.698|
| DeepLabv3+/MobileNetv2/bn | Cityscapes |MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenet <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEEPLAB.ENCODER_WITH_ASPP: False <br> MODEL.DEEPLAB.ENABLE_DECODER: False <br> MODEL.DEFAULT_NORM_TYPE: bn|[mobilenet_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz) |16|false| 0.698|
| DeepLabv3+/Xception65/gn | Cityscapes |MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: gn | [deeplabv3p_xception65_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/deeplabv3p_xception65_cityscapes.tgz) |16|false| 0.7804 |
| DeepLabv3+/Xception65/bn | Cityscapes | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn| [Xception65_deeplab_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/Xception65_deeplab_cityscapes.tgz) | 16 | false | 0.7715 |
| DeepLabv3+/Xception65/bn | Cityscapes | MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn| [Xception65_deeplab_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz) | 16 | false | 0.7715 |
| ICNet/bn | Cityscapes | MODEL.MODEL_NAME: icnet <br> MODEL.DEFAULT_NORM_TYPE: bn | [icnet_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/icnet_cityscapes.tgz) |16|false| 0.6854 |
......@@ -48,7 +48,7 @@ python pdseg/export_model.py ${FLAGS} ${OPTIONS}
1. 下载合适版本的paddlepaddle
2. PaddleSeg相关依赖已经安装
如果有不确认的地方,请参考[安装说明](./docs/installation.md)
如果有不确认的地方,请参考[安装说明](./installation.md)
### 下载预训练模型
```shell
......@@ -111,10 +111,10 @@ NOTE:
启动TensorBoard命令后,我们可以在浏览器中查看对应的训练数据
在`SCALAR`这个tab中,查看训练loss、iou、acc的变化趋势
![](docs/imgs/tensorboard_scalar.JPG)
![](./imgs/tensorboard_scalar.JPG)
在`IMAGE`这个tab中,查看样本的预测情况
![](docs/imgs/tensorboard_image.JPG)
![](./imgs/tensorboard_image.JPG)
### 模型评估
训练完成后,我们可以通过eval.py来评估模型效果。由于我们设置的训练EPOCH数量为500,保存间隔为10,因此一共会产生50个定期保存的模型,加上最终保存的final模型,一共有51个模型。我们选择最后保存的模型进行效果的评估:
......@@ -147,4 +147,4 @@ python pdseg/export_model.py --cfg configs/unet_pet.yaml \
TEST.TEST_MODEL test/saved_models/unet_pet/final
```
模型会导出到freeze_model目录,接下来就是进行模型的部署,相关步骤,请查看[模型部署](./inference/README.md)
模型会导出到freeze_model目录,接下来就是进行模型的部署,相关步骤,请查看[模型部署](../inference/README.md)
......@@ -122,7 +122,7 @@ D:\PaddleDeploy\inference\build\Release> demo.exe --conf=D:\\PaddleDeploy\\infer
| conf | 模型配置的yaml文件路径 |
| input_dir | 需要预测的图片目录 |
**配置文件**的样例以及字段注释说明请参考: [conf/humanseg.yaml](inference/conf/humanseg.yaml)
**配置文件**的样例以及字段注释说明请参考: [conf/humanseg.yaml](./conf/humanseg.yaml)
样例程序会扫描input_dir目录下的所有图片,并生成对应的预测结果图片。
......
......@@ -12,76 +12,134 @@ import argparse
import cv2
from tqdm import tqdm
import imghdr
import logging
from utils.config import cfg
def init_global_variable():
    """Create (or reset) every module-level accumulator used by the dataset checks."""
    # Label-format counters and per-class pixel statistics.
    global png_format_right_num, png_format_wrong_num
    global total_grt_classes, total_num_of_each_class
    # Lists collecting offending samples.
    global shape_unequal, png_format_wrong
    global shape_unequal_image, png_format_wrong_image
    global list_wrong, imread_failed, label_wrong
    # Image geometry statistics.
    global max_width, max_height, min_aspectratio, max_aspectratio, img_dim

    # Number of label images that are / are not in PNG format.
    png_format_right_num, png_format_wrong_num = 0, 0

    # Label classes seen so far and the pixel count of each class.
    total_grt_classes = []
    total_num_of_each_class = []

    # Samples whose image and label shapes differ / whose label format is wrong.
    shape_unequal = []
    png_format_wrong = []
    shape_unequal_image = []
    png_format_wrong_image = []

    # Largest width/height and the extreme width/height ratios seen so far.
    max_width, max_height = 0, 0
    min_aspectratio = sys.float_info.max
    max_aspectratio = 0

    # Distinct channel counts found in the images.
    img_dim = []

    # File-list lines with a wrong format, (line, error) pairs for unreadable
    # images, and lines whose label image is wrong.
    list_wrong = []
    imread_failed = []
    label_wrong = []
def parse_args():
    """Parse the command-line options of the check script.

    Returns:
        The argparse namespace; its ``cfg_file`` attribute holds the value
        given to ``--cfg`` (``None`` when the option is omitted).
    """
    cfg_option = dict(
        dest='cfg_file',
        help='Config file for training (and optionally testing)',
        default=None,
        type=str)
    arg_parser = argparse.ArgumentParser(description='PaddleSeg check')
    arg_parser.add_argument('--cfg', **cfg_option)
    return arg_parser.parse_args()
def error_print(str):
    """Return ``str`` prefixed with the "NOT PASS" marker on a new line."""
    return "\nNOT PASS " + str
def correct_print(str):
    """Return ``str`` prefixed with the "PASS" marker on a new line."""
    return "\nPASS " + str
def cv2_imread(file_path, flag=cv2.IMREAD_COLOR):
    """Read an image by decoding the raw file bytes with ``cv2.imdecode``.

    The file is loaded as a uint8 buffer via numpy and decoded in memory;
    this works around cv2.imread failing to open Chinese file paths on
    Windows.
    """
    raw_bytes = np.fromfile(file_path, dtype=np.uint8)
    return cv2.imdecode(raw_bytes, flag)
def get_image_max_height_width(img):
    """Fold the size of ``img`` into the global ``max_height`` / ``max_width``.

    Returns:
        The updated ``(max_height, max_width)`` pair.
    """
    global max_width, max_height
    rows, cols = img.shape[0], img.shape[1]
    max_height, max_width = max(rows, max_height), max(cols, max_width)
    return max_height, max_width
def get_image_min_max_aspectratio(img):
    """Fold the width/height ratio of ``img`` into the global extremes.

    Returns:
        The updated ``(min_aspectratio, max_aspectratio)`` pair.
    """
    global min_aspectratio, max_aspectratio
    rows, cols = img.shape[0], img.shape[1]
    min_aspectratio = min(cols / rows, min_aspectratio)
    max_aspectratio = max(cols / rows, max_aspectratio)
    return min_aspectratio, max_aspectratio
def get_image_dim(img):
    """Record the size of the last axis of ``img`` in the global ``img_dim`` list (no duplicates)."""
    last_axis = img.shape[-1]
    if last_axis in img_dim:
        return
    img_dim.append(last_axis)
def image_label_shape_check(img, grt):
    """Return True when the image and its label have the same height and width."""
    image_size = (img.shape[0], img.shape[1])
    label_size = (grt.shape[0], grt.shape[1])
    return image_size == label_size
def ground_truth_check(grt, grt_path):
    """Inspect one label image: file format and per-class pixel counts.

    Args:
        grt: the decoded label image.
        grt_path: path of the label file, used to sniff the file format.

    Returns:
        A ``(png_format, unique, counts)`` tuple: whether ``imghdr``
        identifies the file as PNG, the distinct label values in ``grt``,
        and how many pixels carry each of those values.
    """
    png_format = imghdr.what(grt_path) == "png"
    label_values, pixel_counts = np.unique(grt, return_counts=True)
    return png_format, label_values, pixel_counts
def sum_gt_check(png_format, grt_classes, num_of_each_class):
"""
统计所有标图上的格式、类别和每个类别的像素数
统计所有标图上的格式、类别和每个类别的像素数
params:
png_format: 返回是否是png格式图片
grt_classes: 标类别
grt_classes: 标类别
num_of_each_class: 各个类别的像素数目
"""
is_label_correct = True
global png_format_right_num, png_format_wrong_num, total_grt_classes, total_num_of_each_class
if png_format:
......@@ -90,12 +148,11 @@ def sum_gt_check(png_format, grt_classes, num_of_each_class):
png_format_wrong_num += 1
if cfg.DATASET.IGNORE_INDEX in grt_classes:
grt_classes2 = np.delete(
grt_classes, np.where(grt_classes == cfg.DATASET.IGNORE_INDEX))
grt_classes2 = np.delete(grt_classes, np.where(grt_classes == cfg.DATASET.IGNORE_INDEX))
else:
grt_classes2 = grt_classes
if min(grt_classes2) < 0 or max(grt_classes2) > cfg.DATASET.NUM_CLASSES - 1:
print("fatal error: label class is out of range [0, {}]".format(
cfg.DATASET.NUM_CLASSES - 1))
is_label_correct = False
add_class = []
add_num = []
for i in range(len(grt_classes)):
......@@ -108,145 +165,113 @@ def sum_gt_check(png_format, grt_classes, num_of_each_class):
add_num.append(num_of_each_class[i])
total_num_of_each_class += add_num
total_grt_classes += add_class
return is_label_correct
def gt_check():
"""
对标签进行校验,输出校验结果
params:
png_format_wrong_num: 格式错误的标签图数量
png_format_right_num: 格式正确的标签图数量
total_grt_classes: 总的标签类别
total_num_of_each_class: 每个类别总的像素数目
return:
total_nc: 按升序排序后的总标签类别和像素数目
对标注图像进行校验,输出校验结果
"""
if png_format_wrong_num == 0:
print("Not pass label png format check!")
if png_format_right_num:
logger.info(correct_print("label format check"))
else:
logger.info(error_print("label format check"))
logger.info("No label image to check")
return
else:
print("Pass label png format check!")
print(
"total {} label imgs are png format, {} label imgs are not png fromat".
format(png_format_right_num, png_format_wrong_num))
logger.info(error_print("label format check"))
logger.info("total {} label images are png format, {} label images are not png "
"format".format(png_format_right_num, png_format_wrong_num))
if len(png_format_wrong_image) > 0:
for i in png_format_wrong_image:
logger.debug(i)
total_nc = sorted(zip(total_grt_classes, total_num_of_each_class))
print("total label calsses and their corresponding numbers:\n{} ".format(
total_nc))
if total_nc[0][0]:
print(
"Not pass label class check!\nWarning: label classes should start from 0 !!!"
)
else:
print("Pass label class check!")
total_nc = sorted(zip(total_grt_classes, total_num_of_each_class))
logger.info("\nDoing label pixel statistics...\nTotal label calsses "
"and their corresponding numbers:\n{} ".format(total_nc))
def ground_truth_check(grt, grt_path):
"""
验证标签是否重零开始,标签值为0,1,...,num_classes-1, ingnore_idx
验证标签图像的格式
返回标签的像素数
检查图像是否都是ignore_index
params:
grt: 标签图
grt_path: 标签图路径
return:
png_format: 返回是否是png格式图片
label_correct: 返回标签是否是正确的
label_pixel_num: 返回标签的像素数
"""
if imghdr.what(grt_path) == "png":
png_format = True
if len(label_wrong) == 0 and not total_nc[0][0]:
logger.info(correct_print("label class check!"))
else:
png_format = False
logger.info(error_print("label class check!"))
if total_nc[0][0]:
logger.info("Warning: label classes should start from 0")
if len(label_wrong) > 0:
logger.info("fatal error: label class is out of range [0, {}]".format(cfg.DATASET.NUM_CLASSES - 1))
for i in label_wrong:
logger.debug(i)
unique, counts = np.unique(grt, return_counts=True)
return png_format, unique, counts
def _report_eval_crop_size(required_width, required_height):
    """Log PASS when EVAL_CROP_SIZE covers the required (width, height), NOT PASS plus details otherwise."""
    crop_width, crop_height = cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1]
    if crop_width >= required_width and crop_height >= required_height:
        logger.info(correct_print("EVAL_CROP_SIZE check"))
    else:
        logger.info(error_print("EVAL_CROP_SIZE check"))
        logger.info("EVAL_CROP_SIZE: ({},{}) must large than img size({},{})"
                    .format(crop_width, crop_height, required_width, required_height))


def eval_crop_size_check(max_height, max_width, min_aspectratio, max_aspectratio):
    """Check that EVAL_CROP_SIZE is large enough for the val/test images.

    EVAL_CROP_SIZE is (width, height). The size it must cover depends on
    cfg.AUG.AUG_METHOD:
      * stepscaling:  the largest image width and height in the dataset;
      * rangescaling: cfg.AUG.INF_RESIZE_VALUE combined with the extreme
        aspect ratios of the dataset;
      * unpadding:    cfg.AUG.FIX_RESIZE_SIZE.

    Args:
        max_height: largest image height in the dataset.
        max_width: largest image width in the dataset.
        min_aspectratio: smallest width/height ratio in the dataset.
        max_aspectratio: largest width/height ratio in the dataset.
    """
    method = cfg.AUG.AUG_METHOD
    if method == "stepscaling":
        width_fits = max_width <= cfg.EVAL_CROP_SIZE[0]
        height_fits = max_height <= cfg.EVAL_CROP_SIZE[1]
        # Both dimensions must fit; checking them with `or` let a too-small
        # width or height pass as long as the other dimension was fine.
        if width_fits and height_fits:
            logger.info(correct_print("EVAL_CROP_SIZE check"))
        else:
            logger.info(error_print("EVAL_CROP_SIZE check"))
            if not width_fits:
                logger.info("The EVAL_CROP_SIZE[0]: {} should larger max width of images {}!".format(
                    cfg.EVAL_CROP_SIZE[0], max_width))
            if not height_fits:
                logger.info("The EVAL_CROP_SIZE[1]: {} should larger max height of images {}!".format(
                    cfg.EVAL_CROP_SIZE[1], max_height))
    elif method == "rangescaling":
        inf_value = cfg.AUG.INF_RESIZE_VALUE
        if min_aspectratio <= 1 and max_aspectratio >= 1:
            # Ratios on both sides of 1: both dimensions are checked against
            # the full INF_RESIZE_VALUE.
            _report_eval_crop_size(inf_value, inf_value)
        elif min_aspectratio > 1:
            # Every ratio is above 1: the height bound is scaled down by the
            # smallest ratio.
            _report_eval_crop_size(inf_value, round(inf_value / min_aspectratio))
        elif max_aspectratio < 1:
            # Every ratio is below 1: the width bound is scaled down by the
            # largest ratio.
            _report_eval_crop_size(round(inf_value * max_aspectratio), inf_value)
    elif method == "unpadding":
        _report_eval_crop_size(cfg.AUG.FIX_RESIZE_SIZE[0], cfg.AUG.FIX_RESIZE_SIZE[1])
    else:
        logger.info("\nERROR! cfg.AUG.AUG_METHOD setting wrong, it should be one of "
                    "[unpadding, stepscaling, rangescaling]")
def inf_resize_value_check():
    """Warn when AUG.INF_RESIZE_VALUE lies outside [MIN_RESIZE_VALUE, MAX_RESIZE_VALUE].

    Only applies when cfg.AUG.AUG_METHOD is "rangescaling"; other methods
    are left unchecked.
    """
    if cfg.AUG.AUG_METHOD != "rangescaling":
        return
    # The upper bound is MAX_RESIZE_VALUE; comparing against MIN_RESIZE_VALUE
    # on both sides flagged every value other than MIN itself.
    if cfg.AUG.INF_RESIZE_VALUE < cfg.AUG.MIN_RESIZE_VALUE or \
            cfg.AUG.INF_RESIZE_VALUE > cfg.AUG.MAX_RESIZE_VALUE:
        logger.info("\nWARNING! you set AUG.AUG_METHOD = 'rangescaling'"
                    "AUG.INF_RESIZE_VALUE: {} not in [AUG.MIN_RESIZE_VALUE, AUG.MAX_RESIZE_VALUE]: "
                    "[{}, {}].".format(cfg.AUG.INF_RESIZE_VALUE, cfg.AUG.MIN_RESIZE_VALUE, cfg.AUG.MAX_RESIZE_VALUE))
def image_type_check(img_dim):
"""
......@@ -256,166 +281,189 @@ def image_type_check(img_dim):
return
"""
if (1 in img_dim or 3 in img_dim) and cfg.DATASET.IMAGE_TYPE == 'rgba':
print(
"ERROR: DATASET.IMAGE_TYPE is {} but the type of image has gray or rgb\n"
.format(cfg.DATASET.IMAGE_TYPE))
# elif (1 not in img_dim and 3 not in img_dim and 4 in img_dim) and cfg.DATASET.IMAGE_TYPE == 'rgb':
# print("ERROR: DATASET.IMAGE_TYPE is {} but the type of image is rgba\n".format(cfg.DATASET.IMAGE_TYPE))
logger.info(error_print("DATASET.IMAGE_TYPE check"))
logger.info("DATASET.IMAGE_TYPE is {} but the type of image has "
"gray or rgb\n".format(cfg.DATASET.IMAGE_TYPE))
elif (1 not in img_dim and 3 not in img_dim and 4 in img_dim) and cfg.DATASET.IMAGE_TYPE == 'rgb':
logger.info(correct_print("DATASET.IMAGE_TYPE check"))
logger.info("\nWARNING: DATASET.IMAGE_TYPE is {} but the type of all image is rgba".format(cfg.DATASET.IMAGE_TYPE))
else:
print("DATASET.IMAGE_TYPE setting correct")
logger.info(correct_print("DATASET.IMAGE_TYPE check"))
def shape_check():
    """Log the outcome of the image/label shape comparison."""
    if not shape_unequal_image:
        logger.info(correct_print("shape check"))
        logger.info("All images are the same shape as the labels")
        return
    logger.info(error_print("shape check"))
    logger.info("Some images are not the same shape as the labels as follow: ")
    # Offending file-list lines are only emitted at debug level.
    for bad_line in shape_unequal_image:
        logger.debug(bad_line)
def image_label_shape_check(img, grt):
"""
验证图像和标签的大小是否匹配
"""
flag = True
img_height = img.shape[0]
img_width = img.shape[1]
grt_height = grt.shape[0]
grt_width = grt.shape[1]
def file_list_check(list_name):
    """Log whether every line of the file list was split correctly by DATASET.SEPARATOR.

    Args:
        list_name: path of the file list; only its last ``os.sep``-separated
            component appears in the log title.
    """
    title = list_name.split(os.sep)[-1] + " DATASET.SEPARATOR check"
    if not list_wrong:
        logger.info(correct_print(title))
        return
    logger.info(error_print(title))
    logger.info("The following list is not separated by {}".format(cfg.DATASET.SEPARATOR))
    # Offending lines are only emitted at debug level.
    for bad_line in list_wrong:
        logger.debug(bad_line)
def imread_check():
    """Log whether every image and label of the dataset could be read."""
    failed_count = len(imread_failed)
    if failed_count == 0:
        logger.info(correct_print("dataset reading check"))
        logger.info("All images can be read successfully")
        return
    logger.info(error_print("dataset reading check"))
    logger.info("Failed to read {} images".format(failed_count))
    # Each entry is emitted at debug level only.
    for failure in imread_failed:
        logger.debug(failure)
if img_height != grt_height or img_width != grt_width:
flag = False
return flag
def check_train_dataset():
    """Validate every entry of ``cfg.DATASET.TRAIN_FILE_LIST`` and log a report.

    Each line must split into exactly two parts (image name, label name) on
    ``cfg.DATASET.SEPARATOR``. Problems are recorded in the module-level
    lists ``list_wrong``, ``imread_failed``, ``shape_unequal_image``,
    ``png_format_wrong_image`` and ``label_wrong``; the summary helpers at the
    end turn them into log output.
    """
    list_file = cfg.DATASET.TRAIN_FILE_LIST
    logger.info("-----------------------------\n1. Check train dataset...")
    with open(list_file, 'r') as fid:
        lines = fid.readlines()

    for line in tqdm(lines):
        line = line.strip()
        parts = line.split(cfg.DATASET.SEPARATOR)
        if len(parts) != 2:
            # Malformed entry: remember it and move on.
            list_wrong.append(line)
            continue

        img_name, grt_name = parts[0], parts[1]
        img_path = os.path.join(cfg.DATASET.DATA_DIR, img_name)
        grt_path = os.path.join(cfg.DATASET.DATA_DIR, grt_name)
        # Read only inside the try so that a failure is recorded instead of
        # aborting the whole check.
        try:
            img = cv2_imread(img_path, cv2.IMREAD_UNCHANGED)
            grt = cv2_imread(grt_path, cv2.IMREAD_GRAYSCALE)
        except Exception as e:
            imread_failed.append((line, str(e)))
            continue

        # NOTE(review): assumes get_image_dim(img) records into the
        # module-level img_dim that image_type_check reads below - confirm.
        get_image_dim(img)

        if not image_label_shape_check(img, grt):
            shape_unequal_image.append(line)

        png_format, grt_classes, num_of_each_class = ground_truth_check(grt, grt_path)
        if not png_format:
            png_format_wrong_image.append(line)
        if not sum_gt_check(png_format, grt_classes, num_of_each_class):
            label_wrong.append(line)

    file_list_check(list_file)
    imread_check()
    gt_check()
    image_type_check(img_dim)
    shape_check()
def check_val_dataset():
    """Validate every entry of ``cfg.DATASET.VAL_FILE_LIST`` and log a report.

    Performs the same per-line checks as the train check and additionally
    feeds each image to the size/aspect-ratio collectors so that
    ``eval_crop_size_check`` can be run at the end.
    """
    list_file = cfg.DATASET.VAL_FILE_LIST
    logger.info("\n-----------------------------\n2. Check val dataset...")
    with open(list_file) as fid:
        lines = fid.readlines()

    for line in tqdm(lines):
        line = line.strip()
        parts = line.split(cfg.DATASET.SEPARATOR)
        if len(parts) != 2:
            list_wrong.append(line)
            continue

        img_name, grt_name = parts[0], parts[1]
        img_path = os.path.join(cfg.DATASET.DATA_DIR, img_name)
        grt_path = os.path.join(cfg.DATASET.DATA_DIR, grt_name)
        try:
            img = cv2_imread(img_path, cv2.IMREAD_UNCHANGED)
            grt = cv2_imread(grt_path, cv2.IMREAD_GRAYSCALE)
        except Exception as e:
            # str(e) rather than e.message: Python 3 exceptions have no
            # .message attribute.
            imread_failed.append((line, str(e)))
            # Skip the remaining checks; img/grt are not valid for this line.
            continue

        # NOTE(review): the one-argument collectors are assumed to update the
        # module-level max_height / max_width / min_aspectratio /
        # max_aspectratio / img_dim read below - confirm.
        get_image_max_height_width(img)
        get_image_min_max_aspectratio(img)
        get_image_dim(img)

        if not image_label_shape_check(img, grt):
            shape_unequal_image.append(line)

        png_format, grt_classes, num_of_each_class = ground_truth_check(grt, grt_path)
        if not png_format:
            png_format_wrong_image.append(line)
        if not sum_gt_check(png_format, grt_classes, num_of_each_class):
            label_wrong.append(line)

    file_list_check(list_file)
    imread_check()
    gt_check()
    image_type_check(img_dim)
    shape_check()
    eval_crop_size_check(max_height, max_width, min_aspectratio, max_aspectratio)
def check_test_dataset():
    """Validate every entry of ``cfg.DATASET.TEST_FILE_LIST`` and log a report.

    A test list line may hold either an image name alone or an image name and
    a label name. Label-related summaries (``gt_check``, ``shape_check``) are
    only emitted when at least one labelled line was seen.
    """
    list_file = cfg.DATASET.TEST_FILE_LIST
    has_label = False
    with open(list_file) as fid:
        logger.info("\n-----------------------------\n3. Check test dataset...")
        lines = fid.readlines()

    for line in tqdm(lines):
        line = line.strip()
        parts = line.split(cfg.DATASET.SEPARATOR)
        if len(parts) == 1:
            # Image only, no label to compare against.
            img_name = parts[0]
            img_path = os.path.join(cfg.DATASET.DATA_DIR, img_name)
            try:
                img = cv2_imread(img_path, cv2.IMREAD_UNCHANGED)
            except Exception as e:
                imread_failed.append((line, str(e)))
                continue
        elif len(parts) == 2:
            has_label = True
            img_name, grt_name = parts[0], parts[1]
            img_path = os.path.join(cfg.DATASET.DATA_DIR, img_name)
            grt_path = os.path.join(cfg.DATASET.DATA_DIR, grt_name)
            try:
                img = cv2_imread(img_path, cv2.IMREAD_UNCHANGED)
                grt = cv2_imread(grt_path, cv2.IMREAD_GRAYSCALE)
            except Exception as e:
                # str(e) rather than e.message: Python 3 exceptions have no
                # .message attribute.
                imread_failed.append((line, str(e)))
                continue

            if not image_label_shape_check(img, grt):
                shape_unequal_image.append(line)

            png_format, grt_classes, num_of_each_class = ground_truth_check(grt, grt_path)
            if not png_format:
                png_format_wrong_image.append(line)
            if not sum_gt_check(png_format, grt_classes, num_of_each_class):
                label_wrong.append(line)
        else:
            # Record the offending line itself, not the whole list of lines.
            list_wrong.append(line)
            continue

        # NOTE(review): the one-argument collectors are assumed to update the
        # module-level max_height / max_width / min_aspectratio /
        # max_aspectratio / img_dim read below - confirm.
        get_image_max_height_width(img)
        get_image_min_max_aspectratio(img)
        get_image_dim(img)

    file_list_check(list_file)
    imread_check()
    if has_label:
        gt_check()
    image_type_check(img_dim)
    if has_label:
        shape_check()
    eval_crop_size_check(max_height, max_width, min_aspectratio, max_aspectratio)
def main(args):
if args.cfg_file is not None:
cfg.update_from_file(args.cfg_file)
cfg.check_and_infer(reset_dataset=True)
print(pprint.pformat(cfg))
logger.info(pprint.pformat(cfg))
init_global_variable()
check_train_dataset()
......@@ -428,8 +476,19 @@ def main(args):
inf_resize_value_check()
if __name__ == "__main__":
    args = parse_args()
    # Use the sample config only as a fallback; a config path supplied on the
    # command line must not be overwritten.
    if args.cfg_file is None:
        args.cfg_file = "../configs/cityscape.yaml"

    # Root logger passes everything from DEBUG up to its handlers.
    logger = logging.getLogger()
    logger.setLevel('DEBUG')
    BASIC_FORMAT = "%(message)s"
    formatter = logging.Formatter(BASIC_FORMAT)

    # Console handler: INFO and above only.
    sh = logging.StreamHandler()
    sh.setFormatter(formatter)
    sh.setLevel('INFO')

    # File handler: no level of its own, so detail.log (truncated on each
    # run) also receives the DEBUG records.
    th = logging.FileHandler('detail.log', 'w')
    th.setFormatter(formatter)

    logger.addHandler(sh)
    logger.addHandler(th)
    main(args)
......@@ -192,12 +192,10 @@ def train(cfg):
def data_generator():
if args.use_mpio:
print("Use multiprocess reader")
data_gen = dataset.multiprocess_generator(
num_processes=cfg.DATALOADER.NUM_WORKERS,
max_queue_size=cfg.DATALOADER.BUF_SIZE)
else:
print("Use multi-thread reader")
data_gen = dataset.generator()
batch_data = []
......@@ -332,6 +330,11 @@ def train(cfg):
("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
begin_epoch, cfg.SOLVER.NUM_EPOCHS))
if args.use_mpio:
print("Use multiprocess reader")
else:
print("Use multi-thread reader")
for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
py_reader.start()
while True:
......
......@@ -22,9 +22,9 @@ cfg = SegConfig()
########################## 基本配置 ###########################################
# 均值,图像预处理减去的均值
cfg.MEAN = [104.008, 116.669, 122.675]
cfg.MEAN = [0.5, 0.5, 0.5]
# 标准差,图像预处理除以标准差·
cfg.STD = [1.000, 1.000, 1.000]
cfg.STD = [0.5, 0.5, 0.5]
# 批处理大小
cfg.BATCH_SIZE = 1
# 验证时图像裁剪尺寸(宽,高)
......
# 源码编译安装及搭建服务流程
本文将介绍源码编译安装以及在服务搭建流程。
本文将介绍源码编译安装以及服务搭建流程。编译前请确保PaddleServing的依赖项已安装完毕。依赖安装教程请前往[PaddleSegServing 依赖安装](./README.md)。
## 1. 系统依赖项
依赖项 | 验证过的版本
-- | --
Linux | Centos 6.10 / 7
CMake | 3.0+
GCC | 4.8.2/5.4.0
Python| 2.7
GO编译器| 1.9.2
openssl| 1.0.1+
bzip2 | 1.0.6+
如果需要使用GPU预测,还需安装以下几个依赖库
GPU库 | 验证过的版本
-- | --
CUDA | 9.2
cuDNN | 7.1.4
nccl | 2.4.7
## 2. 安装依赖项
以下流程在百度云CentOS7.5+CUDA9.2环境下进行。
### 2.1. 安装openssl、Go编译器以及bzip2
```bash
yum -y install openssl openssl-devel golang bzip2-libs bzip2-devel
```
### 2.2. 安装GPU预测的依赖项(如果需要使用GPU预测,必须执行此步骤)
#### 2.2.1. 安装配置CUDA9.2以及cuDNN 7.1.4
该百度云机器已经安装CUDA以及cuDNN,仅需复制相关头文件与链接库
```bash
# 看情况确定是否需要安装 cudnn
# 进入 cudnn 根目录
cd /home/work/cudnn/cudnn7.1.4
# 拷贝头文件
cp include/cudnn.h /usr/local/cuda/include/
# 拷贝链接库
cp lib64/libcudnn* /usr/local/cuda/lib64/
# 修改头文件、链接库访问权限
chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
```
#### 2.2.2. 安装nccl库
```bash
# 下载文件 nccl-repo-rhel7-2.4.7-ga-cuda9.2-1-1.x86_64.rpm
wget -c https://paddlehub.bj.bcebos.com/serving/nccl-repo-rhel7-2.4.7-ga-cuda9.2-1-1.x86_64.rpm
# 安装nccl的repo
rpm -i nccl-repo-rhel7-2.4.7-ga-cuda9.2-1-1.x86_64.rpm
# 更新索引
yum -y update
# 安装包
yum -y install libnccl-2.4.7-1+cuda9.2 libnccl-devel-2.4.7-1+cuda9.2 libnccl-static-2.4.7-1+cuda9.2
```
### 2.3. 安装 cmake 3.15
如果机器没有安装cmake或者已安装cmake的版本低于3.0,请执行以下步骤
```bash
# 如果原来的已经安装低于3.0版本的cmake,请先卸载原有低版本 cmake
yum -y remove cmake
# 下载源代码并解压
wget -c https://github.com/Kitware/CMake/releases/download/v3.15.0/cmake-3.15.0.tar.gz
tar xvfz cmake-3.15.0.tar.gz
# 编译cmake
cd cmake-3.15.0
./configure
make -j4
# 安装并检查cmake版本
make install
cmake --version
# 在cmake-3.15.0目录中,将相应的头文件目录(curl目录,为PaddleServing的依赖头文件目录)拷贝到系统include目录下
cp -r Utilities/cmcurl/include/curl/ /usr/include/
```
### 2.4. 为依赖库增加相应的软连接
现在Linux系统中大部分链接库的名称都以版本号作为后缀,如libcurl.so.4.3.0。这种命名方式最大的问题是,CMakeLists.txt中的find_library命令无法识别使用这种命名方式的链接库,会导致执行CMake时出错。由于本项目是用CMake构建的,所以务必保证相应的链接库以 .so 或 .a 为后缀命名。解决这个问题最简单的方式就是创建一个软连接指向相应的链接库。在百度云的机器中,只有curl库的命名方式有问题,所以命令如下(如果是其他库,解决方法也类似):
```bash
ln -s /usr/lib64/libcurl.so.4.3.0 /usr/lib64/libcurl.so
```
### 2.5. 编译安装PaddleServing
## 1. 编译安装PaddleServing
下列步骤介绍CPU版本以及GPU版本的PaddleServing编译安装过程。
```bash
......@@ -134,7 +46,7 @@ serving
└── tools
```
### 2.6. 安装PaddleSegServing
## 2. 安装PaddleSegServing
```bash
# Step 1. 在~目录下下载PaddleSeg代码
......@@ -243,4 +155,4 @@ make install
可参考[预编译安装流程](./README.md)中2.5.2节。可执行文件在该目录下:~/serving/build/output/demo/seg-serving/bin/。
### 3.4 运行客户端程序进行测试。
可参考[预编译安装流程](./README.md)中2.2.3节。
可参考[预编译安装流程](./README.md)中2.5.3节。
\ No newline at end of file
# PaddleSeg Serving
# PaddleSegServing
## 1.简介
PaddleSegServing是基于PaddleSeg开发的实时图像分割服务的企业级解决方案。用户仅需关注模型本身,无需理解模型的加载、预测以及GPU/CPU资源的并发调度等细节操作,通过设置不同的参数配置,即可根据自身的业务需求定制不同的图像分割服务。目前,PaddleSegServing支持人脸分割、城市道路分割、宠物外形分割模型。本文将通过一个人脸分割服务的搭建示例,展示PaddleSeg服务通用的搭建流程。
## 2.预编译版本安装及搭建服务流程
### 2.1. 下载预编译的PaddleSegServing
运行PaddleSegServing需要依赖其他的链接库,请保证在下载安装前系统环境已经具有相应的依赖项。
安装以及搭建服务的流程均在Centos和Ubuntu系统上验证。以下是Centos系统上的搭建流程,Ubuntu版本的依赖项安装流程介绍在[Ubuntu系统下依赖项的安装教程](UBUNTU.md)
### 2.1. 系统依赖项
依赖项 | 验证过的版本
-- | --
Linux | Centos 6.10 / 7, Ubuntu 16.04
CMake | 3.0+
GCC | 4.8.2
Python| 2.7
GO编译器| 1.9.2
openssl| 1.0.1+
bzip2 | 1.0.6+
如果需要使用GPU预测,还需安装以下几个依赖库
GPU库 | 验证过的版本
-- | --
CUDA | 9.2
cuDNN | 7.1.4
nccl | 2.4.7
### 2.2. 安装依赖项
#### 2.2.1. 安装openssl、Go编译器以及bzip2
```bash
yum -y install openssl openssl-devel golang bzip2-libs bzip2-devel
```
#### 2.2.2. 安装GPU预测的依赖项(如果需要使用GPU预测,必须执行此步骤)
#### 2.2.2.1. 安装配置CUDA 9.2以及cuDNN 7.1.4
请确保正确安装CUDA 9.2以及cuDNN 7.1.4. 以下为安装CUDA和cuDNN的官方教程。
```bash
安装CUDA教程: https://developer.nvidia.com/cuda-92-download-archive?target_os=Linux&target_arch=x86_64&target_distro=CentOS&target_version=7&target_type=rpmnetwork
安装cuDNN教程: https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html
```
#### 2.2.2.2. 安装nccl库(如果已安装nccl 2.4.7请忽略该步骤)
```bash
# 下载文件 nccl-repo-rhel7-2.4.7-ga-cuda9.2-1-1.x86_64.rpm
wget -c https://paddlehub.bj.bcebos.com/serving/nccl-repo-rhel7-2.4.7-ga-cuda9.2-1-1.x86_64.rpm
# 安装nccl的repo
rpm -i nccl-repo-rhel7-2.4.7-ga-cuda9.2-1-1.x86_64.rpm
# 更新索引
yum -y update
# 安装包
yum -y install libnccl-2.4.7-1+cuda9.2 libnccl-devel-2.4.7-1+cuda9.2 libnccl-static-2.4.7-1+cuda9.2
```
### 2.2.3. 安装 cmake 3.15
如果机器没有安装cmake或者已安装cmake的版本低于3.0,请执行以下步骤
```bash
# 如果原来的已经安装低于3.0版本的cmake,请先卸载原有低版本 cmake
yum -y remove cmake
# 下载源代码并解压
wget -c https://github.com/Kitware/CMake/releases/download/v3.15.0/cmake-3.15.0.tar.gz
tar xvfz cmake-3.15.0.tar.gz
# 编译cmake
cd cmake-3.15.0
./configure
make -j4
# 安装并检查cmake版本
make install
cmake --version
# 在cmake-3.15.0目录中,将相应的头文件目录(curl目录,为PaddleServing的依赖头文件目录)拷贝到系统include目录下
cp -r Utilities/cmcurl/include/curl/ /usr/include/
```
### 2.2.4. 为依赖库增加相应的软连接
现在Linux系统中大部分链接库的名称都以版本号作为后缀,如libcurl.so.4.3.0。这种命名方式最大的问题是,CMakeLists.txt中的find_library命令无法识别使用这种命名方式的链接库,会导致执行CMake时出错。由于本项目是用CMake构建的,所以务必保证相应的链接库以 .so 或 .a 为后缀命名。解决这个问题最简单的方式就是创建一个软连接指向相应的链接库。在百度云的机器中,只有curl库的命名方式有问题,所以命令如下(如果是其他库,解决方法也类似):
```bash
ln -s /usr/lib64/libcurl.so.4.3.0 /usr/lib64/libcurl.so
```
### 2.3. 下载预编译的PaddleSegServing
预编译版本在Centos7.6系统下编译,如果想快速体验PaddleSegServing,可在此系统下下载预编译版本进行安装。预编译版本有两个,一个是针对有GPU的机器,推荐安装GPU版本PaddleSegServing。另一个是CPU版本PaddleServing,针对无GPU的机器。
#### 2.1.1. 下载并解压GPU版本PaddleSegServing
#### 2.3.1. 下载并解压GPU版本PaddleSegServing
```bash
cd ~
wget -c XXXX/PaddleSegServing.centos7.6_cuda9.2_gpu.tar.gz
tar xvfz PaddleSegServing.centos7.6_cuda9.2_gpu.tar.gz
wget -c https://paddleseg.bj.bcebos.com/serving%2Fpaddle_seg_serving_centos7.6_gpu_cuda9.2.tar.gz -O PaddleSegServing.centos7.6_cuda9.2_gpu.tar.gz
tar xvfz PaddleSegServing.centos7.6_cuda9.2_gpu.tar.gz seg-serving
```
#### 2.1.2. 下载并解压CPU版本PaddleSegServing
#### 2.3.2. 下载并解压CPU版本PaddleSegServing
```bash
cd ~
wget -c XXXX/PaddleSegServing.centos7.6_cuda9.2_cpu.tar.gz
tar xvfz PaddleSegServing.centos7.6_cuda9.2_gpu.tar.gz
wget -c https://paddleseg.bj.bcebos.com/serving%2Fpaddle_seg_serving_centos7.6_cpu.tar.gz -O PaddleSegServing.centos7.6_cpu.tar.gz
tar xvfz PaddleSegServing.centos7.6_cpu.tar.gz seg-serving
```
解压后的PaddleSegServing目录如下。
......@@ -36,13 +116,22 @@ tar xvfz PaddleSegServing.centos7.6_cuda9.2_gpu.tar.gz
└── log
```
### 2.2. 运行PaddleSegServing
### 2.4 安装动态库
把 libiomp5.so, libmklml_gnu.so, libmklml_intel.so拷贝到/usr/lib。
```bash
cd seg-serving/bin/
cp libiomp5.so libmklml_gnu.so libmklml_intel.so /usr/lib
```
### 2.5. 运行PaddleSegServing
本节将介绍如何运行以及测试PaddleSegServing。
#### 2.2.1. 搭建人脸分割服务
#### 2.5.1. 搭建人脸分割服务
搭建人脸分割服务只需完成一些配置文件的编写即可,其他分割服务的搭建流程类似。
##### 2.2.1.1. 下载人脸分割模型文件,并将其复制到相应目录。
#### 2.5.1.1. 下载人脸分割模型文件,并将其复制到相应目录。
```bash
# 下载人脸分割模型
wget -c https://paddleseg.bj.bcebos.com/inference_model/deeplabv3p_xception65_humanseg.tgz
......@@ -52,11 +141,7 @@ cp -r deeplabv3p_xception65_humanseg seg-serving/bin/data/model/paddle/fluid
```
##### 2.2.1.2. 配置参数文件
参数文件如,PaddleSegServing仅新增一个配置文件seg_conf.yaml,用来指定具体分割模型的一些参数,如均值、方差、图像尺寸等。该配置文件可在gflags.conf中通过--seg_conf_file指定。
其他配置文件的字段解释可参考以下链接:https://github.com/PaddlePaddle/Serving/blob/develop/doc/SERVING_CONFIGURE.md (TODO:介绍seg_conf.yaml中每个字段的含义)
#### 2.5.1.2. 配置参数文件
参数文件如下。PaddleSegServing仅新增一个配置文件seg_conf.yaml,用来指定具体分割模型的一些参数,如均值、方差、图像尺寸等。该配置文件可在gflags.conf中通过--seg_conf_file指定。其他配置文件的字段解释可参考以下链接:https://github.com/PaddlePaddle/Serving/blob/develop/doc/SERVING_CONFIGURE.md
```bash
conf/
......@@ -68,7 +153,25 @@ conf/
└── workflow.prototxt
```
#### 2.2.2 运行服务端程序
以下为seg_conf.yaml文件内容以及每一个配置项的内容。
```bash
%YAML:1.0
# 输入到模型的图像的尺寸。会将任意图片resize到513*513尺寸的图像,再放入模型进行推测。
SIZE: [513, 513]
# 均值
MEAN: [104.008, 116.669, 122.675]
# 方差
STD: [1.0, 1.0, 1.0]
# 通道数
CHANNELS: 3
# 类别数量
CLASS_NUM: 2
# 加载的模型的名称,需要与model_toolkit.prototxt中对应模型的名称保持一致。
MODEL_NAME: "human_segmentation"
```
#### 2.5.2 运行服务端程序
```bash
# 1. 设置环境变量
......@@ -77,17 +180,44 @@ export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib64:$LD_LIBRARY_PATH
cd ~/serving/build/output/demo/seg-serving/bin/
./seg-serving
```
#### 2.2.3.运行客户端程序进行测试 (建议在windows、mac测试,可直接查看分割后的图像)
#### 2.5.3.运行客户端程序
以下为PaddleSeg的目录结构,客户端在PaddleSeg/serving/tools目录。
客户端程序是用Python3编写的,代码简洁易懂,可以通过运行客户端验证服务的正确性以及性能表现。
```bash
PaddleSeg
├── configs
├── contrib
├── dataset
├── docs
├── inference
├── pdseg
├── README.md
├── requirements.txt
├── scripts
├── serving
│ ├── COMPILE_GUIDE.md
│ ├── imgs
│ ├── README.md
│ ├── requirements.txt # 客户端程序依赖的包
│ ├── seg-serving
│ ├── tools # 客户端目录
│ │ ├── images # 测试的图像目录,可放置jpg格式或其他三通道格式的图像,以jpg或jpeg作为文件后缀名
│ │ │  ├── 1.jpg
│ │ │ ├── 2.jpg
│ │ │ └── 3.jpg
│ │ └── image_seg_client.py # 客户端测试代码
│ └── UBUNTU.md
├── test
└── test.md
```
客户端程序使用Python3编写,通过下载requirements.txt中的python依赖包(`pip3 install -r requirements.txt`),用户可以在Windows、Mac、Linux等平台上正常运行该客户端,测试的图像放在PaddleSeg/serving/tools/images目录,用户可以根据自己需要把其他三通道格式的图片放置到该目录下进行测试。从服务端返回的结果图像保存在PaddleSeg/serving/tools目录下。
```bash
# 使用Python3.6,需要安装opencv-python、requests、numpy包(建议安装anaconda)
cd tools
vim image_seg_client.py (修改IMAGE_SEG_URL变量,改成服务端的ip地址)
python3.6 image_seg_client.py
# 当前目录可以看到生成出分割结果的图片。
# 当前目录可以看到生成出分割结果的图片。
```
## 3. 源码编译安装及搭建服务流程 (可选)
源码编译安装时间较长,一般推荐在centos7.6下安装预编译版本进行使用。如果您系统版本非centos7.6或者您想进行二次开发,请点击以下链接查看[源码编译安装流程](./COMPILE_GUIDE.md)
源码编译安装时间较长,一般推荐在centos7.6下安装预编译版本进行使用。如果您系统版本非centos7.6或者您想进行二次开发,请点击以下链接查看[源码编译安装流程](./COMPILE_GUIDE.md)
\ No newline at end of file
# Ubuntu系统下依赖项的安装教程
运行PaddleSegServing需要系统安装一些依赖库。在不同发行版本的Linux系统下,安装依赖项的具体命令略有不同,以下介绍在Ubuntu 16.04下安装依赖项的方法。
## 1. 安装ssl、go、python、bzip2、crypto.
```bash
sudo apt-get install golang-1.10 python2.7 libssl1.0.0 libssl-dev libssl-doc libcrypto++-dev libcrypto++-doc libcrypto++-utils libbz2-1.0 libbz2-dev
```
## 2. 为ssl、crypto、curl链接库添加软连接
```bash
ln -s /lib/x86_64-linux-gnu/libssl.so.1.0.0 /usr/lib/x86_64-linux-gnu/libssl.so
ln -s /lib/x86_64-linux-gnu/libcrypto.so.1.0.0 /usr/lib/x86_64-linux-gnu/libcrypto.so.10
ln -s /usr/lib/x86_64-linux-gnu/libcurl.so.4.4.0 /usr/lib/x86_64-linux-gnu/libcurl.so
```
## 3. 安装GPU依赖项(如果需要使用GPU预测,必须执行此步骤)
### 3.1. 安装配置CUDA 9.2以及cuDNN 7.1.4
方法与[预编译安装流程](README.md) 2.2.2.1节一样。
### 3.2. 安装nccl库(如果已安装nccl 2.4.7请忽略该步骤)
```bash
# 下载nccl相关的deb包
wget -c https://paddleseg.bj.bcebos.com/serving%2Fnccl-repo-ubuntu1604-2.4.8-ga-cuda9.2_1-1_amd64.deb -O nccl-repo-ubuntu1604-2.4.8-ga-cuda9.2_1-1_amd64.deb
# 安装deb包
sudo dpkg -i nccl-repo-ubuntu1604-2.4.8-ga-cuda9.2_1-1_amd64.deb
# 添加公钥(公钥文件由上一步安装的deb包提供)
sudo apt-key add /var/nccl-repo-2.4.8-ga-cuda9.2/7fa2af80.pub
# 更新索引
sudo apt update
# 安装nccl库
sudo apt-get install libnccl2 libnccl-dev
```
## 4. 安装cmake 3.15
如果机器没有安装cmake或者已安装cmake的版本低于3.0,请执行以下步骤
```bash
# 如果原来的已经安装低于3.0版本的cmake,请先卸载原有低版本 cmake
sudo apt-get autoremove cmake
```
其余安装cmake的流程请参考以下链接[预编译安装流程](README.md) 2.2.3节。
## 5. 安装PaddleSegServing
### 5.1. 下载并解压GPU版本PaddleSegServing
```bash
cd ~
wget -c https://paddleseg.bj.bcebos.com/serving%2Fpaddle_seg_serving_ubuntu16.07_gpu_cuda9.2.tar.gz -O PaddleSegServing.ubuntu16.07_cuda9.2_gpu.tar.gz
tar xvfz PaddleSegServing.ubuntu16.07_cuda9.2_gpu.tar.gz seg-serving
```
### 5.2. 下载并解压CPU版本PaddleSegServing
```bash
cd ~
wget -c https://paddleseg.bj.bcebos.com/serving%2Fpaddle_seg_serving_ubuntu16.07_cpu.tar.gz -O PaddleSegServing.ubuntu16.07_cpu.tar.gz
tar xvfz PaddleSegServing.ubuntu16.07_cpu.tar.gz seg-serving
```
## 6. gcc版本问题
在Ubuntu 16.04系统中,默认的gcc版本为5.4.0。而目前PaddleSegServing仅支持gcc 4.8编译,所以如果测试的机器gcc版本为5.4,请先进行降级(无需卸载原有的gcc)。
```bash
# 安装gcc 4.8
sudo apt-get install gcc-4.8
# 查看是否成功安装gcc4.8
ls /usr/bin/gcc*
# 设置gcc4.8的优先级,使其能被gcc命令优先连接gcc4.8
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 100
# 查看设置结果(非必须)
sudo update-alternatives --config gcc
```
--enable_model_toolkit
--seg_conf_file=./conf/seg_conf.yaml
--num_threads=1
--bthread_min_concurrency=4
--bthread_concurrency=4
engines {
name: "human_segmentation"
type: "FLUID_GPU_NATIVE"
type: "FLUID_GPU_ANALYSIS"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/deeplabv3p_xception65_humanseg"
......
......@@ -128,16 +128,30 @@ int ImageSegOp::inference() {
mask_raw[di] = label;
}
//cv::Mat mask_mat = cv::Mat(height, width, CV_32FC1);
cv::Mat mask_mat = cv::Mat(height, width, CV_8UC1);
mask_mat.data = mask_raw.data();
//scoremap
// mask_mat.data = reinterpret_cast<uchar *>(data + out_size);
//mask_mat.data = mask_raw.data();
std::vector<uchar> temp_mat(out_size, 0);
for(int i = 0; i < out_size; ++i){
temp_mat[i] = 255 * data[i + out_size];
}
mask_mat.data = temp_mat.data();
cv::Mat mask_temp_mat((*height_vec)[si], (*width_vec)[si], mask_mat.type());
//Size(cols, rows)
cv::resize(mask_mat, mask_temp_mat, mask_temp_mat.size());
// cv::resize(mask_mat, mask_temp_mat, cv::Size((*width_vec)[si], (*height_vec)[si]));
//debug
//for(int i = 0; i < (*height_vec)[si]; ++i){
// for(int j = 0; j < (*width_vec)[si]; ++j) {
// std::cout << mask_temp_mat.at<float>(i, j) << " ";
// }
// std::cout << std::endl;
//}
std::vector<uchar> mat_buff;
cv::imencode(".png", mask_temp_mat, mat_buff);
ins->set_mask(mat_buff.data(), mat_buff.size());
ins->set_mask(reinterpret_cast<char *>(mat_buff.data()), mat_buff.size());
}
// release out tensor object resource
......
......@@ -103,7 +103,8 @@ int ReaderOp::inference() {
const ImageSegReqItem& ins = req->instances(si);
// read dense image from request bytes
const char* binary = ins.image_binary().c_str();
size_t length = ins.image_length();
//size_t length = ins.image_length();
size_t length = ins.image_binary().length();
if (length == 0) {
LOG(ERROR) << "Empty image, length is 0";
return -1;
......
......@@ -50,7 +50,7 @@ int WriteJsonOp::inference() {
std::string err_string;
uint32_t batch_size = seg_out->item_size();
LOG(INFO) << "batch_size = " << batch_size;
LOG(INFO) << seg_out->ShortDebugString();
// LOG(INFO) << seg_out->ShortDebugString();
for (uint32_t si = 0; si < batch_size; si++) {
ResponseItem* ins = res->add_prediction();
//LOG(INFO) << "Original image width = " << seg_out->width(si) << ", height = " << seg_out->height(si);
......@@ -59,6 +59,7 @@ int WriteJsonOp::inference() {
return -1;
}
std::string* text = ins->mutable_info();
LOG(INFO) << seg_out->item(si).ShortDebugString();
if (!ProtoMessageToJson(seg_out->item(si), text, &err_string)) {
LOG(ERROR) << "Failed convert message["
<< seg_out->item(si).ShortDebugString()
......
# coding: utf-8
import sys
import os
import cv2
import requests
import json
......@@ -8,115 +7,96 @@ import base64
import numpy as np
import time
import threading
import re
#分割服务的地址
#IMAGE_SEG_URL = 'http://yq01-gpu-151-23-00.epc:8010/ImageSegService/inference'
#IMAGE_SEG_URL = 'http://106.12.25.202:8010/ImageSegService/inference'
IMAGE_SEG_URL = 'http://180.76.118.53:8010/ImageSegService/inference'
# 请求预测服务
# input_img 要预测的图片列表
def get_item_json(input_img):
    """Build the JSON-serialisable request item for one image file.

    The file is read as bytes and base64-encoded, because the HTTP protocol of
    the service expects base64 text.

    Args:
        input_img: path of the image file to send.

    Returns:
        dict with ``image_binary`` (base64 text) and ``image_length`` (length
        of that text).
    """
    with open(input_img, mode="rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes).decode('utf-8')
    return {
        "image_length": len(encoded),
        "image_binary": encoded
    }
def request_predictor_server(input_img_list, dir_name):
    """Send a batch of images to the segmentation service and decode the masks.

    Args:
        input_img_list: file names of the images to send.
        dir_name: prefix prepended (by plain string concatenation) to every
            file name.

    Returns:
        A list with one ``numpy.uint8`` 1-D array per returned mask (the
        still-encoded image bytes), or None when the response could not be
        parsed.
    """
    data = {"instances": [get_item_json(dir_name + input_img) for input_img in input_img_list]}
    response = requests.post(IMAGE_SEG_URL, data=json.dumps(data))
    try:
        # Keep the HTTP response object intact so that the error path below
        # can still report response.text.
        payload = json.loads(response.text)
        prediction_list = payload["prediction"]
        mask_response_list = [mask_response["info"] for mask_response in prediction_list]
        mask_raw_list = [json.loads(mask_response)["mask"] for mask_response in mask_response_list]
    except Exception as err:
        print("Exception[%s], server_message[%s]" % (str(err), response.text))
        return None
    # Masks in a JSON reply are base64-encoded as well.
    mask_binary_list = [base64.b64decode(mask_raw) for mask_raw in mask_raw_list]
    # frombuffer replaces the deprecated binary mode of np.fromstring.
    return [np.frombuffer(mask_binary, np.uint8) for mask_binary in mask_binary_list]
# 对预测结果进行可视化
# input_raw_mask 是server返回的预测结果
# output_img 是可视化结果存储路径
def visualization(mask_mat, output_img):
    """Colourise a label mask returned by the server and write it to disk.

    Args:
        mask_mat: encoded mask bytes as a uint8 array, decoded with
            ``cv2.imdecode``.
        output_img: path the colourised image is written to.

    Raises:
        IndexError: if the mask contains a label outside the colour table.
    """
    # Fixed colour table: one colour per label, for clearer visualisation.
    color_map = [[128, 64, 128],
                 [244, 35, 231],
                 [69, 69, 69],
                 [102, 102, 156],
                 [190, 153, 153],
                 [153, 153, 153],
                 [250, 170, 29],
                 [219, 219, 0],
                 [106, 142, 35],
                 [152, 250, 152],
                 [69, 129, 180],
                 [219, 19, 60],
                 [255, 0, 0],
                 [0, 0, 142],
                 [0, 0, 69],
                 [0, 60, 100],
                 [0, 79, 100],
                 [0, 0, 230],
                 [119, 10, 32]]
    im = cv2.imdecode(mask_mat, 1)
    # The decoded array is (height, width, channels). The mask keeps its own
    # size, so no resize is needed; the label is carried by channel 0.
    lut = np.array(color_map, dtype=np.uint8)
    colored = lut[im[:, :, 0]]
    cv2.imwrite(output_img, colored)
#benchmark test
def benchmark_test(batch_size, img_list):
    """Send ``img_list`` to the server in batches and print the total latency.

    Args:
        batch_size: number of images per request.
        img_list: file names, resolved against the ``images/`` prefix.
    """
    started_at = time.time()
    image_count = len(img_list)
    for batch_start in range(0, image_count, batch_size):
        batch_end = np.min([batch_start + batch_size, image_count])
        mask_mat_list = request_predictor_server(img_list[batch_start:batch_end], "images/")
        # The returned masks could be visualised and saved here; that step is
        # left disabled so the loop measures request latency only.
        # for j in range(len(mask_mat_list)):
        #     visualization(mask_mat_list[j], img_list[j + batch_start])
    latency = time.time() - started_at
    print("batch size = %d, total latency = %f s" % (batch_size, latency))
IMAGE_SEG_URL = 'http://xxx.xxx.xxx.xxx:8010/ImageSegService/inference'
class ClientThread(threading.Thread):
    """Worker thread that repeatedly sends every image of a repository to the
    segmentation service and saves the blended result images.

    Args:
        thread_id: identifier printed in the progress output.
        image_data_repo: iterable of image file names that also provides
            ``get_image_binary(name)`` and ``get_image_matrix(name)``.
    """

    def __init__(self, thread_id, image_data_repo):
        threading.Thread.__init__(self)
        self.__thread_id = thread_id
        self.__image_data_repo = image_data_repo

    def run(self):
        self.__request_image_seg_service()

    def __request_image_seg_service(self):
        # Send 150 consecutive rounds of requests.
        for i in range(1, 151):
            print("Epoch %d, thread %d" % (i, self.__thread_id))
            self.__benchmark_test()

    def __benchmark_test(self):
        """Run one pass over the repository and print the elapsed time."""
        start = time.time()
        for image_filename in self.__image_data_repo:
            mask_mat_list = self.__request_predictor_server(image_filename)
            if mask_mat_list is None:
                # The request failed and was already reported; skip the image.
                continue
            input_img = self.__image_data_repo.get_image_matrix(image_filename)
            # Turn each mask into an image file in the current directory.
            # Comment this loop out for a pure load test.
            for mask_mat in mask_mat_list:
                self.__visualization(mask_mat, image_filename, 2, input_img)
        latency = time.time() - start
        print("total latency = %f s" % (latency))

    def __visualization(self, mask_mat, output_img, num_cls, input_img):
        """Blend the input image over a white background using the mask.

        Args:
            mask_mat: encoded mask bytes as a uint8 array.
            output_img: path the blended image is written to.
            num_cls: unused; kept so existing call sites keep working.
            input_img: decoded input image as an array.
        """
        im = cv2.imdecode(mask_mat, 1)
        # Arrays are (height, width, channels) while cv2.resize takes
        # (width, height). Match the mask to the input so the blend below
        # broadcasts.
        height, width = input_img.shape[:2]
        if im.shape[:2] != (height, width):
            im = cv2.resize(im, (width, height))
        # I = aF + (1-a)B, with a white background B.
        a = im / 255.0
        im = a * input_img + (1 - a) * [255, 255, 255]
        cv2.imwrite(output_img, im)

    def __request_predictor_server(self, input_img):
        """Send one image to the service.

        Returns:
            A list of uint8 arrays holding the encoded masks, or None when the
            response could not be parsed.
        """
        data = {"instances": [self.__get_item_json(input_img)]}
        response = requests.post(IMAGE_SEG_URL, data=json.dumps(data))
        try:
            # Keep the HTTP response object intact so that the error path can
            # still report response.text.
            payload = json.loads(response.text)
            prediction_list = payload["prediction"]
            mask_response_list = [mask_response["info"] for mask_response in prediction_list]
            mask_raw_list = [json.loads(mask_response)["mask"] for mask_response in mask_response_list]
        except Exception as err:
            print("Exception[%s], server_message[%s]" % (str(err), response.text))
            return None
        # Masks in a JSON reply are base64-encoded as well.
        mask_binary_list = [base64.b64decode(mask_raw) for mask_raw in mask_raw_list]
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        return [np.frombuffer(mask_binary, np.uint8) for mask_binary in mask_binary_list]

    def __get_item_json(self, input_img):
        """Build the request item for one image name held by the repository."""
        # HTTP requests to the service carry the image as base64 text.
        item_binary_b64 = str(base64.b64encode(self.__image_data_repo.get_image_binary(input_img)), 'utf-8')
        item_size = len(item_binary_b64)
        item_json = {
            "image_length": item_size,
            "image_binary": item_binary_b64
        }
        return item_json
def create_thread_pool(thread_num, image_data_repo):
    """Create ``thread_num`` client threads that share one image repository.

    Thread ids start at 1. The threads are created but not started.
    """
    return [ClientThread(i + 1, image_data_repo) for i in range(thread_num)]
def run_threads(thread_pool):
......@@ -126,7 +106,35 @@ def run_threads(thread_pool):
for thread in thread_pool:
thread.join()
class ImageDataRepo:
    """In-memory store of the JPEG images found in a directory.

    For every file whose name ends in ``.jpg`` or ``.jpeg`` (case-insensitive)
    the raw file bytes and the array decoded by ``cv2.imread`` are kept, keyed
    by file name. Iterating the repository yields the file names.

    Raises:
        Exception: if ``dir_name`` is not a directory.
    """

    def __init__(self, dir_name):
        print("Loading images data...")
        self.__data = {}
        # Raw string, and matched against the whole name so that e.g.
        # "a.jpg.bak" is not picked up.
        pattern = re.compile(r".+\.(jpg|jpeg)", re.I)
        if not os.path.isdir(dir_name):
            raise Exception("Please use directory to initialize")
        for image_filename in os.listdir(dir_name):
            if not pattern.fullmatch(image_filename):
                continue
            full_path = os.path.join(dir_name, image_filename)
            # Close the file promptly instead of leaking the handle.
            with open(full_path, mode="rb") as fp:
                image_binary_data = fp.read()
            image_mat_data = cv2.imread(full_path)
            if image_mat_data is None:
                # cv2.imread signals an undecodable file by returning None;
                # storing it would only fail later during visualisation.
                print("Skip unreadable image: %s" % full_path)
                continue
            self.__data[image_filename] = (image_binary_data, image_mat_data)
        print("Finish loading.")

    def __iter__(self):
        for filename in self.__data:
            yield filename

    def get_image_binary(self, image_name):
        """Return the raw file bytes stored for ``image_name``."""
        return self.__data[image_name][0]

    def get_image_matrix(self, image_name):
        """Return the decoded image array stored for ``image_name``."""
        return self.__data[image_name][1]
if __name__ == "__main__":
    # Preprocess: load every image under ./images once, before any request is
    # sent, so the worker threads share the in-memory data.
    IDR = ImageDataRepo("images")
    thread_pool = create_thread_pool(thread_num=1, image_data_repo=IDR)
    run_threads(thread_pool)
EVAL_CROP_SIZE: (1536, 576) # (width, height), for unpadding rangescaling and stepscaling
TRAIN_CROP_SIZE: (1536, 576) # (width, height), for unpadding rangescaling and stepscaling
AUG:
AUG_METHOD: "unpadding" # choice unpadding rangescaling and stepscaling
FIX_RESIZE_SIZE: (1536, 576) # (width, height), for unpadding
INF_RESIZE_VALUE: 1280 # for rangescaling
MAX_RESIZE_VALUE: 1024 # for rangescaling
MIN_RESIZE_VALUE: 1536 # for rangescaling
MAX_SCALE_FACTOR: 2.0 # for stepscaling
MIN_SCALE_FACTOR: 0.5 # for stepscaling
SCALE_STEP_SIZE: 0.25 # for stepscaling
MIRROR: True
RICH_CROP:
ENABLE: False
ASPECT_RATIO: 0.33
BLUR: True
BLUR_RATIO: 0.1
FLIP: True
FLIP_RATIO: 0.2
MAX_ROTATION: 15
MIN_AREA_RATIO: 0.5
BRIGHTNESS_JITTER_RATIO: 0.5
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 1
MEAN: [127.5, 127.5, 127.5]
STD: [127.5, 127.5, 127.5]
DATASET:
DATA_DIR: "./dataset/line/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
NUM_CLASSES: 2
TEST_FILE_LIST: "./dataset/line/test_list.txt"
SEPARATOR: " "
IGNORE_INDEX: 255
FREEZE:
MODEL_FILENAME: "__model__"
PARAMS_FILENAME: "__params__"
SAVE_DIR: "line_freeze_model"
MODEL:
DEFAULT_NORM_TYPE: "bn"
MODEL_NAME: "deeplabv3p"
DEEPLAB:
BACKBONE: "mobilenet"
TEST:
TEST_MODEL: "./test/models/line/"
TRAIN:
MODEL_SAVE_DIR: "snapshots/line_v4/"
PRETRAINED_MODEL: "./models/deeplabv3p_mobilenetv2_init/"
RESUME: False
SNAPSHOT_EPOCH: 40
SOLVER:
LR: 0.01
LR_POLICY: "poly"
OPTIMIZER: "sgd"
SNAPSHOT: 10
......@@ -23,8 +23,6 @@ AUG:
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 4
MEAN: [0.5, 0.5, 0.5]
STD: [0.5, 0.5, 0.5]
DATASET:
DATA_DIR: "./dataset/cityscapes/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
......
TRAIN_CROP_SIZE: (513, 513) # (width, height), for unpadding rangescaling and stepscaling
EVAL_CROP_SIZE: (513, 513) # (width, height), for unpadding rangescaling and stepscaling
AUG:
AUG_METHOD: u"unpadding" # choice unpadding rangescaling and stepscaling
FIX_RESIZE_SIZE: (513, 513) # (width, height), for unpadding
INF_RESIZE_VALUE: 513 # for rangescaling
MAX_RESIZE_VALUE: 400 # for rangescaling
MIN_RESIZE_VALUE: 513 # for rangescaling
MAX_SCALE_FACTOR: 2.0 # for stepscaling
MIN_SCALE_FACTOR: 0.5 # for stepscaling
SCALE_STEP_SIZE: 0.25 # for stepscaling
MIRROR: True
RICH_CROP:
ENABLE: True
ASPECT_RATIO: 0
BLUR: True
BLUR_RATIO: 0.1
FLIP: True
FLIP_RATIO: 0.2
MAX_ROTATION: 45
MIN_AREA_RATIO: 0
BRIGHTNESS_JITTER_RATIO: 0.5
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 24
MEAN: [104.008, 116.669, 122.675]
STD: [1.0, 1.0, 1.0]
DATASET:
DATA_DIR: u"./data/humanseg/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
NUM_CLASSES: 2
TEST_FILE_LIST: u"data/humanseg/list/val.txt"
TRAIN_FILE_LIST: u"data/humanseg/list/train.txt"
VAL_FILE_LIST: u"data/humanseg/list/val.txt"
IGNORE_INDEX: 255
SEPARATOR: "|"
FREEZE:
MODEL_FILENAME: "__model__"
PARAMS_FILENAME: "__params__"
SAVE_DIR: "human_freeze_model"
MODEL:
DEFAULT_NORM_TYPE: u"bn"
MODEL_NAME: "deeplabv3p"
DEEPLAB:
BACKBONE: "xception_65"
TEST:
TEST_MODEL: "snapshots/humanseg/aic_v2/final/"
TRAIN:
MODEL_SAVE_DIR: "snapshots/humanseg/aic_v2/"
PRETRAINED_MODEL: "pretrain/xception65_pretrained/"
RESUME: False
SNAPSHOT_EPOCH: 5
SOLVER:
LR: 0.1
NUM_EPOCHS: 40
LR_POLICY: "poly"
OPTIMIZER: "sgd"
EVAL_CROP_SIZE: (512, 512) # (width, height), for unpadding rangescaling and stepscaling
TRAIN_CROP_SIZE: (512, 512) # (width, height), for unpadding rangescaling and stepscaling
AUG:
AUG_METHOD: u"stepscaling" # choice unpadding rangescaling and stepscaling
FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding
INF_RESIZE_VALUE: 500 # for rangescaling
MAX_RESIZE_VALUE: 600 # for rangescaling
MIN_RESIZE_VALUE: 400 # for rangescaling
MAX_SCALE_FACTOR: 2.0 # for stepscaling
MIN_SCALE_FACTOR: 0.5 # for stepscaling
SCALE_STEP_SIZE: 0.25 # for stepscaling
MIRROR: True
RICH_CROP:
ENABLE: False
ASPECT_RATIO: 0.33
BLUR: True
BLUR_RATIO: 0.1
FLIP: True
FLIP_RATIO: 0.2
MAX_ROTATION: 15
MIN_AREA_RATIO: 0.5
BRIGHTNESS_JITTER_RATIO: 0.5
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 10
#MEAN: [104.008, 116.669, 122.675]
#STD: [1.0, 1.0, 1.0]
MEAN: [127.5, 127.5, 127.5]
STD: [127.5, 127.5, 127.5]
DATASET:
DATA_DIR: "./data/COCO2014/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
NUM_CLASSES: 21
TEST_FILE_LIST: "data/COCO2014/ImageSets/val.txt"
TRAIN_FILE_LIST: "data/COCO2014/ImageSets/train.txt"
VAL_FILE_LIST: "data/COCO2014/ImageSets/val.txt"
SEPARATOR: "|"
IGNORE_INDEX: 255
FREEZE:
MODEL_FILENAME: "model"
PARAMS_FILENAME: "params"
MODEL:
DEFAULT_NORM_TYPE: "bn"
MODEL_NAME: "unet"
UNET:
UPSAMPLE_MODE: "bilinear"
TEST:
TEST_MODEL: "snapshots/coco_v1/"
TRAIN:
MODEL_SAVE_DIR: "snapshots/coco_v1/"
PRETRAINED_MODEL: ""
RESUME: False
SNAPSHOT_EPOCH: 10
SOLVER:
LR: 0.025
WEIGHT_DECAY: 0.00004
NUM_EPOCHS: 50
LR_POLICY: "piecewise"
OPTIMIZER: "Adam"
DECAY_EPOCH: "20,35,45"
......@@ -25,8 +25,6 @@ AUG:
CONTRAST_JITTER_RATIO: 0.5
SATURATION_JITTER_RATIO: 0.5
BATCH_SIZE: 6
MEAN: [104.008, 116.669, 122.675]
STD: [1.0, 1.0, 1.0]
DATASET:
DATA_DIR: "./dataset/pet/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册