未验证 提交 802222ae 编写于 作者: C cc 提交者: GitHub

[Seg] Add PP-LiteSeg (Doc & APP) and PP-HumanSeg (APP) (#5576)

* Add PP-LiteSeg to Model Center

* Add APP for PP-HumanSegV2 and PP-LiteSeg
上级 6c1080cd
import codecs
import os
import sys
import time
import zipfile
import gradio as gr
import numpy as np
import cv2
import requests
import yaml
from paddle.inference import Config as PredictConfig
from paddle.inference import create_predictor
# Time of the most recent console refresh; `progress` throttles against it.
lasttime = time.time()
# Minimum number of seconds between two console refreshes.
FLUSH_INTERVAL = 0.1


def progress(str, end=False):
    """Write a throttled status line to stdout.

    The text is prefixed with a carriage return so that successive calls
    overwrite the same console line. A call is dropped when less than
    FLUSH_INTERVAL seconds have passed since the last write.

    Args:
        str: The status text to show.
        end (bool): When True, a newline is appended and the text is
            written regardless of the throttle. Default: False.
    """
    global lasttime
    message = str
    if end:
        message = message + "\n"
        # Resetting the timestamp guarantees the throttle check below passes.
        lasttime = 0
    elapsed = time.time() - lasttime
    if elapsed < FLUSH_INTERVAL:
        return
    sys.stdout.write("\r%s" % message)
    lasttime = time.time()
    sys.stdout.flush()
def _download_file(url, savepath, print_progress=True):
    """Stream `url` into `savepath`, optionally drawing a console progress bar.

    Args:
        url (str): Address of the file to fetch.
        savepath (str): Local path the payload is written to; an existing
            file is overwritten.
        print_progress (bool): Whether to print status lines. Default: True.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here because the file's import list does not provide `shutil`,
    # which the unknown-size branch below needs.
    import shutil

    if print_progress:
        print("Connecting to {}".format(url))
    r = requests.get(url, stream=True, timeout=15)
    # Stop on 4xx/5xx instead of saving the error page as if it were the file.
    r.raise_for_status()
    total_length = r.headers.get('content-length')

    if total_length is None:
        # Size unknown: copy the raw stream without a progress bar.
        with open(savepath, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
        return

    total_length = int(total_length)
    dl = 0
    if print_progress:
        print("Downloading %s" % os.path.basename(savepath))
    with open(savepath, 'wb') as f:
        for data in r.iter_content(chunk_size=4096):
            dl += len(data)
            f.write(data)
            if print_progress:
                # The bar is 50 characters wide.
                done = int(50 * dl / total_length)
                progress("[%-50s] %.2f%%" %
                         ('=' * done, float(100 * dl) / total_length))
    if print_progress:
        # Always finish with a full bar and a newline.
        progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
def uncompress(path):
    """Extract every member of the zip archive at `path` into the current directory.

    Args:
        path (str): Location of the zip file.
    """
    # The context manager closes the archive handle even if extraction fails.
    with zipfile.ZipFile(path, 'r') as archive:
        archive.extractall('./')
class DeployConfig:
    """File locations of an exported model, read from a deploy YAML file.

    Args:
        path (str): Path of the YAML file. Its `Deploy.model` and
            `Deploy.params` entries are joined onto the YAML's directory.
    """

    def __init__(self, path):
        self._dir = os.path.dirname(path)
        with codecs.open(path, 'r', 'utf-8') as stream:
            # NOTE(review): the YAML comes out of a downloaded archive;
            # consider whether yaml.safe_load would be sufficient here.
            self.dic = yaml.load(stream, Loader=yaml.FullLoader)

    def _resolve(self, key):
        # Join one entry of the `Deploy` section onto the YAML's directory.
        return os.path.join(self._dir, self.dic['Deploy'][key])

    @property
    def model(self):
        """Path of the model file named by `Deploy.model`."""
        return self._resolve('model')

    @property
    def params(self):
        """Path of the parameters file named by `Deploy.params`."""
        return self._resolve('params')
class Predictor:
    """CPU Paddle Inference wrapper that attaches a predicted alpha channel to an image."""

    def __init__(self, cfg):
        """
        Prepare for prediction.

        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html

        Args:
            cfg (str): Path of the deploy YAML file of the exported model.
        """
        self.cfg = DeployConfig(cfg)
        self._init_base_config()
        self._init_cpu_config()
        self.predictor = create_predictor(self.pred_cfg)

    def _init_base_config(self):
        """Create the inference config with memory and IR optimization enabled."""
        config = PredictConfig(self.cfg.model, self.cfg.params)
        config.enable_memory_optim()
        config.switch_ir_optim(True)
        self.pred_cfg = config

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        config = self.pred_cfg
        config.disable_gpu()
        # The thread count is fixed at 10.
        config.set_cpu_math_library_num_threads(10)

    def _preprocess(self, img):
        """Resize, normalize and reorder an image into a batch of one.

        Args:
            img (numpy.ndarray): Image with the channel axis last.

        Returns:
            numpy.ndarray: Array with a leading batch axis and the channel
            axis moved in front of the two spatial axes.
        """
        # cv2.resize takes the target size as (width, height).
        resized = cv2.resize(img, (256, 144))
        # Map values from [0, 255] to [-1, 1].
        normalized = (resized / 255 - 0.5) / 0.5
        channel_first = np.transpose(normalized, [2, 0, 1])
        return channel_first[np.newaxis, :]

    def run(self, img):
        """Run the model on `img` and return the image with an extra alpha channel.

        Args:
            img (numpy.ndarray): Input image with the channel axis last.

        Returns:
            numpy.ndarray: `img` concatenated along its last axis with a
            uint8 alpha map of the same height and width.
        """
        names_in = self.predictor.get_input_names()
        handles_in = {
            name: self.predictor.get_input_handle(name)
            for name in names_in
        }
        names_out = self.predictor.get_output_names()
        handle_out = self.predictor.get_output_handle(names_out[0])

        as_float = img.astype('float32')
        ori_h, ori_w = as_float.shape[:2]
        batch = self._preprocess(img=as_float)

        handles_in[names_in[0]].copy_from_cpu(batch)
        self.predictor.run()
        scores = handle_out.copy_to_cpu()

        # Channel 1 of the first sample is used as the alpha map
        # (presumably the foreground probability — confirm against the model).
        alpha = scores[0, 1, :, :].squeeze()
        alpha = cv2.resize(alpha, (ori_w, ori_h))
        alpha = (alpha * 255).astype('uint8')
        return np.concatenate([img, alpha[:, :, np.newaxis]], axis=-1)
def model_inference(image):
    """Segment `image` with the portrait model, downloading the model on first use.

    Args:
        image (numpy.ndarray | None): Image delivered by the Gradio input
            widget, or None when the widget is empty.

    Returns:
        The array returned by `Predictor.run`, or None when `image` is None.
    """
    # Submit can be pressed while the input widget is empty (for example
    # right after Clear); there is nothing to segment in that case.
    if image is None:
        return None

    # Download inference model
    model_dir = './portrait_pp_humansegv2_lite_256x144_inference_model_with_softmax'
    url = 'https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/portrait_pp_humansegv2_lite_256x144_smaller/portrait_pp_humansegv2_lite_256x144_inference_model_with_softmax.zip'
    savepath = model_dir + '.zip'
    if not os.path.exists(model_dir):
        _download_file(url=url, savepath=savepath)
        uncompress(savepath)

    # Inference. Note that a new Predictor is built on every call.
    predictor = Predictor(cfg=model_dir + '/deploy.yaml')
    return predictor.run(image)
def clear_all():
    """Return one None per image widget so that both get emptied."""
    cleared_input = None
    cleared_output = None
    return cleared_input, cleared_output
# Gradio UI: an input image, Clear/Submit buttons and an output image.
with gr.Blocks() as demo:
    gr.Markdown("Segmentation")
    with gr.Column(scale=1, min_width=100):
        # The input widget is pre-filled with a sample image given by URL.
        img_in = gr.Image(
            value="https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/portrait_heng.jpg",
            label="Input")
        with gr.Row():
            btn1 = gr.Button("Clear")
            btn2 = gr.Button("Submit")
        img_out = gr.Image(label="Output").style(height=200)
    # Submit feeds the input image to model_inference and shows its result.
    btn2.click(fn=model_inference, inputs=img_in, outputs=[img_out])
    # Clear writes the two values returned by clear_all into both image widgets.
    btn1.click(fn=clear_all, inputs=None, outputs=[img_in, img_out])
    # NOTE(review): this calls Button.style on the class with the integer 1
    # in place of an instance; it looks like leftover code — confirm whether
    # it can be removed.
    gr.Button.style(1)
# Start the app; share=True is passed to request a public share link.
demo.launch(share=True)
【PP-HumanSegV2-App-YAML】
APP_Info:
title: PP-HumanSegV2-App
colorFrom: blue
colorTo: yellow
sdk: gradio
sdk_version: 3.4.1
app_file: app.py
license: apache-2.0
device: cpu
\ No newline at end of file
gradio
paddlepaddle
opencv-python
pyyaml >= 5.1
paddleseg
\ No newline at end of file
......@@ -115,6 +115,7 @@
"source": [
"# 安装PaddleSeg\n",
"%cd ~/PaddleSeg\n",
"!git checkout release/2.6\n",
"!pip install -v -e ."
]
},
......
import codecs
import os
import sys
import time
import zipfile
import gradio as gr
import numpy as np
import cv2
import requests
import yaml
from paddle.inference import Config as PredictConfig
from paddle.inference import create_predictor
from PIL import Image as PILImage
# Time of the most recent console refresh; `progress` throttles against it.
lasttime = time.time()
# Minimum number of seconds between two console refreshes.
FLUSH_INTERVAL = 0.1


def progress(str, end=False):
    """Write a throttled status line to stdout.

    The text is prefixed with a carriage return so that successive calls
    overwrite the same console line. A call is dropped when less than
    FLUSH_INTERVAL seconds have passed since the last write.

    Args:
        str: The status text to show.
        end (bool): When True, a newline is appended and the text is
            written regardless of the throttle. Default: False.
    """
    global lasttime
    message = str
    if end:
        message = message + "\n"
        # Resetting the timestamp guarantees the throttle check below passes.
        lasttime = 0
    elapsed = time.time() - lasttime
    if elapsed < FLUSH_INTERVAL:
        return
    sys.stdout.write("\r%s" % message)
    lasttime = time.time()
    sys.stdout.flush()
def _download_file(url, savepath, print_progress=True):
    """Stream `url` into `savepath`, optionally drawing a console progress bar.

    Args:
        url (str): Address of the file to fetch.
        savepath (str): Local path the payload is written to; an existing
            file is overwritten.
        print_progress (bool): Whether to print status lines. Default: True.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here because the file's import list does not provide `shutil`,
    # which the unknown-size branch below needs.
    import shutil

    if print_progress:
        print("Connecting to {}".format(url))
    r = requests.get(url, stream=True, timeout=15)
    # Stop on 4xx/5xx instead of saving the error page as if it were the file.
    r.raise_for_status()
    total_length = r.headers.get('content-length')

    if total_length is None:
        # Size unknown: copy the raw stream without a progress bar.
        with open(savepath, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
        return

    total_length = int(total_length)
    dl = 0
    if print_progress:
        print("Downloading %s" % os.path.basename(savepath))
    with open(savepath, 'wb') as f:
        for data in r.iter_content(chunk_size=4096):
            dl += len(data)
            f.write(data)
            if print_progress:
                # The bar is 50 characters wide.
                done = int(50 * dl / total_length)
                progress("[%-50s] %.2f%%" %
                         ('=' * done, float(100 * dl) / total_length))
    if print_progress:
        # Always finish with a full bar and a newline.
        progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
def uncompress(path):
    """Extract every member of the zip archive at `path` into the current directory.

    Args:
        path (str): Location of the zip file.
    """
    # The context manager closes the archive handle even if extraction fails.
    with zipfile.ZipFile(path, 'r') as archive:
        archive.extractall('./')
class DeployConfig:
    """File locations of an exported model, read from a deploy YAML file.

    Args:
        path (str): Path of the YAML file. Its `Deploy.model` and
            `Deploy.params` entries are joined onto the YAML's directory.
    """

    def __init__(self, path):
        self._dir = os.path.dirname(path)
        with codecs.open(path, 'r', 'utf-8') as stream:
            # NOTE(review): the YAML comes out of a downloaded archive;
            # consider whether yaml.safe_load would be sufficient here.
            self.dic = yaml.load(stream, Loader=yaml.FullLoader)

    def _resolve(self, key):
        # Join one entry of the `Deploy` section onto the YAML's directory.
        return os.path.join(self._dir, self.dic['Deploy'][key])

    @property
    def model(self):
        """Path of the model file named by `Deploy.model`."""
        return self._resolve('model')

    @property
    def params(self):
        """Path of the parameters file named by `Deploy.params`."""
        return self._resolve('params')
class Predictor:
    """CPU Paddle Inference wrapper that returns a palette-colored segmentation mask."""

    def __init__(self, cfg):
        """
        Prepare for prediction.

        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html

        Args:
            cfg (str): Path of the deploy YAML file of the exported model.
        """
        self.cfg = DeployConfig(cfg)
        self._init_base_config()
        self._init_cpu_config()
        self.predictor = create_predictor(self.pred_cfg)

    def _init_base_config(self):
        """Create the inference config with memory and IR optimization enabled."""
        config = PredictConfig(self.cfg.model, self.cfg.params)
        config.enable_memory_optim()
        config.switch_ir_optim(True)
        self.pred_cfg = config

    def _init_cpu_config(self):
        """
        Init the config for x86 cpu.
        """
        config = self.pred_cfg
        config.disable_gpu()
        # The thread count is fixed at 10.
        config.set_cpu_math_library_num_threads(10)

    def _preprocess(self, img):
        """Normalize and reorder an image into a batch of one.

        No resizing happens here; the image keeps its spatial size.

        Args:
            img (numpy.ndarray): Image with the channel axis last.

        Returns:
            numpy.ndarray: Array with a leading batch axis and the channel
            axis moved in front of the two spatial axes.
        """
        # Map values from [0, 255] to [-1, 1].
        normalized = (img / 255 - 0.5) / 0.5
        channel_first = np.transpose(normalized, [2, 0, 1])
        return channel_first[np.newaxis, :]

    def get_pseudo_color_map(self, pred, color_map=None):
        """
        Get the pseudo color image.

        Args:
            pred (numpy.ndarray): the origin predicted image.
            color_map (list, optional): the palette color map. Default: None,
                use the palette built by `get_color_map_list(256)`.

        Returns:
            (PIL.Image.Image): the pseudo color image in palette ('P') mode.
        """
        palette = self.get_color_map_list(256) if color_map is None else color_map
        indexed = PILImage.fromarray(pred.astype(np.uint8), mode='P')
        indexed.putpalette(palette)
        return indexed

    def get_color_map_list(self, num_classes, custom_color=None):
        """
        Returns the color map for visualizing the segmentation mask,
        which can support arbitrary number of classes.

        Args:
            num_classes (int): Number of classes.
            custom_color (list, optional): Flat list of color values that
                overwrites the start of the generated map. Default: None.

        Returns:
            (list). The color map as a flat list with three values per class.
        """
        # One extra entry is generated because the first triple is dropped below.
        total = num_classes + 1
        palette = [0] * (total * 3)
        for label in range(total):
            base = label * 3
            remaining = label
            shift = 7
            # Consume the label three bits at a time, one bit per channel,
            # filling each channel from its most significant bit downwards.
            while remaining:
                for channel in range(3):
                    palette[base + channel] |= ((remaining >> channel) & 1) << shift
                shift -= 1
                remaining >>= 3
        palette = palette[3:]
        if custom_color:
            palette[:len(custom_color)] = custom_color
        return palette

    def run(self, img):
        """Run the model on `img` and return the colored segmentation mask.

        Args:
            img (numpy.ndarray): Input image with the channel axis last.

        Returns:
            PIL.Image.Image: Palette-mode mask resized to the height and
            width of `img`.
        """
        names_in = self.predictor.get_input_names()
        handles_in = {
            name: self.predictor.get_input_handle(name)
            for name in names_in
        }
        names_out = self.predictor.get_output_names()
        handle_out = self.predictor.get_output_handle(names_out[0])

        as_float = img.astype('float32')
        ori_h, ori_w = as_float.shape[:2]
        batch = self._preprocess(img=as_float)

        handles_in[names_in[0]].copy_from_cpu(batch)
        self.predictor.run()
        outputs = handle_out.copy_to_cpu()

        # The first sample of the output is treated as a label map
        # (assumes the exported model already emits class ids — TODO confirm).
        labels = outputs[0].squeeze().astype('uint8')
        # Nearest-neighbor keeps the values valid class ids when resizing.
        labels = cv2.resize(labels, (ori_w, ori_h), interpolation=cv2.INTER_NEAREST)
        return self.get_pseudo_color_map(labels)
def model_inference(image):
    """Segment `image` with the PP-LiteSeg model, downloading the model on first use.

    Args:
        image (numpy.ndarray | None): Image delivered by the Gradio input
            widget, or None when the widget is empty.

    Returns:
        The colored mask returned by `Predictor.run`, or None when `image`
        is None.
    """
    # Submit can be pressed while the input widget is empty (for example
    # right after Clear); there is nothing to segment in that case.
    if image is None:
        return None

    # Download inference model
    model_dir = './pp_liteseg_stdc1_cityscapes_1024x512_scale1.0_160k_inference_model'
    url = 'https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc1_cityscapes_1024x512_scale1.0_160k_inference_model.zip'
    savepath = model_dir + '.zip'
    if not os.path.exists(model_dir):
        _download_file(url=url, savepath=savepath)
        uncompress(savepath)

    # Inference. Note that a new Predictor is built on every call.
    predictor = Predictor(cfg=model_dir + '/deploy.yaml')
    return predictor.run(image)
def clear_all():
    """Return one None per image widget so that both get emptied."""
    cleared_input = None
    cleared_output = None
    return cleared_input, cleared_output
# Gradio UI: an input image, Clear/Submit buttons and an output image.
with gr.Blocks() as demo:
    gr.Markdown("Segmentation")
    with gr.Column(scale=1, min_width=100):
        # The input widget is pre-filled with a sample image given by URL.
        img_in = gr.Image(
            value="https://user-images.githubusercontent.com/48357642/201077761-3ebeda52-b15d-4913-b64c-0798d1f922a5.png",
            label="Input")
        with gr.Row():
            btn1 = gr.Button("Clear")
            btn2 = gr.Button("Submit")
        img_out = gr.Image(label="Output").style(height=200)
    # Submit feeds the input image to model_inference and shows its result.
    btn2.click(fn=model_inference, inputs=img_in, outputs=[img_out])
    # Clear writes the two values returned by clear_all into both image widgets.
    btn1.click(fn=clear_all, inputs=None, outputs=[img_in, img_out])
    # NOTE(review): this calls Button.style on the class with the integer 1
    # in place of an instance; it looks like leftover code — confirm whether
    # it can be removed.
    gr.Button.style(1)
# Start the app; share=True is passed to request a public share link.
demo.launch(share=True)
【PP-LiteSeg-App-YAML】
APP_Info:
title: PP-LiteSeg-App
colorFrom: blue
colorTo: yellow
sdk: gradio
sdk_version: 3.4.1
app_file: app.py
license: apache-2.0
device: cpu
\ No newline at end of file
gradio
paddlepaddle
opencv-python
pyyaml >= 5.1
paddleseg
## 1. 推理 Benchmark
### 1.1 软硬件环境
* 语义分割模型的精度mIoU:针对Cityscapes数据集,使用PaddleSeg进行训练和测试。
* 语义分割模型的速度FPS:硬件是Nvidia GPU (1080Ti),为了和其他方法保持相同的测试环境,首先使用[脚本](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/deploy/python/infer_onnx_trt.py)将模型转为ONNX格式,然后使用原生TRT预测引擎进行测试。
### 1.2 数据集
* 使用Cityscapes开源数据集进行测试。
### 1.3 指标
<div align="center">
|模型|编码器|输入图像分辨率|精度mIoU(Val)|精度mIoU(Test)|速度FPS|
|-|-|-|-|-|-|
ESPNet | ESPNet | 512x1024 | - | 60.3 | 112.9 |
ESPNetV2 | ESPNetV2 | 512x1024 | 66.4 | 66.2 | - |
SwiftNet | ResNet18 | 1024x2048 | 75.4 | 75.5 | 39.9 |
BiSeNetV1 | Xception39 | 768x1536 | 69.0 | 68.4 | 105.8 |
BiSeNetV1-L | ResNet18 | 768x1536 | 74.8 | 74.7 | 65.5 |
BiSeNetV2 | - | 512x1024 | 73.4 | 72.6 | 156 |
BiSeNetV2-L | - | 512x1024 | 75.8 | 75.3 | 47.3 |
FasterSeg | - | 1024x2048 | 73.1 | 71.5 | 163.9 |
SFNet | DF1 | 1024x2048 | - | 74.5 | 121 |
STDC1-Seg50 | STDC1 | 512x1024 | 72.2 | 71.9 | 250.4 |
STDC2-Seg50 | STDC2 | 512x1024 | 74.2 | 73.4 | 188.6 |
STDC1-Seg75 | STDC1 | 768x1536 | 74.5 | 75.3 | 126.7 |
STDC2-Seg75 | STDC2 | 768x1536 | 77.0 | 76.8 | 97.0 |
PP-LiteSeg-T1 | STDC1 | 512x1024 | 73.1 | 72.0 | 273.6 |
PP-LiteSeg-B1 | STDC2 | 512x1024 | 75.3 | 73.9 | 195.3 |
PP-LiteSeg-T2 | STDC1 | 768x1536 | 76.0 | 74.9 | 143.6 |
PP-LiteSeg-B2 | STDC2 | 768x1536 | 78.2 | 77.5 | 102.6|
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/52520497/162148733-70be896a-eadb-4790-94e5-f48dad356b2d.png" width = "500" height = "430" alt="iou_fps" />
</div>
## 2. 相关使用说明
1. https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/configs/pp_liteseg
# 模型列表
## 1 Cityscapes上语义分割模型
| 模型名 | 骨干网络 | 训练迭代次数 | 训练输入尺寸 | 预测输入尺寸 | 精度mIoU | 精度mIoU(flip) | 精度mIoU(ms+flip) | 下载链接 |
| --- | --- | --- | ---| --- | --- | --- | --- | --- |
|PP-LiteSeg-T|STDC1|160000|1024x512|1024x512|73.10%|73.89%|-|[config](./pp_liteseg_stdc1_cityscapes_1024x512_scale0.5_160k.yml)\|[训练模型](https://paddleseg.bj.bcebos.com/dygraph/cityscapes/pp_liteseg_stdc1_cityscapes_1024x512_scale0.5_160k/model.pdparams)\|[预测模型](https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc1_cityscapes_1024x512_scale0.5_160k_inference_model.zip)|
|PP-LiteSeg-T|STDC1|160000|1024x512|1536x768|76.03%|76.74%|-|[config](./pp_liteseg_stdc1_cityscapes_1024x512_scale0.75_160k.yml)\|[训练模型](https://paddleseg.bj.bcebos.com/dygraph/cityscapes/pp_liteseg_stdc1_cityscapes_1024x512_scale0.75_160k/model.pdparams)\|[预测模型](https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc1_cityscapes_1024x512_scale0.75_160k_inference_model.zip)|
|PP-LiteSeg-T|STDC1|160000|1024x512|2048x1024|77.04%|77.73%|77.46%|[config](./pp_liteseg_stdc1_cityscapes_1024x512_scale1.0_160k.yml)\|[训练模型](https://paddleseg.bj.bcebos.com/dygraph/cityscapes/pp_liteseg_stdc1_cityscapes_1024x512_scale1.0_160k/model.pdparams)\|[预测模型](https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc1_cityscapes_1024x512_scale1.0_160k_inference_model.zip)|
|PP-LiteSeg-B|STDC2|160000|1024x512|1024x512|75.25%|75.65%|-|[config](./pp_liteseg_stdc2_cityscapes_1024x512_scale0.5_160k.yml)\|[训练模型](https://paddleseg.bj.bcebos.com/dygraph/cityscapes/pp_liteseg_stdc2_cityscapes_1024x512_scale0.5_160k/model.pdparams)\|[预测模型](https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc2_cityscapes_1024x512_scale0.5_160k_inference_model.zip)|
|PP-LiteSeg-B|STDC2|160000|1024x512|1536x768|78.75%|79.23%|-|[config](./pp_liteseg_stdc2_cityscapes_1024x512_scale0.75_160k.yml)\|[训练模型](https://paddleseg.bj.bcebos.com/dygraph/cityscapes/pp_liteseg_stdc2_cityscapes_1024x512_scale0.75_160k/model.pdparams)\|[预测模型](https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc2_cityscapes_1024x512_scale0.75_160k_inference_model.zip)|
|PP-LiteSeg-B|STDC2|160000|1024x512|2048x1024|79.04%|79.52%|79.85%|[config](./pp_liteseg_stdc2_cityscapes_1024x512_scale1.0_160k.yml)\|[训练模型](https://paddleseg.bj.bcebos.com/dygraph/cityscapes/pp_liteseg_stdc2_cityscapes_1024x512_scale1.0_160k/model.pdparams)\|[预测模型](https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc2_cityscapes_1024x512_scale1.0_160k_inference_model.zip)|
## 2 CamVid上语义分割模型
| 模型名 | 骨干网络 | 训练迭代次数 | 训练输入尺寸 | 预测输入尺寸 | 精度mIoU | 精度mIoU(flip) | 精度mIoU(ms+flip) | 下载链接 |
| --- | --- | --- | ---| --- | --- | --- | --- | --- |
|PP-LiteSeg-T|STDC1|10000|960x720|960x720|73.30%|73.89%|73.66%|[config](./pp_liteseg_stdc1_camvid_960x720_10k.yml)\|[训练模型](https://paddleseg.bj.bcebos.com/dygraph/camvid/pp_liteseg_stdc1_camvid_960x720_10k/model.pdparams)\|[预测模型](https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc1_camvid_960x720_10k_inference_model.zip)|
|PP-LiteSeg-B|STDC2|10000|960x720|960x720|75.10%|75.85%|75.48%|[config](./pp_liteseg_stdc2_camvid_960x720_10k.yml)\|[训练模型](https://paddleseg.bj.bcebos.com/dygraph/camvid/pp_liteseg_stdc2_camvid_960x720_10k/model.pdparams)\|[预测模型](https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc2_camvid_960x720_10k_inference_model.zip)|
---
Model_Info:
name: "PP-LiteSeg"
description: "PP-LiteSeg轻量级实时语义分割SOTA模型"
description_en: "PP-LiteSeg real-time semantic segmentation model with SOTA performance"
icon: "@后续UE统一设计之后,会存到bos上某个位置"
from_repo: "PaddleSeg"
Task:
- tag_en: "Computer Vision"
tag: "计算机视觉"
sub_tag_en: "Image Segmentation"
sub_tag: "图像分割"
Example:
- tag_en: "工业/能源"
tag: "工业/能源"
sub_tag_en: "缺陷检测"
title: "【PaddleSeg实践范例】使用PP-LiteSeg进行质检缺陷分割"
sub_tag: "缺陷检测"
url: "https://aistudio.baidu.com/aistudio/projectdetail/3877107"
- tag_en: "智慧交通"
tag: "智慧交通"
sub_tag_en: "语义分割"
title: "【PaddleSeg实践范例】使用PP-LiteSeg进行遥感道路分割"
sub_tag: "语义分割"
url: "https://aistudio.baidu.com/aistudio/projectdetail/3873145"
Datasets: "Cityscapes, CamVid"
Pulisher: "Baidu"
License: "apache.2.0"
Paper:
- title: "PP-LiteSeg: A Superior Real-Time Semantic Segmentation Model"
url: "https://arxiv.org/abs/2204.02681"
IfTraining: 1
IfOnlineDemo: 1
{
"cells": [
{
"cell_type": "markdown",
"id": "ff1ef672-bc9d-48ae-aa31-267f24026f6a",
"metadata": {},
"source": [
"## 1. PP-LiteSeg模型简介\n",
"\n",
"语义分割作为视觉三大任务之一,在实际应用中具有广泛的需求。尽管基于深度学习的语义分割技术取得了重大进展,但是有时候语义分割模型的精度和性能无法同时满足业务需求。\n",
"\n",
"针对上述问题,PaddleSeg团队提出了一个新的轻量级实时语义分割模型PP-LiteSeg。具体来说,PP-LiteSeg模型中提出了轻量级解码器(FLD),以减少解码器的计算开销。为了加强特征表示,我们提出了统一注意力融合模块(UAFM),该模块利用空间和通道注意力来产生权重,然后将输入特征与权重融合。此外,我们提出了简易金字塔池化模块(SPPM),以低计算聚合全局上下文。\n",
"\n",
"在Cityscapes测试集上使用NVIDIA GTX 1080Ti进行实验,PP-LiteSeg的精度和速度可以达到 72.0% mIoU / 273.6 FPS 以及 77.5% mIoU / 102.6 FPS。与其他模型相比,PP-LiteSeg在精度和速度之间实现了SOTA平衡。\n",
"\n",
"PP-LiteSeg模型由飞桨官方出品,是PaddleSeg团队推出的SOTA模型。 更多关于PaddleSeg可以点击 https://github.com/PaddlePaddle/PaddleSeg 进行了解。"
]
},
{
"cell_type": "markdown",
"id": "55360c7a-3191-40bf-99c5-64c1c3d89967",
"metadata": {},
"source": [
"## 2. 模型效果及应用场景\n",
"\n",
"### 2.1 实时语义分割任务\n",
"\n",
"#### 2.1.1 数据集\n",
"\n",
"数据集以Cityscapes为主,分为训练集和测试集。\n",
"\n",
"#### 2.1.2 模型效果速览\n",
"\n",
"PP-LiteSeg模型在测试图片上的分割效果如下。\n",
"\n",
"原图:\n",
"<div align=\"center\">\n",
"<img src=\"https://user-images.githubusercontent.com/48357642/201077761-3ebeda52-b15d-4913-b64c-0798d1f922a5.png\" width = \"60%\" />\n",
"</div>\n",
"\n",
"分割后的图:\n",
"<div align=\"center\">\n",
"<img src=\"https://user-images.githubusercontent.com/48357642/201077985-29954838-9df6-4ab4-9f91-23e9a20be513.png\" width = \"60%\" />\n",
"</div>"
]
},
{
"cell_type": "markdown",
"id": "be5dce27-7842-4af5-8ba7-8a1d71b31680",
"metadata": {},
"source": [
"## 3. 模型如何使用\n",
"\n",
"### 3.1 模型推理\n",
"\n",
"* 安装PaddlePaddle\n",
"\n",
"安装PaddlePaddle,要求PaddlePaddle >= 2.2.0。由于图像分割模型计算开销大,推荐在GPU版本的PaddlePaddle下使用。\n",
"\n",
"在AIStudio中,大家可以直接选择安装好PaddlePaddle的环境。如果需要自行安装PaddlePaddle,请参考PaddlePaddle官网。\n",
" \n"
]
},
{
"cell_type": "markdown",
"id": "99436a4d-9c54-4b5e-b494-68424a09d7a5",
"metadata": {},
"source": [
"\n",
"* 下载PaddleSeg\n",
"\n",
"(不在Jupyter Notebook上运行时需要将\"!\"或者\"%\"去掉。)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a16f87a6-85ea-4050-a698-ea634db9c235",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"%cd ~\n",
"!git clone https://gitee.com/PaddlePaddle/PaddleSeg.git"
]
},
{
"cell_type": "markdown",
"id": "846586a0-456d-49da-a4d3-6192f26c2e01",
"metadata": {},
"source": [
"* 安装PaddleSeg"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a234d0dc-a4bd-48b2-af1b-168538b6d9b6",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"# 安装PaddleSeg\n",
"%cd ~/PaddleSeg\n",
"!git checkout release/2.6\n",
"!pip install -v -e ."
]
},
{
"cell_type": "markdown",
"id": "01b72385-c22e-414a-8d3e-5fcaea6e34b4",
"metadata": {},
"source": [
"* 快速体验"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b63e945-38d8-45dd-a066-3b5c05fc06a2",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"# 下载模型\n",
"!wget https://paddleseg.bj.bcebos.com/inference/pp_liteseg_infer_models/pp_liteseg_stdc1_cityscapes_1024x512_scale1.0_160k_inference_model.zip\n",
"!unzip pp_liteseg_stdc1_cityscapes_1024x512_scale1.0_160k_inference_model.zip\n",
"# 下载测试图片\n",
"!wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png\n",
"# 预测\n",
"!python deploy/python/infer.py \\\n",
" --config ./pp_liteseg_stdc1_cityscapes_1024x512_scale1.0_160k_inference_model/deploy.yaml \\\n",
" --image_path ./cityscapes_demo.png \\\n",
" --save_dir output/result"
]
},
{
"cell_type": "markdown",
"id": "5d99268c-5dcf-47bb-97f7-c11b54ebed48",
"metadata": {},
"source": [
"结果保存在`PaddleSeg/output/result/cityscapes_demo.png`(如下图)。\n",
"\n",
"<div align=\"center\">\n",
"<img src=\"https://user-images.githubusercontent.com/48357642/201077985-29954838-9df6-4ab4-9f91-23e9a20be513.png\" width = \"60%\" />\n",
"</div>\n"
]
},
{
"cell_type": "markdown",
"id": "40e633a3-8f48-4883-8de7-fc88b3cb7ea7",
"metadata": {},
"source": [
"### 3.2 模型训练\n",
"\n",
"* 准备\n",
"\n",
"参考前文,安装PaddleSeg。参考[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)文档准备Cityscapes数据集,整理如下。\n",
"\n",
"```\n",
"PaddleSeg/data\n",
"├── cityscapes\n",
"│   ├── gtFine\n",
"│   ├── infer.list\n",
"│   ├── leftImg8bit\n",
"│   ├── test.list\n",
"│   ├── train.list\n",
"│   ├── trainval.list\n",
"│   └── val.list\n",
"```\n"
]
},
{
"cell_type": "markdown",
"id": "4fd01e45-577f-4357-af98-26d815403ea8",
"metadata": {},
"source": [
"* 训练\n",
"\n",
"PP-LiteSeg模型的配置文件保存在`PaddleSeg/configs/pp_liteseg/`下。使用train.py脚本,我们设置相应的配置文件并开始训练模型。\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "034f5d95-61c4-4ee0-9445-e432ba04b366",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"!export CUDA_VISIBLE_DEVICES=0,1,2,3\n",
"!python -m paddle.distributed.launch train.py \\\n",
" --config configs/pp_liteseg/pp_liteseg_stdc1_cityscapes_1024x512_scale0.5_160k.yml \\\n",
" --save_dir output/pp_liteseg_stdc1_cityscapes_1024x512_scale0.5_160k \\\n",
" --save_interval 1000 \\\n",
" --num_workers 3 \\\n",
" --do_eval \\\n",
" --use_vdl"
]
},
{
"cell_type": "markdown",
"id": "f005c439-6677-439a-9e2d-9317f084d303",
"metadata": {},
"source": [
"训练完成后,模型权重保存在`PaddleSeg/output/xxx/best_model/model.pdparams`中。\n",
"\n",
"模型训练的详细文档,可参考[模型训练](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/docs/train/train.md)。"
]
},
{
"cell_type": "markdown",
"id": "3abb9723-e839-407f-a3dc-e9ed6bca5f45",
"metadata": {},
"source": [
"## 4. 模型原理\n",
"\n",
"PP-LiteSeg模型的结构如下图。\n",
"\n",
"<div align=\"center\">\n",
"<img src=\"https://user-images.githubusercontent.com/52520497/162148786-c8b91fd1-d006-4bad-8599-556daf959a75.png\" width = \"50%\" />\n",
"</div>\n",
"\n",
"* 提出了灵活轻量的解码器(FLD)\n",
"\n",
"我们提出的灵活、轻量级的解码器(FLD),在增大特征图空间尺寸的时候,逐渐减少通道数。此外,FLD的计算量可以很容易地根据编码器进行调整。灵活的设计减轻了解码器的冗余,平衡了编码器和解码器的算量,使整个模型更高效。\n",
"\n",
"* 提出了统一注意力融合模块(UAFM)\n",
"\n",
"加强特征表达是提高分割精度的关键方法,大家通常通过融合解码器中的低层细节特征和深层语义特征来实现。然而,现有方法中的融合模块通常具有较高的计算成本。我们提出了统一的注意力融合模块(UAFM)来有效地增强特征表示。在UAFM中,有两种注意力模块,即通道和空间注意力模块。UAFM模块利用通道和空间注意力来增强特征表示。\n",
"\n",
"* 提出了简易金字塔池化模块(SPPM)\n",
"\n",
"上下文聚合是提高分割精度的另一个关键,但以前的聚合模块对于实时网络来说非常耗时。我们设计了一个简易的金字塔池化模块(SPPM),该模块减少了特征图的中间通道和输出通道,删除了short cut链接,并用加法操作取代了级联操作。实验结果表明,SPPM以较小的额外推理时间提高了分割精度。\n",
"\n",
"在Cityscapes和CamVid数据集上,我们做了大量实验来评估PP-LiteSeg模型。PP-LiteSeg模型在分割精度和推理速度之间实现了最佳权衡。具体来说,PP-LiteSeg在Cityscapes测试集上实现了72.0% mIoU / 273.6 FPS 和 77.5% mIoU / 102.6 FPS。\n"
]
},
{
"cell_type": "markdown",
"id": "b23dc65f-eff4-4cad-98c8-9856c5cb9ee1",
"metadata": {},
"source": [
"## 5. 学术引用\n",
"\n",
"如果我们的项目在学术上帮助到你,请考虑以下引用:\n",
"\n",
"```\n",
"@article{peng2022pp-liteseg,\n",
" title={PP-LiteSeg: A Superior Real-Time Semantic Segmentation Model},\n",
" author={Juncai Peng, Yi Liu, Shiyu Tang, Yuying Hao, Lutao Chu, Guowei Chen, Zewu Wu, Zeyu Chen, Zhiliang Yu, Yuning Du, Qingqing Dang,Baohua Lai, Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma.},\n",
" journal={arXiv e-prints},\n",
" pages={arXiv--2204},\n",
" year={2022}\n",
"}\n",
"```\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "py35-paddle1.2.0"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册