diff --git a/contrib/HumanSeg/README.md b/contrib/HumanSeg/README.md
index c5ba095749e592bbb3d866935bfde4a904323862..52a57590dd6bce71c785cc10d37b74357d0f15ce 100644
--- a/contrib/HumanSeg/README.md
+++ b/contrib/HumanSeg/README.md
@@ -18,9 +18,9 @@ $ pip install -r requirements.txt
HumanSeg provides three pretrained models trained on large-scale portrait data, covering a range of deployment scenarios.
| Model Type | Checkpoint | Inference Model | Quant Inference Model | Notes |
| --- | --- | --- | --- | --- |
-| HumanSeg-server | [humanseg_server_ckpt](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server.zip) | [humanseg_server_export](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_export.zip) | -- | High-accuracy model for server-side GPU and portrait scenes with complex backgrounds |
-| HumanSeg-mobile | [humanseg_mobile_ckpt](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile.zip) | [humanseg_mobile_export](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_export.zip) | [humanseg_mobile_quant](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip) | Lightweight model for front-camera scenes on mobile devices or server-side CPU |
-| HumanSeg-lite | [humanseg_lite](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite.zip) | [humanseg_lite_export](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_export.zip) | [humanseg_lite_quant](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_quant.zip) | Ultra-lightweight model for phone selfie portraits and real-time segmentation on mobile devices |
+| HumanSeg-server | [humanseg_server_ckpt](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_ckpt.zip) | [humanseg_server_inference](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_inference.zip) | -- | High-accuracy model for server-side GPU and portrait scenes with complex backgrounds |
+| HumanSeg-mobile | [humanseg_mobile_ckpt](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_ckpt.zip) | [humanseg_mobile_inference](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip) | [humanseg_mobile_quant](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip) | Lightweight model for front-camera scenes on mobile devices or server-side CPU |
+| HumanSeg-lite | [humanseg_lite_ckpt](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_ckpt.zip) | [humanseg_lite_inference](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_inference.zip) | [humanseg_lite_quant](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_quant.zip) | Ultra-lightweight model for phone selfie portraits and real-time segmentation on mobile devices |
**NOTE:**
Checkpoint archives contain model weights and are intended for fine-tuning.
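Any of the archives above can be fetched and unpacked with the Python standard library alone. A minimal sketch (the URL is taken from the table; the local file and directory names are arbitrary choices):
```python
import urllib.request
import zipfile

# One of the inference-model archives from the table above.
URL = "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip"

# Download to a local file, then extract.
archive, _ = urllib.request.urlretrieve(URL, "humanseg_mobile_inference.zip")
with zipfile.ZipFile(archive) as zf:
    zf.extractall("pretrained_weights")
```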
diff --git a/contrib/HumanSeg/datasets/ade20k.py b/contrib/HumanSeg/datasets/ade20k.py
deleted file mode 100644
index ddd6876c2b5ba6f116b0c304cb3521e585be2034..0000000000000000000000000000000000000000
--- a/contrib/HumanSeg/datasets/ade20k.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/contrib/HumanSeg/datasets/cityscapes.py b/contrib/HumanSeg/datasets/cityscapes.py
deleted file mode 100644
index ddd6876c2b5ba6f116b0c304cb3521e585be2034..0000000000000000000000000000000000000000
--- a/contrib/HumanSeg/datasets/cityscapes.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/contrib/HumanSeg/datasets/pascalvoc.py b/contrib/HumanSeg/datasets/pascalvoc.py
deleted file mode 100644
index ddd6876c2b5ba6f116b0c304cb3521e585be2034..0000000000000000000000000000000000000000
--- a/contrib/HumanSeg/datasets/pascalvoc.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/contrib/HumanSeg/deploy/README.md b/contrib/HumanSeg/deploy/README.md
deleted file mode 100644
index b5baf00a62b53109d201b3263ae2acd03c0cec43..0000000000000000000000000000000000000000
--- a/contrib/HumanSeg/deploy/README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Human Segmentation Inference Deployment
-
-This solution builds on PaddlePaddle's open-source human segmentation models, adds extensive optical-flow tracking optimizations for video, and provides a complete human segmentation pipeline for video streams along with a high-performance `Python` integration for deployment.
-
-
-## Model Download
-
-The supported model files are listed below; choose the one that fits your application:
-
-| Model File | Description |
-| --- | --- |
-|[humanseg_lite_quant]() | Small model, suited to lightweight compute environments |
-|[humanseg_lite]()| Small model, suited to lightweight compute environments |
-|[humanseg_mobile_quant]() | Small model, suited to lightweight compute environments |
-|[humanseg_mobile]()| Small model, suited to lightweight compute environments |
-|[humanseg_server_quant]() | For server-side GPU environments |
-|[humanseg_server]() | For server-side GPU environments |
-
-**Note: after downloading, unzip the archive to a suitable path; that path is later passed as an inference argument to load the model.**
-
-
-## Inference Deployment
-- [Python deployment](./python)
-
-## Preview
-
-
-
-
-
diff --git a/contrib/HumanSeg/deploy/python/README.md b/contrib/HumanSeg/deploy/python/README.md
deleted file mode 100644
index e5aa6f2f6804cd006933abb5f163771ea03ac670..0000000000000000000000000000000000000000
--- a/contrib/HumanSeg/deploy/python/README.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# Python Deployment for Human Segmentation Inference
-
-This solution is implemented in Python with minimal dependencies; model loading, data preprocessing, inference, and post-processing such as optical-flow handling are all encapsulated in `infer.py`, which can be used directly or integrated into your own project.
-
-
-## Prerequisites
-- Windows (7, 8, 10) / Linux (Ubuntu 16.04) or macOS 10.1+
-- Paddle 1.7+
-- Python 3.6+
-
-Notes:
-1. Only Paddle 1.7 and above is supported.
-2. GPU inference is not supported on macOS.
-
-Other setups not listed above generally work as long as `Paddle` and `OpenCV` install correctly.
-
-
-## Installing Dependencies
-
-Run the following command:
-
-```shell
-pip install -r requirements.txt
-```
-
-## 运行
-
-
-1. Segment an input image
-```shell
-python infer.py --model_dir /PATH/TO/INFERENCE/MODEL --img_path /PATH/TO/INPUT/IMAGE
-```
-
-The result is saved as `result.jpeg`.
-2. Segment an input video
-```shell
-python infer.py --model_dir /PATH/TO/INFERENCE/MODEL --video_path /PATH/TO/INPUT/VIDEO
-```
-
-The result is saved as `result.avi`.
-
-3. Use the camera video stream
-```shell
-python infer.py --model_dir /PATH/TO/INFERENCE/MODEL --use_camera True
-```
-Results are displayed live in a visualization window.
-
-**Note:**
-
-`GPU` is disabled by default. To enable `GPU` acceleration, first run
-```shell
-export CUDA_VISIBLE_DEVICES=0
-```
-and then add `--use_gpu True` to the commands above.
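Beyond the CLI, the `HumanSeg` class defined in `infer.py` (below) can be driven directly from Python. A sketch based on the constructor and methods in that file, with the normalization constants copied from its `main()`; the model and input paths are placeholders:
```python
from infer import HumanSeg  # deploy/python/infer.py

# mean/scale/eval_size mirror main() in infer.py.
model = HumanSeg(
    model_dir='/PATH/TO/INFERENCE/MODEL',
    mean=[0.5, 0.5, 0.5],
    scale=[0.5, 0.5, 0.5],
    eval_size=(192, 192),
    use_gpu=False)
model.image_segment('/PATH/TO/INPUT/IMAGE')  # writes result.jpeg
model.video_segment('/PATH/TO/INPUT/VIDEO')  # writes result.avi
```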
diff --git a/contrib/HumanSeg/deploy/python/infer.py b/contrib/HumanSeg/deploy/python/infer.py
deleted file mode 100644
index 74efbe1b170fcd47e2720050a426c066e80c8c89..0000000000000000000000000000000000000000
--- a/contrib/HumanSeg/deploy/python/infer.py
+++ /dev/null
@@ -1,342 +0,0 @@
-# coding: utf8
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""实时人像分割Python预测部署"""
-
-import os
-import argparse
-import numpy as np
-import cv2
-
-import paddle.fluid as fluid
-
-
-def humanseg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow):
- """计算光流跟踪匹配点和光流图
- 输入参数:
- pre_gray: 上一帧灰度图
- cur_gray: 当前帧灰度图
- prev_cfd: 上一帧光流图
- dl_weights: 融合权重图
- disflow: 光流数据结构
- 返回值:
- is_track: 光流点跟踪二值图,即是否具有光流点匹配
- track_cfd: 光流跟踪图
- """
- check_thres = 8
- hgt, wdh = pre_gray.shape[:2]
- track_cfd = np.zeros_like(prev_cfd)
- is_track = np.zeros_like(pre_gray)
-    # forward flow: pre_gray -> cur_gray
-    flow_fw = disflow.calc(pre_gray, cur_gray, None)
-    # backward flow: cur_gray -> pre_gray
-    flow_bw = disflow.calc(cur_gray, pre_gray, None)
-    # round half away from zero
-    get_round = lambda data: int(data + 0.5) if data >= 0 else int(data - 0.5)
- for row in range(hgt):
- for col in range(wdh):
-            # destination of each pixel under the forward flow
-            # (row, col) -> (cur_x, cur_y)
- fxy_fw = flow_fw[row, col]
- dx_fw = get_round(fxy_fw[0])
- cur_x = dx_fw + col
- dy_fw = get_round(fxy_fw[1])
- cur_y = dy_fw + row
- if cur_x < 0 or cur_x >= wdh or cur_y < 0 or cur_y >= hgt:
- continue
- fxy_bw = flow_bw[cur_y, cur_x]
- dx_bw = get_round(fxy_bw[0])
- dy_bw = get_round(fxy_bw[1])
-            # reject matches whose forward/backward round trip exceeds the threshold
- lmt = ((dy_fw + dy_bw) * (dy_fw + dy_bw) +
- (dx_fw + dx_bw) * (dx_fw + dx_bw))
- if lmt >= check_thres:
- continue
-            # down-weight static points
- if abs(dy_fw) <= 0 and abs(dx_fw) <= 0 and abs(dy_bw) <= 0 and abs(
- dx_bw) <= 0:
- dl_weights[cur_y, cur_x] = 0.05
- is_track[cur_y, cur_x] = 1
- track_cfd[cur_y, cur_x] = prev_cfd[row, col]
- return track_cfd, is_track, dl_weights
-
-
-def humanseg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track):
- """光流追踪图和人像分割结构融合
- 输入参数:
- track_cfd: 光流追踪图
- dl_cfd: 当前帧分割结果
- dl_weights: 融合权重图
- is_track: 光流点匹配二值图
- 返回值:
- cur_cfd: 光流跟踪图和人像分割结果融合图
- """
- cur_cfd = dl_cfd.copy()
-    idxs = np.where(is_track > 0)
-    # iterate over every tracked pixel; np.where returns a tuple of index
-    # arrays, so the pixel count is len(idxs[0]), not len(idxs)
-    for i in range(len(idxs[0])):
-        r, c = idxs[0][i], idxs[1][i]
-        dl_score = dl_cfd[r, c]
-        track_score = track_cfd[r, c]
-        if dl_score > 0.9 or dl_score < 0.1:
-            if dl_weights[r, c] < 0.1:
-                cur_cfd[r, c] = 0.3 * dl_score + 0.7 * track_score
-            else:
-                cur_cfd[r, c] = 0.4 * dl_score + 0.6 * track_score
-        else:
-            cur_cfd[r, c] = dl_weights[r, c] * dl_score + (
-                1 - dl_weights[r, c]) * track_score
-    return cur_cfd
-
-
-def threshold_mask(img, thresh_bg, thresh_fg):
- """设置背景和前景阈值mask
- 输入参数:
- img : 原始图像, np.uint8 类型.
- thresh_bg : 背景阈值百分比,低于该值置为0.
- thresh_fg : 前景阈值百分比,超过该值置为1.
- 返回值:
- dst : 原始图像设置完前景背景阈值mask结果, np.float32 类型.
- """
- dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg)
- dst[np.where(dst > 1)] = 1
- dst[np.where(dst < 0)] = 0
- return dst.astype(np.float32)
-
-
-def optflow_handle(cur_gray, scoremap, is_init):
- """光流优化
- Args:
- cur_gray : 当前帧灰度图
- scoremap : 当前帧分割结果
- is_init : 是否第一帧
- Returns:
- dst : 光流追踪图和预测结果融合图, 类型为 np.float32
- """
-    height, width = scoremap.shape[0], scoremap.shape[1]
-    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
-    prev_gray = np.zeros((height, width), np.uint8)
-    prev_cfd = np.zeros((height, width), np.float32)
- cur_cfd = scoremap.copy()
- if is_init:
- is_init = False
- if height <= 64 or width <= 64:
- disflow.setFinestScale(1)
- elif height <= 160 or width <= 160:
- disflow.setFinestScale(2)
- else:
- disflow.setFinestScale(3)
- fusion_cfd = cur_cfd
- else:
-        weights = np.ones((height, width), np.float32) * 0.3
- track_cfd, is_track, weights = humanseg_tracking(
- prev_gray, cur_gray, prev_cfd, weights, disflow)
- fusion_cfd = humanseg_track_fuse(track_cfd, cur_cfd, weights, is_track)
- fusion_cfd = cv2.GaussianBlur(fusion_cfd, (3, 3), 0)
- return fusion_cfd
-
-
-class HumanSeg:
- """人像分割类
- 封装了人像分割模型的加载,数据预处理,预测,后处理等
- """
-
- def __init__(self, model_dir, mean, scale, eval_size, use_gpu=False):
-
- self.mean = np.array(mean).reshape((3, 1, 1))
- self.scale = np.array(scale).reshape((3, 1, 1))
- self.eval_size = eval_size
- self.load_model(model_dir, use_gpu)
-
- def load_model(self, model_dir, use_gpu):
- """加载模型并创建predictor
- Args:
- model_dir: 预测模型路径, 包含 `__model__` 和 `__params__`
- use_gpu: 是否使用GPU加速
- """
- prog_file = os.path.join(model_dir, '__model__')
- params_file = os.path.join(model_dir, '__params__')
- config = fluid.core.AnalysisConfig(prog_file, params_file)
- if use_gpu:
- config.enable_use_gpu(100, 0)
- config.switch_ir_optim(True)
- else:
- config.disable_gpu()
- config.disable_glog_info()
- config.switch_specify_input_names(True)
- config.enable_memory_optim()
- self.predictor = fluid.core.create_paddle_predictor(config)
-
- def preprocess(self, image):
- """图像预处理
- hwc_rgb 转换为 chw_bgr,并进行归一化
- 输入参数:
- image: 原始图像
- 返回值:
- 经过预处理后的图片结果
- """
- img_mat = cv2.resize(
- image, self.eval_size, interpolation=cv2.INTER_LINEAR)
- # HWC -> CHW
- img_mat = img_mat.swapaxes(1, 2)
- img_mat = img_mat.swapaxes(0, 1)
- # Convert to float
- img_mat = img_mat[:, :, :].astype('float32')
- img_mat = (img_mat / 255. - self.mean) / self.scale
- img_mat = img_mat[np.newaxis, :, :, :]
- return img_mat
-
- def postprocess(self, image, output_data):
- """对预测结果进行后处理
- Args:
- image: 原始图,opencv 图片对象
- output_data: Paddle预测结果原始数据
- Returns:
- 原图和预测结果融合并做了光流优化的结果图
- """
- scoremap = output_data[0, 1, :, :]
- scoremap = (scoremap * 255).astype(np.uint8)
- ori_h, ori_w = image.shape[0], image.shape[1]
- evl_h, evl_w = self.eval_size[0], self.eval_size[1]
-        # optical-flow refinement
- cur_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
- cur_gray = cv2.resize(cur_gray, (evl_w, evl_h))
- optflow_map = optflow_handle(cur_gray, scoremap, False)
- optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
- optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
- optflow_map = cv2.resize(optflow_map, (ori_w, ori_h))
- optflow_map = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
- bg_im = np.ones_like(optflow_map) * 255
- comb = (optflow_map * image + (1 - optflow_map) * bg_im).astype(
- np.uint8)
- return comb
-
- def run_predict(self, image):
- """运行预测并返回可视化结果图
- 输入参数:
- image: 需要预测的原始图, opencv图片对象
- 返回值:
- 可视化的预测结果图
- """
- im_mat = self.preprocess(image)
- im_tensor = fluid.core.PaddleTensor(im_mat.copy().astype('float32'))
- output_data = self.predictor.run([im_tensor])[1]
- output_data = output_data.as_ndarray()
- return self.postprocess(image, output_data)
-
- def image_segment(self, path):
- """对图片文件进行分割
- 结果保存到`result.jpeg`文件中
- """
- img_mat = cv2.imread(path)
- img_mat = self.run_predict(img_mat)
- cv2.imwrite('result.jpeg', img_mat)
-
- def video_segment(self, path=None):
- """
- 对视屏流进行分割,
- path为None时默认打开摄像头。
- """
- if path is None:
- cap = cv2.VideoCapture(0)
- else:
- cap = cv2.VideoCapture(path)
-        if not cap.isOpened():
-            raise IOError("Error opening video stream or file")
-
- if path is not None:
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
- fps = cap.get(cv2.CAP_PROP_FPS)
-            # writer for saving the prediction video
- out = cv2.VideoWriter('result.avi',
- cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
- fps, (width, height))
-            # start reading video frames
- while cap.isOpened():
- ret, frame = cap.read()
- if ret:
- img_mat = self.run_predict(frame)
- out.write(img_mat)
- else:
- break
- cap.release()
- out.release()
-
- else:
- while cap.isOpened():
- ret, frame = cap.read()
- if ret:
- img_mat = self.run_predict(frame)
- cv2.imshow('HumanSegmentation', img_mat)
- if cv2.waitKey(1) & 0xFF == ord('q'):
- break
- else:
- break
- cap.release()
-
-
-def main(args):
- """预测程序入口
- 完成模型加载, 对视频、摄像头、图片文件等预测过程
- """
- model_dir = args.model_dir
- use_gpu = args.use_gpu
-
-    # load the model
- mean = [0.5, 0.5, 0.5]
- scale = [0.5, 0.5, 0.5]
- eval_size = (192, 192)
- model = HumanSeg(model_dir, mean, scale, eval_size, use_gpu)
- if args.use_camera:
-        # use the camera
- model.video_segment()
- elif args.video_path:
-        # use a video file as input
- model.video_segment(args.video_path)
- elif args.img_path:
-        # use an image file as input
- model.image_segment(args.img_path)
- else:
-        raise ValueError(
-            'One of (--img_path, --video_path, --use_camera) should be given.')
-
-
-def parse_args():
- """解析命令行参数
- """
- parser = argparse.ArgumentParser('Realtime Human Segmentation')
- parser.add_argument(
- '--model_dir',
- type=str,
- default='',
- help='path of human segmentation model')
- parser.add_argument(
- '--img_path', type=str, default='', help='path of input image')
- parser.add_argument(
- '--video_path', type=str, default='', help='path of input video')
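-    # NOTE: argparse's type=bool treats any non-empty string (including
-    # 'False') as True, so pass the two flags below only to enable them,
-    # e.g. `--use_gpu True` as in the README.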
- parser.add_argument(
- '--use_camera',
- type=bool,
- default=False,
- help='input video stream from camera')
- parser.add_argument(
- '--use_gpu', type=bool, default=False, help='enable gpu')
- return parser.parse_args()
-
-
-if __name__ == "__main__":
- args = parse_args()
- main(args)
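The heart of `humanseg_tracking` above is a forward-backward consistency check on DIS optical flow: a pixel counts as tracked only when its forward displacement and the backward displacement at its destination nearly cancel. A vectorized sketch of that check in isolation (unlike the loop version, it clips out-of-range destinations rather than skipping them, and thresholds the unrounded flow):
```python
import cv2
import numpy as np

def consistency_mask(prev_gray, cur_gray, thresh=8):
    """Return a bool map of pixels whose forward-backward round trip is small."""
    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    flow_fw = disflow.calc(prev_gray, cur_gray, None)  # prev -> cur
    flow_bw = disflow.calc(cur_gray, prev_gray, None)  # cur -> prev

    h, w = prev_gray.shape[:2]
    xs, ys = np.meshgrid(np.arange(w), np.arange(h))
    # Destination of every pixel under the forward flow, rounded to grid cells.
    cur_x = np.clip(np.rint(xs + flow_fw[..., 0]).astype(int), 0, w - 1)
    cur_y = np.clip(np.rint(ys + flow_fw[..., 1]).astype(int), 0, h - 1)
    # The forward displacement plus the backward displacement sampled at the
    # destination should cancel out for a consistently tracked pixel.
    round_trip = flow_fw + flow_bw[cur_y, cur_x]
    return (round_trip ** 2).sum(axis=-1) < thresh
```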
diff --git a/contrib/HumanSeg/deploy/python/requirements.txt b/contrib/HumanSeg/deploy/python/requirements.txt
deleted file mode 100644
index 953dae0cf5e2036ad093907b30ac9a3a10858d27..0000000000000000000000000000000000000000
--- a/contrib/HumanSeg/deploy/python/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-opencv-python==4.1.2.30
-opencv-contrib-python==4.2.0.32
diff --git a/contrib/HumanSeg/nets/unet.py b/contrib/HumanSeg/nets/unet.py
deleted file mode 100644
index 681202a6dfbba575e0d05f819116580d5738256b..0000000000000000000000000000000000000000
--- a/contrib/HumanSeg/nets/unet.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from collections import OrderedDict
-import paddle.fluid as fluid
-
-from .libs import scope
-from .libs import bn_relu, conv, max_pool, deconv
-from .libs import sigmoid_to_softmax
-from .seg_modules import softmax_with_loss
-from .seg_modules import dice_loss, bce_loss
-
-
-class UNet(object):
- """实现Unet模型
- `"U-Net: Convolutional Networks for Biomedical Image Segmentation"
- `
-
- Args:
- num_classes (int): 类别数
- mode (str): 网络运行模式,根据mode构建网络的输入和返回。
- 当mode为'train'时,输入为image(-1, 3, -1, -1)和label (-1, 1, -1, -1) 返回loss。
- 当mode为'train'时,输入为image (-1, 3, -1, -1)和label (-1, 1, -1, -1),返回loss,
- pred (与网络输入label 相同大小的预测结果,值代表相应的类别),label,mask(非忽略值的mask,
- 与label相同大小,bool类型)。
- 当mode为'test'时,输入为image(-1, 3, -1, -1)返回pred (-1, 1, -1, -1)和
- logit (-1, num_classes, -1, -1) 通道维上代表每一类的概率值。
- upsample_mode (str): UNet decode时采用的上采样方式,取值为'bilinear'时利用双线行差值进行上菜样,
- 当输入其他选项时则利用反卷积进行上菜样,默认为'bilinear'。
- use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。
- use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。
- 当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。
- class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为
- num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重
- 自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,
- 即平时使用的交叉熵损失函数。
- ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。
-
- Raises:
- ValueError: use_bce_loss或use_dice_loss为真且num_calsses > 2。
- ValueError: class_weight为list, 但长度不等于num_class。
- class_weight为str, 但class_weight.low()不等于dynamic。
- TypeError: class_weight不为None时,其类型不是list或str。
- """
-
- def __init__(self,
- num_classes,
- mode='train',
- upsample_mode='bilinear',
- use_bce_loss=False,
- use_dice_loss=False,
- class_weight=None,
- ignore_index=255):
-        # dice loss and bce loss only apply to binary segmentation
-        if num_classes > 2 and (use_bce_loss or use_dice_loss):
-            raise Exception(
-                "dice loss and bce loss are only applicable to binary classification"
-            )
-
- if class_weight is not None:
- if isinstance(class_weight, list):
- if len(class_weight) != num_classes:
- raise ValueError(
- "Length of class_weight should be equal to number of classes"
- )
- elif isinstance(class_weight, str):
- if class_weight.lower() != 'dynamic':
- raise ValueError(
- "if class_weight is string, must be dynamic!")
- else:
- raise TypeError(
-                'Expected class_weight to be a list or string but received {}'.
-                format(type(class_weight)))
- self.num_classes = num_classes
- self.mode = mode
- self.upsample_mode = upsample_mode
- self.use_bce_loss = use_bce_loss
- self.use_dice_loss = use_dice_loss
- self.class_weight = class_weight
- self.ignore_index = ignore_index
-
- def _double_conv(self, data, out_ch):
- param_attr = fluid.ParamAttr(
- name='weights',
- regularizer=fluid.regularizer.L2DecayRegularizer(
- regularization_coeff=0.0),
- initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33))
- with scope("conv0"):
- data = bn_relu(
- conv(
- data, out_ch, 3, stride=1, padding=1,
- param_attr=param_attr))
- with scope("conv1"):
- data = bn_relu(
- conv(
- data, out_ch, 3, stride=1, padding=1,
- param_attr=param_attr))
- return data
-
- def _down(self, data, out_ch):
-        # downsampling: max_pool followed by two convolutions
- with scope("down"):
- data = max_pool(data, 2, 2, 0)
- data = self._double_conv(data, out_ch)
- return data
-
- def _up(self, data, short_cut, out_ch):
-        # upsampling: upsample data (resize or deconv), then concat with short_cut
- param_attr = fluid.ParamAttr(
- name='weights',
- regularizer=fluid.regularizer.L2DecayRegularizer(
- regularization_coeff=0.0),
- initializer=fluid.initializer.XavierInitializer(),
- )
- with scope("up"):
- if self.upsample_mode == 'bilinear':
- short_cut_shape = fluid.layers.shape(short_cut)
- data = fluid.layers.resize_bilinear(data, short_cut_shape[2:])
- else:
- data = deconv(
- data,
- out_ch // 2,
- filter_size=2,
- stride=2,
- padding=0,
- param_attr=param_attr)
- data = fluid.layers.concat([data, short_cut], axis=1)
- data = self._double_conv(data, out_ch)
- return data
-
- def _encode(self, data):
-        # encoder
- short_cuts = []
- with scope("encode"):
- with scope("block1"):
- data = self._double_conv(data, 64)
- short_cuts.append(data)
- with scope("block2"):
- data = self._down(data, 128)
- short_cuts.append(data)
- with scope("block3"):
- data = self._down(data, 256)
- short_cuts.append(data)
- with scope("block4"):
- data = self._down(data, 512)
- short_cuts.append(data)
- with scope("block5"):
- data = self._down(data, 512)
- return data, short_cuts
-
- def _decode(self, data, short_cuts):
-        # decoder, symmetric with the encoder
- with scope("decode"):
- with scope("decode1"):
- data = self._up(data, short_cuts[3], 256)
- with scope("decode2"):
- data = self._up(data, short_cuts[2], 128)
- with scope("decode3"):
- data = self._up(data, short_cuts[1], 64)
- with scope("decode4"):
- data = self._up(data, short_cuts[0], 64)
- return data
-
- def _get_logit(self, data, num_classes):
-        # size the final conv layer's output by the number of classes
- param_attr = fluid.ParamAttr(
- name='weights',
- regularizer=fluid.regularizer.L2DecayRegularizer(
- regularization_coeff=0.0),
- initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
- with scope("logit"):
- data = conv(
- data,
- num_classes,
- 3,
- stride=1,
- padding=1,
- param_attr=param_attr)
- return data
-
- def _get_loss(self, logit, label, mask):
- avg_loss = 0
- if not (self.use_dice_loss or self.use_bce_loss):
- avg_loss += softmax_with_loss(
- logit,
- label,
- mask,
- num_classes=self.num_classes,
- weight=self.class_weight,
- ignore_index=self.ignore_index)
- else:
- if self.use_dice_loss:
- avg_loss += dice_loss(logit, label, mask)
- if self.use_bce_loss:
- avg_loss += bce_loss(
- logit, label, mask, ignore_index=self.ignore_index)
-
- return avg_loss
-
- def generate_inputs(self):
- inputs = OrderedDict()
- inputs['image'] = fluid.data(
- dtype='float32', shape=[None, 3, None, None], name='image')
- if self.mode == 'train':
- inputs['label'] = fluid.data(
- dtype='int32', shape=[None, 1, None, None], name='label')
- elif self.mode == 'eval':
- inputs['label'] = fluid.data(
- dtype='int32', shape=[None, 1, None, None], name='label')
- return inputs
-
- def build_net(self, inputs):
-        # for binary segmentation with dice or bce loss, the final logit has one channel
- if self.use_dice_loss or self.use_bce_loss:
- self.num_classes = 1
-
- image = inputs['image']
- encode_data, short_cuts = self._encode(image)
- decode_data = self._decode(encode_data, short_cuts)
- logit = self._get_logit(decode_data, self.num_classes)
-
- if self.num_classes == 1:
- out = sigmoid_to_softmax(logit)
- out = fluid.layers.transpose(out, [0, 2, 3, 1])
- else:
- out = fluid.layers.transpose(logit, [0, 2, 3, 1])
-
- pred = fluid.layers.argmax(out, axis=3)
- pred = fluid.layers.unsqueeze(pred, axes=[3])
-
- if self.mode == 'train':
- label = inputs['label']
- mask = label != self.ignore_index
- return self._get_loss(logit, label, mask)
-
- elif self.mode == 'eval':
- label = inputs['label']
- mask = label != self.ignore_index
- loss = self._get_loss(logit, label, mask)
- return loss, pred, label, mask
- else:
- if self.num_classes == 1:
- logit = sigmoid_to_softmax(logit)
- else:
- logit = fluid.layers.softmax(logit, axis=1)
- return pred, logit
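The removed `UNet` class exposed its whole graph-construction contract through `generate_inputs` and `build_net`. A minimal sketch of wiring up the inference graph under Paddle 1.x `fluid`, following that contract (program names are arbitrary):
```python
import paddle.fluid as fluid
from nets.unet import UNet  # layout as in contrib/HumanSeg

model = UNet(num_classes=2, mode='test')
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    inputs = model.generate_inputs()   # {'image': float32, shape (-1, 3, -1, -1)}
    # In 'test' mode, build_net returns per-pixel class ids and per-class probabilities.
    pred, logit = model.build_net(inputs)
```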
diff --git a/contrib/HumanSeg/pretrained_weights/download_pretrained_weights.py b/contrib/HumanSeg/pretrained_weights/download_pretrained_weights.py
index 9947bbdeaa57d1200514f5659194a0e37deb5a6a..e573df05f94f5a612ef6c2f5a2eb2c9cd55cc2f1 100644
--- a/contrib/HumanSeg/pretrained_weights/download_pretrained_weights.py
+++ b/contrib/HumanSeg/pretrained_weights/download_pretrained_weights.py
@@ -22,20 +22,20 @@ sys.path.append(TEST_PATH)
from test_utils import download_file_and_uncompress
model_urls = {
- "humanseg_server":
- "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server.zip",
- "humanseg_server_export":
- "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_export.zip",
- "humanseg_mobile":
- "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile.zip",
- "humanseg_mobile_export":
- "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_export.zip",
+ "humanseg_server_ckpt":
+ "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_ckpt.zip",
+ "humanseg_server_inference":
+ "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_inference.zip",
+ "humanseg_mobile_ckpt":
+ "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_ckpt.zip",
+ "humanseg_mobile_inference":
+ "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip",
"humanseg_mobile_quant":
"https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip",
- "humanseg_lite":
- "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite.zip",
- "humanseg_lite_export":
- "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_export.zip",
+ "humanseg_lite_ckpt":
+ "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_ckpt.zip",
+ "humanseg_lite_inference":
+ "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_inference.zip",
"humanseg_lite_quant":
"https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_quant.zip",
}
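With the keys renamed, fetching a pretrained model reduces to a dictionary lookup plus the repository's own helper. A sketch that assumes `download_file_and_uncompress` accepts the URL as its first positional argument; its actual signature and path defaults live in `test_utils`:
```python
# Assumption: the helper takes the URL positionally and applies its own
# defaults for the download and extraction paths; see test_utils for details.
name = "humanseg_mobile_inference"
download_file_and_uncompress(model_urls[name])
```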