提交 b9c9ed27 编写于 作者: W wuzewu

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleSeg into develop

...@@ -89,12 +89,13 @@ pip install -r requirements.txt ...@@ -89,12 +89,13 @@ pip install -r requirements.txt
* [数据和配置校验](./docs/check.md) * [数据和配置校验](./docs/check.md)
* [分割模型介绍](./docs/models.md) * [分割模型介绍](./docs/models.md)
* [预训练模型下载](./docs/model_zoo.md) * [预训练模型下载](./docs/model_zoo.md)
* [DeepLabv3+模型使用教程](./turtorial/finetune_deeplabv3plus.md) * [DeepLabv3+模型使用教程](./tutorial/finetune_deeplabv3plus.md)
* [U-Net模型使用教程](./turtorial/finetune_unet.md) * [U-Net模型使用教程](./tutorial/finetune_unet.md)
* [ICNet模型使用教程](./turtorial/finetune_icnet.md) * [ICNet模型使用教程](./tutorial/finetune_icnet.md)
* [PSPNet模型使用教程](./turtorial/finetune_pspnet.md) * [PSPNet模型使用教程](./tutorial/finetune_pspnet.md)
* [HRNet模型使用教程](./turtorial/finetune_hrnet.md) * [HRNet模型使用教程](./tutorial/finetune_hrnet.md)
* [Fast-SCNN模型使用教程](./turtorial/finetune_fast_scnn.md) * [Fast-SCNN模型使用教程](./tutorial/finetune_fast_scnn.md)
* [OCRNet模型使用教程](./tutorial/finetune_ocrnet.md)
### 预测部署 ### 预测部署
......
EVAL_CROP_SIZE: (2049, 1025) # (width, height), for unpadding rangescaling and stepscaling
TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling
AUG:
AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling
FIX_RESIZE_SIZE: (2048, 1024) # (width, height), for unpadding
INF_RESIZE_VALUE: 500 # for rangescaling
MAX_RESIZE_VALUE: 600 # for rangescaling
MIN_RESIZE_VALUE: 400 # for rangescaling
MAX_SCALE_FACTOR: 2.0 # for stepscaling
MIN_SCALE_FACTOR: 0.5 # for stepscaling
SCALE_STEP_SIZE: 0.25 # for stepscaling
MIRROR: True
TO_RGB: True
BATCH_SIZE: 16
DATASET:
DATA_DIR: "./dataset/cityscapes/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
NUM_CLASSES: 19
TEST_FILE_LIST: "dataset/cityscapes/val.list"
TRAIN_FILE_LIST: "dataset/cityscapes/train.list"
VAL_FILE_LIST: "dataset/cityscapes/val.list"
IGNORE_INDEX: 255
SEPARATOR: " "
FREEZE:
MODEL_FILENAME: "model"
PARAMS_FILENAME: "params"
MODEL:
DEFAULT_NORM_TYPE: "bn"
MODEL_NAME: "deeplabv3p"
DEEPLAB:
ASPP_WITH_SEP_CONV: True
DECODER_USE_SEP_CONV: True
BACKBONE: "resnet_vd_50"
BACKBONE_LR_MULT_LIST: [0.1, 0.1, 0.2, 0.2, 1.0]
TRAIN:
PRETRAINED_MODEL_DIR: u"pretrained_model/resnet50_vd_imagenet"
MODEL_SAVE_DIR: "saved_model/deeplabv3p_resnet50_vd_bn_cityscapes"
SNAPSHOT_EPOCH: 10
SYNC_BATCH_NORM: True
TEST:
TEST_MODEL: "saved_model/deeplabv3p_resnet50_vd_bn_cityscapes/final"
SOLVER:
LR: 0.05
LR_POLICY: "poly"
OPTIMIZER: "sgd"
NUM_EPOCHS: 700
EVAL_CROP_SIZE: (2048, 1024) # (width, height), for unpadding rangescaling and stepscaling
TRAIN_CROP_SIZE: (1024, 512) # (width, height), for unpadding rangescaling and stepscaling
AUG:
# AUG_METHOD: "unpadding" # choice unpadding rangescaling and stepscaling
AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling
FIX_RESIZE_SIZE: (1024, 512) # (width, height), for unpadding
INF_RESIZE_VALUE: 500 # for rangescaling
MAX_RESIZE_VALUE: 600 # for rangescaling
MIN_RESIZE_VALUE: 400 # for rangescaling
MAX_SCALE_FACTOR: 2.0 # for stepscaling
MIN_SCALE_FACTOR: 0.5 # for stepscaling
SCALE_STEP_SIZE: 0.25 # for stepscaling
MIRROR: True
BATCH_SIZE: 4
#BATCH_SIZE: 4
DATASET:
DATA_DIR: "./dataset/cityscapes/"
IMAGE_TYPE: "rgb" # choice rgb or rgba
NUM_CLASSES: 19
TEST_FILE_LIST: "./dataset/cityscapes/val.list"
TRAIN_FILE_LIST: "./dataset/cityscapes/train.list"
VAL_FILE_LIST: "./dataset/cityscapes/val.list"
VIS_FILE_LIST: "./dataset/cityscapes/val.list"
IGNORE_INDEX: 255
SEPARATOR: " "
FREEZE:
MODEL_FILENAME: "model"
PARAMS_FILENAME: "params"
MODEL:
MODEL_NAME: "ocrnet"
DEFAULT_NORM_TYPE: "bn"
HRNET:
STAGE2:
NUM_CHANNELS: [18, 36]
STAGE3:
NUM_CHANNELS: [18, 36, 72]
STAGE4:
NUM_CHANNELS: [18, 36, 72, 144]
OCR:
OCR_MID_CHANNELS: 512
OCR_KEY_CHANNELS: 256
MULTI_LOSS_WEIGHT: [1.0, 1.0]
TRAIN:
PRETRAINED_MODEL_DIR: u"./pretrained_model/ocrnet_w18_cityscape/best_model"
MODEL_SAVE_DIR: "output/ocrnet_w18_bn_cityscapes"
SNAPSHOT_EPOCH: 1
SYNC_BATCH_NORM: True
TEST:
TEST_MODEL: "output/ocrnet_w18_bn_cityscapes/first"
SOLVER:
LR: 0.01
LR_POLICY: "poly"
OPTIMIZER: "sgd"
NUM_EPOCHS: 500
...@@ -37,8 +37,6 @@ ACE2P模型包含三个分支: ...@@ -37,8 +37,6 @@ ACE2P模型包含三个分支:
![](imgs/result.jpg) ![](imgs/result.jpg)
![](ACE2P/imgs/result.jpg)
人体解析(Human Parsing)是细粒度的语义分割任务,旨在识别像素级别的人类图像的组成部分(例如,身体部位和服装)。本章节使用冠军模型Augmented Context Embedding with Edge Perceiving (ACE2P)进行预测分割。 人体解析(Human Parsing)是细粒度的语义分割任务,旨在识别像素级别的人类图像的组成部分(例如,身体部位和服装)。本章节使用冠军模型Augmented Context Embedding with Edge Perceiving (ACE2P)进行预测分割。
## 代码使用说明 ## 代码使用说明
...@@ -79,11 +77,11 @@ python -u infer.py --example ACE2P ...@@ -79,11 +77,11 @@ python -u infer.py --example ACE2P
原图: 原图:
![](ACE2P/imgs/117676_2149260.jpg) ![](imgs/117676_2149260.jpg)
预测结果: 预测结果:
![](ACE2P/imgs/117676_2149260.png) ![](imgs/117676_2149260.png)
### 备注 ### 备注
......
...@@ -27,6 +27,7 @@ import cv2 ...@@ -27,6 +27,7 @@ import cv2
import yaml import yaml
import shutil import shutil
import paddleslim as slim import paddleslim as slim
import paddle
import utils import utils
import utils.logging as logging import utils.logging as logging
...@@ -37,6 +38,15 @@ from nets import DeepLabv3p, ShuffleSeg, HRNet ...@@ -37,6 +38,15 @@ from nets import DeepLabv3p, ShuffleSeg, HRNet
import transforms as T import transforms as T
def save_infer_program(test_program, ckpt_dir):
    """Write the serialized description of a test program to disk.

    A clone of ``test_program`` is flushed, stamped with a version and with
    op-compatibility info, and its serialized description is written to
    ``<ckpt_dir>/model.pdmodel``.

    Args:
        test_program: program object providing ``clone()`` and ``desc``.
        ckpt_dir: directory the ``model.pdmodel`` file is written into.
    """
    # Work on a clone so the caller's program description is left untouched.
    program_copy = test_program.clone()
    program_desc = program_copy.desc
    program_desc.flush()
    program_desc._set_version()
    paddle.fluid.core.save_op_compatible_info(program_desc)

    model_file = os.path.join(ckpt_dir, 'model') + ".pdmodel"
    with open(model_file, "wb") as fout:
        fout.write(program_desc.serialize_to_string())
def dict2str(dict_input): def dict2str(dict_input):
out = '' out = ''
for k, v in dict_input.items(): for k, v in dict_input.items():
...@@ -244,6 +254,7 @@ class SegModel(object): ...@@ -244,6 +254,7 @@ class SegModel(object):
if self.status == 'Normal': if self.status == 'Normal':
fluid.save(self.train_prog, osp.join(save_dir, 'model')) fluid.save(self.train_prog, osp.join(save_dir, 'model'))
save_infer_program(self.test_prog, save_dir)
model_info['status'] = 'Normal' model_info['status'] = 'Normal'
elif self.status == 'Quant': elif self.status == 'Quant':
fluid.save(self.test_prog, osp.join(save_dir, 'model')) fluid.save(self.test_prog, osp.join(save_dir, 'model'))
......
...@@ -21,5 +21,3 @@ import readers ...@@ -21,5 +21,3 @@ import readers
from utils.utils import get_environ_info from utils.utils import get_environ_info
env_info = get_environ_info() env_info = get_environ_info()
log_level = 2
...@@ -30,6 +30,16 @@ from utils.utils import seconds_to_hms, get_environ_info ...@@ -30,6 +30,16 @@ from utils.utils import seconds_to_hms, get_environ_info
from utils.metrics import ConfusionMatrix from utils.metrics import ConfusionMatrix
import transforms.transforms as T import transforms.transforms as T
import utils import utils
import paddle
def save_infer_program(test_program, ckpt_dir):
    """Write the serialized description of a test program to disk.

    A clone of ``test_program`` is flushed, stamped with a version and with
    op-compatibility info, and its serialized description is written to
    ``<ckpt_dir>/model.pdmodel``.

    Args:
        test_program: program object providing ``clone()`` and ``desc``.
        ckpt_dir: directory the ``model.pdmodel`` file is written into.
    """
    # Work on a clone so the caller's program description is left untouched.
    program_copy = test_program.clone()
    program_desc = program_copy.desc
    program_desc.flush()
    program_desc._set_version()
    paddle.fluid.core.save_op_compatible_info(program_desc)

    model_file = os.path.join(ckpt_dir, 'model') + ".pdmodel"
    with open(model_file, "wb") as fout:
        fout.write(program_desc.serialize_to_string())
def dict2str(dict_input): def dict2str(dict_input):
...@@ -238,6 +248,7 @@ class BaseModel(object): ...@@ -238,6 +248,7 @@ class BaseModel(object):
if self.status == 'Normal': if self.status == 'Normal':
fluid.save(self.train_prog, osp.join(save_dir, 'model')) fluid.save(self.train_prog, osp.join(save_dir, 'model'))
save_infer_program(self.test_prog, save_dir)
model_info['status'] = self.status model_info['status'] = self.status
with open( with open(
......
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
import time import time
import os import os
import sys import sys
import __init__
levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'} levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'}
...@@ -25,7 +24,6 @@ def log(level=2, message=""): ...@@ -25,7 +24,6 @@ def log(level=2, message=""):
current_time = time.time() current_time = time.time()
time_array = time.localtime(current_time) time_array = time.localtime(current_time)
current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array) current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
if __init__.log_level >= level:
print("{} [{}]\t{}".format(current_time, levels[level], print("{} [{}]\t{}".format(current_time, levels[level],
message).encode("utf-8").decode("latin1")) message).encode("utf-8").decode("latin1"))
sys.stdout.flush() sys.stdout.flush()
......
# SpatialEmbeddings
## 模型概述
本模型是基于proposal-free的实例分割模型,快速实时,同时准确率高,适用于自动驾驶等实时场景。
本模型基于KITTI中MOTS数据集训练得到,是论文 Segment as Points for Efficient Online Multi-Object Tracking and Segmentation中的分割部分
[论文地址](https://arxiv.org/pdf/2007.01550.pdf)
## KITTI MOTS指标
KITTI MOTS验证集AP:0.76, AP_50%:0.915
## 代码使用说明
### 1. 模型下载
执行以下命令下载并解压SpatialEmbeddings预测模型:
```
python download_SpatialEmbeddings_kitti.py
```
或点击[链接](https://paddleseg.bj.bcebos.com/models/SpatialEmbeddings_kitti.tar)进行手动下载并解压。
### 2. 数据下载
前往KITTI官网下载MOTS比赛数据[链接](https://www.vision.rwth-aachen.de/page/mots)
下载后解压到./data文件夹下, 并生成验证集图片路径的test.txt
### 3. 快速预测
使用GPU预测:
```
python -u infer.py --use_gpu
```
使用CPU预测:
```
python -u infer.py
```
数据及模型路径等详细配置见config.py文件
### 4. 预测结果示例
原图:
![](imgs/kitti_0007_000518_ori.png)
预测结果:
![](imgs/kitti_0007_000518_pred.png)
## 引用
**论文**
*Instance Segmentation by Jointly Optimizing Spatial Embeddings and Clustering Bandwidth*
**代码**
https://github.com/davyneven/SpatialEmbeddings
# -*- coding: utf-8 -*-
from utils.util import AttrDict, merge_cfg_from_args, get_arguments
import os
# Inference configuration: defaults are set on `cfg` below and then merged
# with `args` by merge_cfg_from_args on the last line.
args = get_arguments()
cfg = AttrDict()
# Directory containing the images to predict
cfg.data_dir = "data"
# File listing the names of the images to predict
cfg.data_list_file = os.path.join("data", "test.txt")
# Path the model is loaded from
cfg.model_path = 'SpatialEmbeddings_kitti'
# Directory where prediction results are saved
cfg.vis_dir = "result"
# Input size of the images to predict
cfg.input_size = (384, 1248)
# Sigma value
cfg.n_sigma = 2
# Center-point threshold
cfg.threshold = 0.94
# Threshold on the number of points in a set
cfg.min_pixel = 160
# Must stay last so that the defaults above are already in place when merged.
merge_cfg_from_args(args, cfg)
kitti/0007/kitti_0007_000512.png
kitti/0007/kitti_0007_000518.png
# coding: utf8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
# Directory containing this script; the model archive is saved and unpacked here.
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
# The download helper is imported from the `test` directory two levels up,
# so that directory has to be on sys.path before the import below.
TEST_PATH = os.path.join(LOCAL_PATH, "..", "..", "test")
sys.path.append(TEST_PATH)

from test_utils import download_file_and_uncompress

if __name__ == "__main__":
    model_url = 'https://paddleseg.bj.bcebos.com/models/SpatialEmbeddings_kitti.tar'
    download_file_and_uncompress(
        url=model_url,
        savepath=LOCAL_PATH,
        extrapath=LOCAL_PATH,
        extraname='SpatialEmbeddings_kitti')

    print("Pretrained Model download success!")
# -*- coding: utf-8 -*-
import os
import numpy as np
from utils.util import get_arguments
from utils.palette import get_palette
from utils.data_util import Cluster, pad_img
from PIL import Image as PILImage
import importlib
import paddle.fluid as fluid
from models import SpatialEmbeddings
# Module-level setup shared by the dataset class and infer() below.
# NOTE(review): get_arguments presumably parses the command line - confirm in utils.util.
args = get_arguments()
# Import the local `config` module and take its `cfg` object.
config = importlib.import_module('config')
cfg = getattr(config, 'cfg')
# Single Cluster instance reused for every image.
cluster = Cluster()
# Dataset of images to run prediction on
class TestDataSet():
    """Images to predict, as listed in ``cfg.data_list_file``.

    Attributes are filled from the module-level ``cfg``:
    ``data_dir`` (image root), ``data_list_file`` (text file with one image
    name per line), ``data_list`` (resolved image paths) and ``data_num``.
    """

    def __init__(self):
        self.data_dir = cfg.data_dir
        self.data_list_file = cfg.data_list_file
        self.data_list = self.get_data_list()
        self.data_num = len(self.data_list)

    def get_data_list(self):
        """Return the list of image paths read from ``self.data_list_file``.

        Each non-blank line is an image name relative to ``self.data_dir``;
        names without any '.' get a '.jpg' extension appended.
        """
        data_list = []
        # `with` guarantees the list file is closed even if reading fails.
        with open(self.data_list_file, 'r') as data_file_handler:
            for line in data_file_handler:
                img_name = line.strip()
                if not img_name:
                    # A blank line would otherwise become the bogus entry '<data_dir>/.jpg'.
                    continue
                if '.' not in img_name:
                    img_name = img_name + '.jpg'
                data_list.append(os.path.join(self.data_dir, img_name))
        return data_list

    def preprocess(self, img):
        """Pad, scale and lay out an image array as network input.

        The array is edge-padded at the bottom/right up to ``cfg.input_size``
        (unpacked as height, width), converted to float32 in [0, 1], moved to
        channel-first order and given a leading batch axis of size 1.

        Args:
            img: 3-D array indexed (row, column, channel).

        Returns:
            float32 array with axes (1, channel, row, column).
        """
        h, w = img.shape[:2]
        h_new, w_new = cfg.input_size
        # NOTE(review): np.pad raises if the image is larger than cfg.input_size.
        img = np.pad(img, ((0, h_new - h), (0, w_new - w), (0, 0)), 'edge')
        img = img.astype(np.float32) / 255.0
        img = img.transpose((2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def get_data(self, index):
        """Load and preprocess the image at position ``index``.

        Returns:
            ``(img_process, name_prefix, img_shape)`` where ``name_prefix`` is
            the file name without directory and extension and ``img_shape`` is
            the original (height, width). If the image cannot be opened,
            ``(None, img_path, None)`` is returned so a caller that unpacks
            three values can report the path and skip the image.
        """
        img_path = self.data_list[index]
        try:
            img = np.array(PILImage.open(img_path))
        except (IOError, OSError):
            # The previous `img is None` check could never fire and returned
            # four values; report unreadable files with a 3-tuple instead.
            return None, img_path, None

        # splitext strips only the final extension, unlike replacing every
        # occurrence of '.<ext>' inside the name.
        name_prefix = os.path.splitext(os.path.basename(img_path))[0]
        img_shape = img.shape[:2]
        img_process = self.preprocess(img)
        return img_process, name_prefix, img_shape
def get_model(main_prog, startup_prog):
    """Build the SpatialEmbeddings network inside ``main_prog``.

    Args:
        main_prog: program the network ops are added to.
        startup_prog: program receiving the parameter initialisation ops.

    Returns:
        ``(image, prediction)``: the 'image' data layer and the network output.
    """
    # Per-sample shape: 3 channels followed by the two entries of cfg.input_size.
    sample_shape = [3, cfg.input_size[0], cfg.input_size[1]]
    with fluid.program_guard(main_prog, startup_prog), fluid.unique_name.guard():
        image = fluid.layers.data(name='image', shape=sample_shape, dtype='float32')
        prediction = SpatialEmbeddings(image)
    return image, prediction
def infer():
    """Run the network over every listed image and save instance maps as PNGs.

    For each image the network output is clustered into instances, the
    resulting instance map is padded to the network input size, cropped back
    to the original image size and written to ``cfg.vis_dir`` as a
    palettised ``<name>.png``.

    Returns:
        0 once all images have been processed.
    """
    if not os.path.exists(cfg.vis_dir):
        os.makedirs(cfg.vis_dir)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    _, pred_var = get_model(test_prog, startup_prog)
    test_prog = test_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Load the prediction model: only variables that have a file of the same
    # name under cfg.model_path are loaded.
    def if_exist(var):
        return os.path.exists(os.path.join(cfg.model_path, var.name))

    fluid.io.load_vars(exe, cfg.model_path, main_program=test_prog, predicate=if_exist)

    # Load the prediction dataset
    test_dataset = TestDataSet()
    data_num = test_dataset.data_num

    for idx in range(data_num):
        # Fetch one preprocessed image
        image, im_name, im_shape = test_dataset.get_data(idx)
        if image is None:
            print(im_name, 'is None')
            continue

        # Predict; fetch_list is given as a list, the form every Executor.run accepts.
        outputs = exe.run(program=test_prog, feed={'image': image}, fetch_list=[pred_var])
        instance_map, predictions = cluster.cluster(
            outputs[0][0],
            n_sigma=cfg.n_sigma,
            min_pixel=cfg.min_pixel,
            threshold=cfg.threshold)

        # Save the result: pad up to the network input size, then crop to the
        # original image size before writing the palettised PNG.
        instance_map = pad_img(instance_map, image.shape[2:])
        instance_map = instance_map[:im_shape[0], :im_shape[1]]
        output_im = PILImage.fromarray(np.asarray(instance_map, dtype=np.uint8))
        palette = get_palette(len(predictions) + 1)
        output_im.putpalette(palette)
        result_path = os.path.join(cfg.vis_dir, im_name + '.png')
        output_im.save(result_path)

        if (idx + 1) % 100 == 0:
            print('%d processd' % (idx + 1))

    # Use data_num rather than the loop variable, which is undefined when the
    # image list is empty.
    print('%d processd done' % data_num)

    return 0


if __name__ == "__main__":
    infer()
from paddle.fluid.initializer import Constant
from paddle.fluid.param_attr import ParamAttr
import paddle.fluid as fluid
def conv(input,
         num_filters,
         filter_size=None,
         stride=1,
         padding=0,
         dilation=1,
         act=None,
         name='conv'):
    """Thin wrapper around ``fluid.layers.conv2d`` with name-derived parameters.

    The weight and bias attributes are set to ``<name>_weights`` and
    ``<name>_bias``; every other argument is forwarded unchanged.
    """
    weight_attr = name + '_weights'
    bias_attr = name + '_bias'
    return fluid.layers.conv2d(
        input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        act=act,
        name=name,
        param_attr=weight_attr,
        bias_attr=bias_attr)
def conv_transpose(input,
                   num_filters,
                   output_size=None,
                   filter_size=None,
                   stride=1,
                   padding=0,
                   act=None,
                   name='conv_transpose'):
    """Thin wrapper around ``fluid.layers.conv2d_transpose``.

    The weight and bias attributes are set to ``<name>_weights`` and
    ``<name>_bias``.

    Args:
        input: input variable.
        num_filters: number of output channels.
        output_size: optional explicit output size. It used to be accepted
            but silently dropped; it is now forwarded. The default ``None``
            keeps the previous behaviour.
        filter_size: kernel size.
        stride: stride of the transposed convolution.
        padding: padding of the transposed convolution.
        act: optional activation name.
        name: layer name, also the prefix of the parameter names.
    """
    return fluid.layers.conv2d_transpose(
        input,
        num_filters=num_filters,
        output_size=output_size,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        act=act,
        name=name,
        param_attr=name + '_weights',
        bias_attr=name + '_bias')
# Epsilon passed to every batch-norm layer built by bn().
EPSILON = 0.0010000000474974513


def bn(input, name):
    """Inference-mode batch normalisation with name-derived parameters.

    With ``<id>`` being ``name`` with 'batch_norm' removed, the scale and
    offset attributes are ``bn_scale<id>_scale`` / ``bn_scale<id>_offset``,
    and the moving statistics are ``<name>_mean`` / ``<name>_variance``.
    """
    layer_id = name.replace('batch_norm', '')
    scale_attr = 'bn_scale' + layer_id + '_scale'
    offset_attr = 'bn_scale' + layer_id + '_offset'
    return fluid.layers.batch_norm(
        input,
        is_test=True,
        epsilon=EPSILON,
        param_attr=scale_attr,
        bias_attr=offset_attr,
        moving_mean_name=name + '_mean',
        moving_variance_name=name + '_variance',
        name=name)
def max_pool(input, pool_size=2, pool_stride=2, name=None):
    """Max pooling via ``fluid.layers.pool2d`` with ``ceil_mode=True``.

    ``exclusive`` is passed as False; size, stride and name are forwarded.
    """
    pool_kwargs = dict(
        pool_size=pool_size,
        pool_stride=pool_stride,
        ceil_mode=True,
        pool_type='max',
        exclusive=False,
        name=name)
    return fluid.layers.pool2d(input, **pool_kwargs)
def _downsampler(x, num_filters, conv_name, bn_name, index):
    """Stride-2 3x3 conv concatenated with a 2x2 stride-2 pool, then BN + ReLU.

    ``index`` numbers the pool/concat layers (``max_pool<index>``, ``cat<index>``).
    """
    conv_out = conv(x, filter_size=3, num_filters=num_filters, stride=2,
                    padding=1, name=conv_name)
    pool_out = fluid.layers.pool2d(x, pool_size=2, pool_stride=2,
                                   name='max_pool%d' % index)
    merged = fluid.layers.concat([conv_out, pool_out], axis=1,
                                 name='cat%d' % index)
    return fluid.layers.relu(bn(merged, name=bn_name))


def _non_bottleneck_1d(x, num_filters, dilation, conv_idx, bn_idx, add_idx):
    """Residual block made of two factorised (3x1 then 1x3) convolution pairs.

    The first pair is undilated; the second pair uses ``dilation`` along the
    axis of each kernel. Layers are named ``conv<conv_idx>`` ..
    ``conv<conv_idx+3>``, ``batch_norm<bn_idx>``, ``batch_norm<bn_idx+1>``
    and ``add<add_idx>`` so that the parameter names stay unchanged.
    """
    y = conv(x, filter_size=[3, 1], num_filters=num_filters, padding=[1, 0],
             name='conv%d' % conv_idx, act='relu')
    y = conv(y, filter_size=[1, 3], num_filters=num_filters, padding=[0, 1],
             name='conv%d' % (conv_idx + 1))
    y = fluid.layers.relu(bn(y, name='batch_norm%d' % bn_idx))
    y = conv(y, filter_size=[3, 1], num_filters=num_filters,
             padding=[dilation, 0], dilation=[dilation, 1],
             name='conv%d' % (conv_idx + 2), act='relu')
    y = conv(y, filter_size=[1, 3], num_filters=num_filters,
             padding=[0, dilation], dilation=[1, dilation],
             name='conv%d' % (conv_idx + 3))
    y = bn(y, name='batch_norm%d' % (bn_idx + 1))
    y = fluid.layers.elementwise_add(x=y, y=x, name='add%d' % add_idx)
    return fluid.layers.relu(y)


def _decoder_branch(x, out_channels, ct_idx, conv_idx, bn_idx, add_idx):
    """One decoder head: two (upsample, BN, ReLU, 2 residual blocks) stages
    with 64 then 16 channels, followed by a final 2x2 stride-2 transposed conv
    producing ``out_channels`` channels.

    ``ct_idx``, ``conv_idx``, ``bn_idx`` and ``add_idx`` are the first layer
    numbers used by this branch for each layer kind.
    """
    for stage, width in enumerate((64, 16)):
        x = conv_transpose(x, filter_size=3, num_filters=width, stride=2,
                           padding=1,
                           name='conv_transpose%d' % (ct_idx + stage))
        x = fluid.layers.relu(bn(x, name='batch_norm%d' % bn_idx))
        bn_idx += 1
        for _ in range(2):
            x = _non_bottleneck_1d(x, width, 1, conv_idx, bn_idx, add_idx)
            conv_idx += 4
            bn_idx += 2
            add_idx += 1
    return conv_transpose(x, filter_size=2, num_filters=out_channels, stride=2,
                          name='conv_transpose%d' % (ct_idx + 2))


def SpatialEmbeddings(input):
    """Build the SpatialEmbeddings network on top of ``input``.

    Structure: a shared encoder (three downsamplers, five 64-channel residual
    blocks, eight 128-channel residual blocks with dilations 2/4/8/16 twice)
    feeding two decoder branches with 4 and 1 output channels, which are
    concatenated along the channel axis.

    All conv / conv_transpose / batch_norm layers keep the names
    ``conv1..conv87``, ``conv_transpose1..6`` and ``batch_norm1..49``, so
    the parameter names derived from them are the same as with the previous
    fully unrolled definition.

    Args:
        input: image variable fed to the first layers.

    Returns:
        The ``cat4`` variable: branch 1 output (4 channels) followed by
        branch 2 output (1 channel).
    """
    # Encoder: two downsamplers (13 and 48 conv filters, each concatenated with the pooled input).
    x = _downsampler(input, 13, conv_name='conv1', bn_name='batch_norm1', index=1)
    x = _downsampler(x, 48, conv_name='conv2', bn_name='batch_norm2', index=2)

    # Five undilated 64-channel blocks: conv3..conv22, batch_norm3..12, add1..5.
    for k in range(5):
        x = _non_bottleneck_1d(x, 64, 1,
                               conv_idx=3 + 4 * k,
                               bn_idx=3 + 2 * k,
                               add_idx=1 + k)

    x = _downsampler(x, 64, conv_name='conv23', bn_name='batch_norm13', index=3)

    # Eight dilated 128-channel blocks: conv24..conv55, batch_norm14..29, add6..13.
    for k, dilation in enumerate((2, 4, 8, 16, 2, 4, 8, 16)):
        x = _non_bottleneck_1d(x, 128, dilation,
                               conv_idx=24 + 4 * k,
                               bn_idx=14 + 2 * k,
                               add_idx=6 + k)

    # Two decoder branches share the encoder output.
    branch1 = _decoder_branch(x, 4, ct_idx=1, conv_idx=56, bn_idx=30, add_idx=14)
    branch2 = _decoder_branch(x, 1, ct_idx=4, conv_idx=72, bn_idx=40, add_idx=18)

    cat4 = fluid.layers.concat([branch1, branch2], axis=1, name='cat4')
    return cat4
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
from PIL import Image as PILImage
def sigmoid_np(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``."""
    decay = np.exp(-x)
    return 1 / (1 + decay)
class Cluster:
    """Groups pixels into instances from a spatial-embedding prediction."""

    def __init__(self, ):
        # Fixed 1024 x 2048 coordinate grid: x runs 0..2 along columns and
        # y runs 0..1 along rows; stacked as a (2, 1024, 2048) array.
        x_axis = np.linspace(0, 2, 2048)[np.newaxis, np.newaxis, :]
        y_axis = np.linspace(0, 1, 1024)[np.newaxis, :, np.newaxis]
        x_grid = np.repeat(x_axis, 1024, axis=1)
        y_grid = np.repeat(y_axis, 2048, axis=2)
        self.xym = np.vstack((x_grid, y_grid))

    def cluster(self, prediction, n_sigma=1, min_pixel=160, threshold=0.5):
        """Cluster one prediction into an instance map.

        Args:
            prediction: array indexed (channel, row, column); channels 0-1
                are offsets, the next ``n_sigma`` channels are sigma, and the
                channel after those is the seed logit.
            n_sigma: number of sigma channels.
            min_pixel: minimum pixel count for the foreground, for the
                remaining unclustered pixels and for an accepted proposal.
            threshold: minimum seed score required to start a new instance.

        Returns:
            ``(instance_map, instances)``: a float32 (row, column) map with 0
            for background and 1, 2, ... per instance, and a list of dicts
            with a uint8 ``'mask'`` (0/255) and the seed ``'score'``.
        """
        height, width = prediction.shape[1:3]
        seed_channel = 2 + n_sigma

        spatial_emb = np.tanh(prediction[0:2]) + self.xym[:, 0:height, 0:width]
        sigma = prediction[2:seed_channel]
        seed_map = sigmoid_np(prediction[seed_channel:seed_channel + 1])

        instance_map = np.zeros((height, width), np.float32)
        instances = []

        # Foreground = pixels whose seed score exceeds 0.5.
        mask = seed_map > 0.5
        num_masked = mask.sum()
        if num_masked > min_pixel:
            mask_2d = mask.squeeze()
            emb_masked = spatial_emb[
                np.repeat(mask, spatial_emb.shape[0], 0)].reshape(2, -1)
            sigma_masked = sigma[np.repeat(mask, n_sigma, 0)].reshape(n_sigma, -1)
            seed_masked = seed_map[mask].reshape(1, -1)

            unclustered = np.ones(num_masked, np.float32)
            labels = np.zeros(num_masked, np.float32)
            next_label = 1

            while unclustered.sum() > min_pixel:
                # Best remaining seed among the still-unclustered pixels.
                candidate_scores = seed_masked * unclustered
                seed = candidate_scores.argmax().item()
                seed_score = candidate_scores.max().item()
                if seed_score < threshold:
                    break

                unclustered[seed] = 0
                center = emb_masked[:, seed:seed + 1]
                scale = np.exp(sigma_masked[:, seed:seed + 1] * 10)
                dist = np.exp(-1 * np.sum((emb_masked - center) ** 2 * scale, 0))
                proposal = (dist > 0.5).squeeze()

                proposal_size = proposal.sum()
                # Accept only proposals that are large enough and more than
                # half of which has not been clustered yet.
                if (proposal_size > min_pixel
                        and unclustered[proposal].sum() / proposal_size > 0.5):
                    labels[proposal.squeeze()] = next_label
                    instance_mask = np.zeros((height, width), np.float32)
                    instance_mask[mask_2d] = proposal
                    instances.append({
                        'mask': (instance_mask.squeeze() * 255).astype(np.uint8),
                        'score': seed_score,
                    })
                    next_label += 1

                # Proposal pixels are consumed whether or not it was accepted.
                unclustered[proposal] = 0

            instance_map[mask_2d] = labels

        return instance_map, instances
def pad_img(img, dst_shape, mode='constant'):
    """Pad an image at the bottom/right so it is at least ``dst_shape``.

    Only the first two axes (height, width) are padded; any trailing axes
    (for example a channel axis) are left untouched, so both ``(H, W)`` and
    ``(H, W, C)`` arrays are supported. An axis that already meets or exceeds
    its target size is not changed.

    Args:
        img: NumPy array with at least two dimensions.
        dst_shape: ``(height, width)`` target size.
        mode: Padding mode forwarded to ``np.pad``.

    Returns:
        The padded array.
    """
    img_h, img_w = img.shape[:2]
    dst_h, dst_w = dst_shape
    pad_shape = [(0, max(0, dst_h - img_h)), (0, max(0, dst_w - img_w))]
    # np.pad needs one (before, after) pair per axis; add no-op pairs for
    # every axis beyond height and width.
    pad_shape.extend([(0, 0)] * (img.ndim - 2))
    return np.pad(img, pad_shape, mode)
def save_for_eval(predictions, infer_shape, im_shape, vis_dir, im_name):
    """Write instance masks and an index file for one image.

    Each prediction mask is padded to ``infer_shape``, cropped to
    ``im_shape`` and saved as ``<im_name>_<NN>.png`` in ``vis_dir``. A text
    file ``<im_name>.txt`` in the same directory receives one line per mask:
    ``<png name> <class id> <score>``. The class id is hard-coded to 26.

    Args:
        predictions: Iterable of dicts with a ``'mask'`` array and a
            ``'score'`` number.
        infer_shape: ``(height, width)`` the mask is padded to.
        im_shape: Sequence whose first two items are the output height and
            width.
        vis_dir: Output directory.
        im_name: Base name used for the generated files.
    """
    class_id = 26
    index_path = os.path.join(vis_dir, im_name + '.txt')
    with open(index_path, 'w') as index_file:
        for position, prediction in enumerate(predictions):
            mask_name = im_name + '_{:02d}.png'.format(position)
            padded = pad_img(prediction['mask'], infer_shape)
            cropped = padded[:im_shape[0], :im_shape[1]]
            PILImage.fromarray(cropped).save(os.path.join(vis_dir, mask_name))
            index_file.write("{} {} {:.02f}\n".format(
                mask_name, class_id, prediction['score']))
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: RainbowSecret
## Microsoft Research
## yuyua@microsoft.com
## Copyright (c) 2018
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
def get_palette(num_cls):
    """Build the color map used to visualize a segmentation mask.

    Every label gets three channel values. The label's bits are consumed
    three at a time (one bit per channel) and written from the most
    significant bit of each channel downwards, so nearby labels get clearly
    different colors.

    Args:
        num_cls: Number of classes.

    Returns:
        A flat list of ``num_cls * 3`` ints: the three channel values of
        label 0, then label 1, and so on.
    """
    palette = []
    for label in range(num_cls):
        first = second = third = 0
        shift = 7
        remaining = label
        while remaining:
            first |= (remaining & 1) << shift
            second |= ((remaining >> 1) & 1) << shift
            third |= ((remaining >> 2) & 1) << shift
            shift -= 1
            remaining >>= 3
        palette.extend((first, second, third))
    return palette
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import os
def get_arguments():
    """Parse the command-line options of the example runner.

    Options:
        --use_gpu: Flag; true when present.
        --example: Name of the example to run.

    Returns:
        argparse.Namespace: The parsed options.
    """
    option_specs = (
        (("--use_gpu", ), {
            "action": "store_true",
            "help": "Use gpu or cpu to test."
        }),
        (('--example', ), {
            "type": str,
            "help": 'RoadLine, HumanSeg or ACE2P'
        }),
    )
    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    return parser.parse_args()
class AttrDict(dict):
    """A dict whose items can also be read and written as attributes.

    Reading ``obj.name`` falls back to ``obj['name']`` and raises
    ``AttributeError`` when the key is missing. Assigning ``obj.name = value``
    stores ``obj['name']`` unless ``name`` already exists in the instance
    ``__dict__``, in which case that entry is updated instead.
    """

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)

    def __getattr__(self, name):
        # Only reached when normal attribute lookup has already failed.
        instance_attrs = self.__dict__
        if name in instance_attrs:
            return instance_attrs[name]
        if name not in self:
            raise AttributeError(name)
        return self[name]

    def __setattr__(self, name, value):
        instance_attrs = self.__dict__
        target = instance_attrs if name in instance_attrs else self
        target[name] = value
def merge_cfg_from_args(args, cfg):
"""Merge config keys, values in args into the global config."""
for k, v in vars(args).items():
d = cfg
try:
value = eval(v)
except:
value = v
if value is not None:
cfg[k] = value
...@@ -82,7 +82,7 @@ deeplabv3p_xception65_humanseg ...@@ -82,7 +82,7 @@ deeplabv3p_xception65_humanseg
### 4.2. 修改配置 ### 4.2. 修改配置
基于`PaddleSeg`训练的模型导出时,会自动生成对应的预测模型配置文件,请参考文档:[模型导出](../docs/model_export.md) 基于`PaddleSeg`训练的模型导出时,会自动生成对应的预测模型配置文件,请参考文档:[模型导出](../../docs/model_export.md)
`inference`源代码(即本目录)的`conf`目录下提供了示例人像分割模型的配置文件`humanseg.yaml`, 相关的字段含义和说明如下: `inference`源代码(即本目录)的`conf`目录下提供了示例人像分割模型的配置文件`humanseg.yaml`, 相关的字段含义和说明如下:
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
### 2.4 效果展示 ### 2.4 效果展示
<img src="example/human_1.png" width="20%" ><img src="example/human_2.png" width="20%" ><img src="example/human_3.png" width="20%" > <img src="example/human_1.png" width="20%" ><img src="example/human_2.png" width="20%" ><img src="example/human_3.png" width="20%" >
## 3.模型导出 ## 3.模型导出
此demo的人像分割模型为[下载链接](https://paddleseg.bj.bcebos.com/models/humanseg_mobilenetv2_1_0_bn_freeze_model_pr_po.zip),是基于Deeplab_v3+mobileNet_v2的humanseg模型,关于humanseg的介绍移步[特色垂类分割模型](./contrib),更多的分割模型导出可参考:[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.2.0/docs/model_export.md) 此demo的人像分割模型为[下载链接](https://paddleseg.bj.bcebos.com/models/humanseg_mobilenetv2_1_0_bn_freeze_model_pr_po.zip),是基于Deeplab_v3+mobileNet_v2的humanseg模型,关于humanseg的介绍移步[特色垂类分割模型](../../contrib),更多的分割模型导出可参考:[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.2.0/docs/model_export.md)
## 4.模型转换 ## 4.模型转换
......
...@@ -45,7 +45,7 @@ TRAIN Group存放所有和训练相关的配置 ...@@ -45,7 +45,7 @@ TRAIN Group存放所有和训练相关的配置
是否在多卡间同步BN的均值和方差。 是否在多卡间同步BN的均值和方差。
Synchronized Batch Norm跨GPU批归一化策略最早在[MegDet: A Large Mini-Batch Object Detector](https://arxiv.org/abs/1711.07240) Synchronized Batch Norm跨GPU批归一化策略最早在[MegDet: A Large Mini-Batch Object Detector](https://arxiv.org/abs/1711.07240)
论文中提出,在[Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/pdf/1902.04103.pdf)论文中以Yolov3验证了这一策略的有效性[PaddleCV/yolov3](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/yolov3)实现了这一系列策略并比Darknet框架版本在COCO17数据上mAP高5.9. 论文中提出,在[Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/pdf/1902.04103.pdf)论文中以Yolov3验证了这一策略的有效性
PaddleSeg基于PaddlePaddle框架的sync_batch_norm策略,可以支持通过多卡实现大batch size的分割模型训练,可以得到更高的mIoU精度。 PaddleSeg基于PaddlePaddle框架的sync_batch_norm策略,可以支持通过多卡实现大batch size的分割模型训练,可以得到更高的mIoU精度。
......
# 模型导出 # 模型导出
通过训练得到一个满足要求的模型后,如果想要将该模型接入到C++预测库或者Serving服务,我们需要通过`pdseg/export_model.py`来导出该模型。 通过训练得到一个满足要求的模型后,如果想要将该模型接入到C++预测库或者Serving服务,我们需要通过[`pdseg/export_model.py`](../../pdseg/export_model.py)来导出该模型。
该脚本的使用方法和`train.py/eval.py/vis.py`完全一样。 该脚本的使用方法和`train.py/eval.py/vis.py`完全一样。
......
...@@ -14,6 +14,7 @@ PaddleSeg对所有内置的分割模型都提供了公开数据集下的预训 ...@@ -14,6 +14,7 @@ PaddleSeg对所有内置的分割模型都提供了公开数据集下的预训
| MobileNetV2_0.5x | ImageNet | 0.5x | [MobileNetV2_0.5x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% | | MobileNetV2_0.5x | ImageNet | 0.5x | [MobileNetV2_0.5x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% |
| MobileNetV2_1.5x | ImageNet | 1.5x | [MobileNetV2_1.5x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12%/91.67% | | MobileNetV2_1.5x | ImageNet | 1.5x | [MobileNetV2_1.5x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12%/91.67% |
| MobileNetV2_2.0x | ImageNet | 2.0x | [MobileNetV2_2.0x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23%/92.58% | | MobileNetV2_2.0x | ImageNet | 2.0x | [MobileNetV2_2.0x](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23%/92.58% |
| MobileNetV3_Large_ssld_1.0x | ImageNet | 1.0x | [MobileNetV3_Large_ssld_1.0x](https://paddleseg.bj.bcebos.com/models/MobileNetV3_large_x1_0_ssld_pretrained.tar) | 79.00%/94.50% |
用户可以结合实际场景的精度和预测性能要求,选取不同`Depth multiplier`参数的MobileNet模型。 用户可以结合实际场景的精度和预测性能要求,选取不同`Depth multiplier`参数的MobileNet模型。
...@@ -37,6 +38,7 @@ PaddleSeg对所有内置的分割模型都提供了公开数据集下的预训 ...@@ -37,6 +38,7 @@ PaddleSeg对所有内置的分割模型都提供了公开数据集下的预训
|---|---|---|---| |---|---|---|---|
| ResNet50(适配PSPNet) | ImageNet | [resnet50_v2_pspnet](https://paddleseg.bj.bcebos.com/resnet50_v2_pspnet.tgz)| -- | | ResNet50(适配PSPNet) | ImageNet | [resnet50_v2_pspnet](https://paddleseg.bj.bcebos.com/resnet50_v2_pspnet.tgz)| -- |
| ResNet101(适配PSPNet) | ImageNet | [resnet101_v2_pspnet](https://paddleseg.bj.bcebos.com/resnet101_v2_pspnet.tgz)| -- | | ResNet101(适配PSPNet) | ImageNet | [resnet101_v2_pspnet](https://paddleseg.bj.bcebos.com/resnet101_v2_pspnet.tgz)| -- |
| ResNet50_vd | ImageNet | [ResNet50_vd_ssld_pretrained.tgz](https://paddleseg.bj.bcebos.com/models/ResNet50_vd_ssld_pretrained.tgz) | 83.0%/96.4% |
## COCO预训练模型 ## COCO预训练模型
...@@ -57,12 +59,15 @@ train数据集合为Cityscapes训练集合,测试为Cityscapes的验证集合 ...@@ -57,12 +59,15 @@ train数据集合为Cityscapes训练集合,测试为Cityscapes的验证集合
| 模型 | 数据集合 | 下载地址 |Output Stride| multi-scale test| mIoU on val| | 模型 | 数据集合 | 下载地址 |Output Stride| multi-scale test| mIoU on val|
|---|---|---|---|---|---| |---|---|---|---|---|---|
| DeepLabv3+/MobileNetv2/bn | Cityscapes |[mobilenet_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz) |16|false| 0.698| | DeepLabv3+/MobileNetv2/bn | Cityscapes |[mobilenet_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz) |16|false| 0.698|
| DeepLabv3+/MobileNetv3_Large/bn | Cityscapes |[deeplabv3p_mobilenetv3_large_cityscapes.tar.gz](https://paddleseg.bj.bcebos.com/models/deeplabv3p_mobilenetv3_large_cityscapes.tar.gz) |32|false| 0.7328|
| DeepLabv3+/Xception65/gn | Cityscapes |[deeplabv3p_xception65_gn_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/deeplabv3p_xception65_cityscapes.tgz) |16|false| 0.7824 | | DeepLabv3+/Xception65/gn | Cityscapes |[deeplabv3p_xception65_gn_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/deeplabv3p_xception65_cityscapes.tgz) |16|false| 0.7824 |
| DeepLabv3+/Xception65/bn | Cityscapes |[deeplabv3p_xception65_bn_cityscapes_.tgz](https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz) | 16 | false | 0.7930 | | DeepLabv3+/Xception65/bn | Cityscapes |[deeplabv3p_xception65_bn_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz) | 16 | false | 0.7930 |
| DeepLabv3+/ResNet50_vd/bn | Cityscapes |[deeplabv3p_resnet50_vd_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/deeplabv3p_resnet50_vd_cityscapes.tgz) | 16 | false | 0.8006 |
| ICNet/bn | Cityscapes |[icnet_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/icnet_cityscapes.tar.gz) |16|false| 0.6831 | | ICNet/bn | Cityscapes |[icnet_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/icnet_cityscapes.tar.gz) |16|false| 0.6831 |
| PSPNet/bn | Cityscapes |[pspnet50_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/pspnet50_cityscapes.tgz) |16|false| 0.7013 | | PSPNet/bn | Cityscapes |[pspnet50_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/pspnet50_cityscapes.tgz) |16|false| 0.7013 |
| PSPNet/bn | Cityscapes |[pspnet101_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/pspnet101_cityscapes.tgz) |16|false| 0.7734 | | PSPNet/bn | Cityscapes |[pspnet101_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/pspnet101_cityscapes.tgz) |16|false| 0.7734 |
| HRNet_W18/bn | Cityscapes |[hrnet_w18_bn_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz) | 4 | false | 0.7936 | | HRNet_W18/bn | Cityscapes |[hrnet_w18_bn_cityscapes.tgz](https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz) | 4 | false | 0.7936 |
| Fast-SCNN/bn | Cityscapes |[fast_scnn_cityscapes.tar](https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar) | 32 | false | 0.6964 | | Fast-SCNN/bn | Cityscapes |[fast_scnn_cityscapes.tar](https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar) | 32 | false | 0.6964 |
| OCRNet/bn | Cityscapes |[ocrnet_w18_bn_cityscapes.tar.gz](https://paddleseg.bj.bcebos.com/models/ocrnet_w18_bn_cityscapes.tar.gz) | 4 | false | 0.8023 |
测试环境为python 3.7.3,v100,cudnn 7.6.2。 测试环境为python 3.7.3,v100,cudnn 7.6.2。
...@@ -75,12 +75,10 @@ Fast-SCNN [7] 是一个面向实时的语义分割网络。在双分支的结构 ...@@ -75,12 +75,10 @@ Fast-SCNN [7] 是一个面向实时的语义分割网络。在双分支的结构
[3] [Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105) [3] [Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105)
[4] [Fully Convolutional Networks for Semantic Segmentation](https://people.eecs.berkeley.edu/~jonlong/long_shelhamer_fcn.pdf) [4] [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038)
[5] [ICNet for Real-Time Semantic Segmentation on High-Resolution Images](https://arxiv.org/abs/1704.08545) [5] [ICNet for Real-Time Semantic Segmentation on High-Resolution Images](https://arxiv.org/abs/1704.08545)
[6] [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/abs/1908.07919) [6] [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/abs/1908.07919)
[7] [Fast-SCNN: Fast Semantic Segmentation Network](https://arxiv.org/abs/1902.04502) [7] [Fast-SCNN: Fast Semantic Segmentation Network](https://arxiv.org/abs/1902.04502)
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
## 2.下载待训练数据 ## 2.下载待训练数据
![](../turtorial/imgs/optic.png) ![](../tutorial/imgs/optic.png)
我们提前准备好了一份眼底医疗分割数据集--视盘分割(optic disc segmentation),包含267张训练图片、76张验证图片、38张测试图片。通过以下命令进行下载: 我们提前准备好了一份眼底医疗分割数据集--视盘分割(optic disc segmentation),包含267张训练图片、76张验证图片、38张测试图片。通过以下命令进行下载:
......
# 动态图执行 # 动态图执行
## 数据集设置 ## 下载及添加路径
``` ```
data_dir='data/path' git clone https://github.com/PaddlePaddle/PaddleSeg
train_list='train/list/path' cd PaddleSeg
val_list='val/list/path' export PYTHONPATH=$PYTHONPATH:`pwd`
test_list='test/list/path' cd dygraph
num_classes=number/of/dataset/classes
``` ```
## 训练 ## 训练
``` ```
python3 train.py --model_name UNet \ python3 train.py --model_name unet \
--data_dir $data_dir \ --dataset OpticDiscSeg \
--train_list $train_list \
--val_list $val_list \
--num_classes $num_classes \
--input_size 192 192 \ --input_size 192 192 \
--num_epochs 4 \ --iters 10 \
--save_interval_epochs 1 \ --save_interval_iters 1 \
--do_eval \
--save_dir output --save_dir output
``` ```
## 评估 ## 评估
``` ```
python3 val.py --model_name UNet \ python3 val.py --model_name unet \
--data_dir $data_dir \ --dataset OpticDiscSeg \
--val_list $val_list \
--num_classes $num_classes \
--input_size 192 192 \ --input_size 192 192 \
--model_dir output/epoch_1 --model_dir output/best_model
``` ```
## 预测 ## 预测
``` ```
python3 infer.py --model_name UNet \ python3 infer.py --model_name unet \
--data_dir $data_dir \ --dataset OpticDiscSeg \
--test_list $test_list \ --model_dir output/best_model \
--num_classes $num_classes \ --input_size 192 192
--input_size 192 192 \
--model_dir output/epoch_1
``` ```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import dygraph.models
\ No newline at end of file
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from dygraph.datasets import DATASETS
import dygraph.transforms as T
#from dygraph.models import MODELS
from dygraph.cvlibs import manager
from dygraph.utils import get_environ_info
from dygraph.utils import logger
from dygraph.core import train
def parse_args():
    """Parse the command-line options of the training script.

    Destinations are derived by argparse from the option names, so
    ``--model_name`` is available as ``args.model_name`` and so on. The help
    texts of ``--model_name`` and ``--dataset`` list the keys currently
    registered in ``manager.MODELS`` and ``DATASETS``.

    Returns:
        argparse.Namespace: The parsed options.
    """
    parser = argparse.ArgumentParser(description='Model training')
    add_option = parser.add_argument

    # Model options.
    model_names = list(manager.MODELS.components_dict.keys())
    add_option(
        '--model_name',
        type=str,
        default='UNet',
        help='Model type for training, which is one of {}'.format(
            str(model_names)))

    # Dataset options.
    dataset_names = list(DATASETS.keys())
    add_option(
        '--dataset',
        type=str,
        default='OpticDiscSeg',
        help="The dataset you want to train, which is one of {}".format(
            str(dataset_names)))
    add_option(
        '--dataset_root',
        type=str,
        default=None,
        help="dataset root directory")

    # Training options.
    add_option(
        "--input_size",
        type=int,
        nargs=2,
        default=[512, 512],
        help="The image size for net inputs.")
    add_option(
        '--iters', type=int, default=10000, help='iters for training')
    add_option(
        '--batch_size',
        type=int,
        default=2,
        help='Mini batch size of one gpu or cpu')
    add_option(
        '--learning_rate', type=float, default=0.01, help='Learning rate')
    add_option(
        '--pretrained_model',
        type=str,
        default=None,
        help='The path of pretrained model')
    add_option(
        '--resume_model',
        type=str,
        default=None,
        help='The path of resume model')
    add_option(
        '--save_interval_iters',
        type=int,
        default=5,
        help='The interval iters for save a model snapshot')
    add_option(
        '--save_dir',
        type=str,
        default='./output',
        help='The directory for saving the model snapshot')
    add_option(
        '--num_workers',
        type=int,
        default=0,
        help='Num workers for data loader')
    add_option('--do_eval', action='store_true', help='Eval while training')
    add_option(
        '--log_iters',
        type=int,
        default=10,
        help='Display logging information at every log_iters')
    add_option(
        '--use_vdl',
        action='store_true',
        help='Whether to record the data to VisualDL during training')

    return parser.parse_args()
def main(args):
    """Train the model selected on the command line.

    Logs the environment information, chooses a CUDA or CPU place, builds the
    training dataset (plus an evaluation dataset when ``--do_eval`` is set),
    the model and a Momentum optimizer with polynomial learning-rate decay,
    then hands everything to ``train``.

    Args:
        args: Parsed options as returned by ``parse_args``.

    Raises:
        Exception: If ``args.dataset`` is not a key of ``DATASETS``.
    """
    env_info = get_environ_info()
    banner = ['\n', format('Environment Information', '-^48s')]
    banner.extend('{}: {}'.format(key, value) for key, value in env_info.items())
    banner.append('-' * 48)
    logger.info('\n'.join(banner))

    # Use the GPU only when Paddle was built with CUDA and GPUs are in use.
    if env_info['Paddle compiled with cuda'] and env_info['GPUs used']:
        places = fluid.CUDAPlace(ParallelEnv().dev_id)
    else:
        places = fluid.CPUPlace()

    if args.dataset not in DATASETS:
        raise Exception('`--dataset` is invalid. it should be one of {}'.format(
            str(list(DATASETS.keys()))))
    dataset_cls = DATASETS[args.dataset]

    with fluid.dygraph.guard(places):
        # Create the dataset readers.
        augmentations = [
            T.RandomHorizontalFlip(0.5),
            T.ResizeStepScaling(0.5, 2.0, 0.25),
            T.RandomPaddingCrop(args.input_size),
            T.RandomDistort(),
            T.Normalize(),
        ]
        train_dataset = dataset_cls(
            dataset_root=args.dataset_root,
            transforms=T.Compose(augmentations),
            mode='train')

        eval_dataset = None
        if args.do_eval:
            eval_steps = [T.Padding((2049, 1025)), T.Normalize()]
            eval_dataset = dataset_cls(
                dataset_root=args.dataset_root,
                transforms=T.Compose(eval_steps),
                mode='val')

        model_cls = manager.MODELS[args.model_name]
        model = model_cls(num_classes=train_dataset.num_classes)

        # Create the optimizer.
        # todo, may less one than len(loader)
        # NOTE(review): this value is computed but not used below.
        num_iters_each_epoch = len(train_dataset) // (
            args.batch_size * ParallelEnv().nranks)
        lr_decay = fluid.layers.polynomial_decay(
            args.learning_rate, args.iters, end_learning_rate=0, power=0.9)
        weight_decay = fluid.regularizer.L2Decay(regularization_coeff=4e-5)
        optimizer = fluid.optimizer.Momentum(
            lr_decay,
            momentum=0.9,
            parameter_list=model.parameters(),
            regularization=weight_decay)

        train(
            model,
            train_dataset,
            places=places,
            eval_dataset=eval_dataset,
            optimizer=optimizer,
            save_dir=args.save_dir,
            iters=args.iters,
            batch_size=args.batch_size,
            resume_model=args.resume_model,
            save_interval_iters=args.save_interval_iters,
            log_iters=args.log_iters,
            num_classes=train_dataset.num_classes,
            num_workers=args.num_workers,
            use_vdl=args.use_vdl)


# Script entry point: parse the command line, then run training.
if __name__ == '__main__':
    args = parse_args()
    main(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from dygraph.datasets import DATASETS
import dygraph.transforms as T
#from dygraph.models import MODELS
from dygraph.cvlibs import manager
from dygraph.utils import get_environ_info
from dygraph.utils import logger
from dygraph.core import train
def parse_args():
    """Parse the command-line options of the training script.

    Destinations are derived by argparse from the option names, so
    ``--model_name`` is available as ``args.model_name`` and so on. The help
    texts of ``--model_name`` and ``--dataset`` list the keys currently
    registered in ``manager.MODELS`` and ``DATASETS``.

    Returns:
        argparse.Namespace: The parsed options.
    """
    parser = argparse.ArgumentParser(description='Model training')
    add_option = parser.add_argument

    # Model options.
    model_names = list(manager.MODELS.components_dict.keys())
    add_option(
        '--model_name',
        type=str,
        default='UNet',
        help='Model type for training, which is one of {}'.format(
            str(model_names)))

    # Dataset options.
    dataset_names = list(DATASETS.keys())
    add_option(
        '--dataset',
        type=str,
        default='OpticDiscSeg',
        help="The dataset you want to train, which is one of {}".format(
            str(dataset_names)))
    add_option(
        '--dataset_root',
        type=str,
        default=None,
        help="dataset root directory")

    # Training options.
    add_option(
        "--input_size",
        type=int,
        nargs=2,
        default=[512, 512],
        help="The image size for net inputs.")
    add_option(
        '--iters', type=int, default=10000, help='iters for training')
    add_option(
        '--batch_size',
        type=int,
        default=2,
        help='Mini batch size of one gpu or cpu')
    add_option(
        '--learning_rate', type=float, default=0.01, help='Learning rate')
    add_option(
        '--pretrained_model',
        type=str,
        default=None,
        help='The path of pretrained model')
    add_option(
        '--resume_model',
        type=str,
        default=None,
        help='The path of resume model')
    add_option(
        '--save_interval_iters',
        type=int,
        default=5,
        help='The interval iters for save a model snapshot')
    add_option(
        '--save_dir',
        type=str,
        default='./output',
        help='The directory for saving the model snapshot')
    add_option(
        '--num_workers',
        type=int,
        default=0,
        help='Num workers for data loader')
    add_option('--do_eval', action='store_true', help='Eval while training')
    add_option(
        '--log_iters',
        type=int,
        default=10,
        help='Display logging information at every log_iters')
    add_option(
        '--use_vdl',
        action='store_true',
        help='Whether to record the data to VisualDL during training')

    return parser.parse_args()
def main(args):
    """Train the model selected on the command line.

    Logs the environment information, chooses a CUDA or CPU place, builds the
    training dataset (plus an evaluation dataset when ``--do_eval`` is set),
    the model (receiving ``args.pretrained_model``) and a Momentum optimizer
    with polynomial learning-rate decay, then hands everything to ``train``.

    Args:
        args: Parsed options as returned by ``parse_args``.

    Raises:
        Exception: If ``args.dataset`` is not a key of ``DATASETS``.
    """
    env_info = get_environ_info()
    banner = ['\n', format('Environment Information', '-^48s')]
    banner.extend('{}: {}'.format(key, value) for key, value in env_info.items())
    banner.append('-' * 48)
    logger.info('\n'.join(banner))

    # Use the GPU only when Paddle was built with CUDA and GPUs are in use.
    if env_info['Paddle compiled with cuda'] and env_info['GPUs used']:
        places = fluid.CUDAPlace(ParallelEnv().dev_id)
    else:
        places = fluid.CPUPlace()

    if args.dataset not in DATASETS:
        raise Exception('`--dataset` is invalid. it should be one of {}'.format(
            str(list(DATASETS.keys()))))
    dataset_cls = DATASETS[args.dataset]

    with fluid.dygraph.guard(places):
        # Create the dataset readers.
        augmentations = [
            T.RandomHorizontalFlip(0.5),
            T.ResizeStepScaling(0.5, 2.0, 0.25),
            T.RandomPaddingCrop(args.input_size),
            T.RandomDistort(),
            T.Normalize(),
        ]
        train_dataset = dataset_cls(
            dataset_root=args.dataset_root,
            transforms=T.Compose(augmentations),
            mode='train')

        eval_dataset = None
        if args.do_eval:
            eval_dataset = dataset_cls(
                dataset_root=args.dataset_root,
                transforms=T.Compose([T.Normalize()]),
                mode='val')

        model_cls = manager.MODELS[args.model_name]
        model = model_cls(
            num_classes=train_dataset.num_classes,
            pretrained_model=args.pretrained_model)

        # Create the optimizer.
        # todo, may less one than len(loader)
        # NOTE(review): this value is computed but not used below.
        num_iters_each_epoch = len(train_dataset) // (
            args.batch_size * ParallelEnv().nranks)
        lr_decay = fluid.layers.polynomial_decay(
            args.learning_rate, args.iters, end_learning_rate=0, power=0.9)
        weight_decay = fluid.regularizer.L2Decay(regularization_coeff=4e-5)
        optimizer = fluid.optimizer.Momentum(
            lr_decay,
            momentum=0.9,
            parameter_list=model.parameters(),
            regularization=weight_decay)

        train(
            model,
            train_dataset,
            places=places,
            eval_dataset=eval_dataset,
            optimizer=optimizer,
            save_dir=args.save_dir,
            iters=args.iters,
            batch_size=args.batch_size,
            resume_model=args.resume_model,
            save_interval_iters=args.save_interval_iters,
            log_iters=args.log_iters,
            num_classes=train_dataset.num_classes,
            num_workers=args.num_workers,
            use_vdl=args.use_vdl)


# Script entry point: parse the command line, then run training.
if __name__ == '__main__':
    args = parse_args()
    main(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .train import train
from .val import evaluate
from .infer import infer
__all__ = ['train', 'evaluate', 'infer']
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from paddle.fluid.dygraph.base import to_variable
import numpy as np
import paddle.fluid as fluid
import cv2
import tqdm
from dygraph import utils
import dygraph.utils.logger as logger
def mkdir(path):
    """Make sure the parent directory of the file ``path`` exists.

    Nothing is done when ``path`` has no directory component (a bare file
    name) or when the parent already exists.

    Args:
        path: Path of a file that is about to be written.
    """
    sub_dir = os.path.dirname(path)
    # An empty dirname means "current directory"; os.makedirs('') would raise.
    if sub_dir and not os.path.exists(sub_dir):
        # exist_ok avoids failing if the directory is created by someone else
        # between the check above and this call.
        os.makedirs(sub_dir, exist_ok=True)
def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
    """Predict every sample of ``test_dataset`` and save visualizations.

    Parameters are loaded from ``<model_dir>/model`` and the model is put in
    evaluation mode. For each sample the prediction is mapped back through
    the recorded preprocessing steps, then two images are written, keeping
    the sample's path relative to ``test_dataset.dataset_root``:
    ``<save_dir>/added/...`` (``utils.visualize`` with ``weight=0.6``) and
    ``<save_dir>/prediction/...`` (``utils.visualize`` with ``weight=0.0``).

    Args:
        model: Model whose call returns ``(pred, _)`` for an input batch.
        test_dataset: Iterable yielding ``(im, im_info, im_path)`` and
            exposing ``dataset_root``. Each ``im_info`` entry is indexed as
            ``(op_name, (h, w))``.
        model_dir: Directory holding the saved ``model`` parameters. Must be
            provided even though the default is ``None``.
        save_dir: Root directory for the output images.

    Raises:
        Exception: If an ``im_info`` entry is neither ``'resize'`` nor
            ``'padding'``.
    """
    ckpt_path = os.path.join(model_dir, 'model')
    # The optimizer state returned by load_dygraph is not needed here.
    para_state_dict, _ = fluid.load_dygraph(ckpt_path)
    model.set_dict(para_state_dict)
    model.eval()

    added_saved_dir = os.path.join(save_dir, 'added')
    pred_saved_dir = os.path.join(save_dir, 'prediction')

    logger.info("Start to predict...")
    for im, im_info, im_path in tqdm.tqdm(test_dataset):
        im = to_variable(im)
        pred, _ = model(im)
        pred = pred.numpy()
        pred = np.squeeze(pred).astype('uint8')

        # Undo the recorded preprocessing steps, last one first.
        for info in im_info[::-1]:
            if info[0] == 'resize':
                h, w = info[1][0], info[1][1]
                # The flag must be passed by keyword: the third positional
                # parameter of cv2.resize is ``dst``, not ``interpolation``.
                # Nearest-neighbour keeps the values valid class ids.
                pred = cv2.resize(
                    pred, (w, h), interpolation=cv2.INTER_NEAREST)
            elif info[0] == 'padding':
                h, w = info[1][0], info[1][1]
                pred = pred[0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    info[0]))

        # Path of the image relative to the dataset root.
        im_file = im_path.replace(test_dataset.dataset_root, '')
        if im_file.startswith('/'):
            im_file = im_file[1:]

        # save added image
        added_image = utils.visualize(im_path, pred, weight=0.6)
        added_image_path = os.path.join(added_saved_dir, im_file)
        mkdir(added_image_path)
        cv2.imwrite(added_image_path, added_image)

        # save prediction
        pred_im = utils.visualize(im_path, pred, weight=0.0)
        pred_saved_path = os.path.join(pred_saved_dir, im_file)
        mkdir(pred_saved_path)
        cv2.imwrite(pred_saved_path, pred_im)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import DataLoader
from paddle.incubate.hapi.distributed import DistributedBatchSampler
import dygraph.utils.logger as logger
from dygraph.utils import load_pretrained_model
from dygraph.utils import resume
from dygraph.utils import Timer, calculate_eta
from .val import evaluate
def train(model,
          train_dataset,
          places=None,
          eval_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval_iters=1000,
          log_iters=10,
          num_classes=None,
          num_workers=8,
          use_vdl=False):
    """Train `model` on `train_dataset` for a fixed number of iterations.

    Every `log_iters` iterations rank 0 logs the averaged loss, the learning
    rate and timing figures. Every `save_interval_iters` iterations (and at the
    last iteration) rank 0 saves the model and optimizer state under
    `save_dir/iter_<n>`; when `eval_dataset` is given that checkpoint is
    evaluated and the weights with the best mIoU are also saved under
    `save_dir/best_model`.

    Args:
        model: Network called as `model(images, labels)` to obtain the loss.
            Its `ignore_index` attribute is forwarded to `evaluate`.
        train_dataset: Dataset fed to the batch sampler and the data loader.
        places: Forwarded to `DataLoader`.
        eval_dataset: Optional dataset evaluated after each checkpoint.
        optimizer: Optimizer stepped with `minimize` on every iteration. The
            default of None is not usable; an optimizer must be supplied.
        save_dir: Output directory for checkpoints and VisualDL logs. A
            non-directory file at this path is removed first.
        iters: Total number of training iterations.
        batch_size: Batch size given to the batch sampler.
        resume_model: Checkpoint location handed to `resume`; None starts fresh.
        save_interval_iters: Checkpoint (and evaluation) period in iterations.
        log_iters: Logging period in iterations.
        num_classes: Forwarded to `evaluate`.
        num_workers: Forwarded to `DataLoader`.
        use_vdl: Whether to record scalars with VisualDL.
    """
    ignore_index = model.ignore_index
    nranks = ParallelEnv().nranks

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        # A plain file is in the way: replace it with a directory.
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        strategy = fluid.dygraph.prepare_context()
        ddp_model = fluid.dygraph.DataParallel(model, strategy)

    batch_sampler = DistributedBatchSampler(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    loader = DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        places=places,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    timer = Timer()
    avg_loss = 0.0
    iters_per_epoch = len(batch_sampler)
    best_mean_iou = -1.0
    best_model_iter = -1
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    timer.start()

    # Continue counting from the resumed checkpoint instead of restarting at 0.
    # NOTE(review): assumes `resume` returns the iteration as an int; anything
    # else falls back to 0, which is what the loop always used before.
    cur_iter = start_iter if isinstance(start_iter, int) else 0
    while cur_iter < iters:
        for data in loader:
            cur_iter += 1
            if cur_iter > iters:
                break
            train_reader_cost += timer.elapsed_time()
            images = data[0]
            labels = data[1].astype('int64')
            if nranks > 1:
                loss = ddp_model(images, labels)
                # apply_collective_grads sum grads over multiple gpus.
                loss = ddp_model.scale_loss(loss)
                loss.backward()
                ddp_model.apply_collective_grads()
            else:
                loss = model(images, labels)
                loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            lr = optimizer.current_step_lr()
            train_batch_cost += timer.elapsed_time()

            if cur_iter % log_iters == 0 and ParallelEnv().local_rank == 0:
                avg_loss /= log_iters
                avg_train_reader_cost = train_reader_cost / log_iters
                avg_train_batch_cost = train_batch_cost / log_iters
                train_reader_cost = 0.0
                train_batch_cost = 0.0
                remain_iters = iters - cur_iter
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                # The loss is multiplied by nranks for reporting.
                # NOTE(review): presumably undoes `scale_loss` - confirm.
                logger.info(
                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
                    .format((cur_iter - 1) // iters_per_epoch + 1, cur_iter,
                            iters, avg_loss * nranks, lr,
                            avg_train_batch_cost, avg_train_reader_cost, eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss * nranks,
                                          cur_iter)
                    log_writer.add_scalar('Train/lr', lr, cur_iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, cur_iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, cur_iter)
                avg_loss = 0.0

            if (cur_iter % save_interval_iters == 0
                    or cur_iter == iters) and ParallelEnv().local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(cur_iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                # Both state dicts are saved under the same 'model' prefix.
                fluid.save_dygraph(model.state_dict(),
                                   os.path.join(current_save_dir, 'model'))
                fluid.save_dygraph(optimizer.state_dict(),
                                   os.path.join(current_save_dir, 'model'))

                if eval_dataset is not None:
                    mean_iou, avg_acc = evaluate(
                        model,
                        eval_dataset,
                        model_dir=current_save_dir,
                        num_classes=num_classes,
                        ignore_index=ignore_index,
                        iter_id=cur_iter)
                    if mean_iou > best_mean_iou:
                        best_mean_iou = mean_iou
                        best_model_iter = cur_iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        fluid.save_dygraph(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model'))
                    logger.info(
                        'Current evaluated best model in eval_dataset is iter_{}, miou={:.4f}'
                        .format(best_model_iter, best_mean_iou))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/mIoU', mean_iou,
                                              cur_iter)
                        log_writer.add_scalar('Evaluate/aAcc', avg_acc,
                                              cur_iter)
                    # `evaluate` switches the model to eval mode; switch back.
                    model.train()
            timer.restart()
    if use_vdl:
        log_writer.close()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import tqdm
import cv2
from paddle.fluid.dygraph.base import to_variable
import paddle.fluid as fluid
import dygraph.utils.logger as logger
from dygraph.utils import ConfusionMatrix
from dygraph.utils import Timer, calculate_eta
def evaluate(model,
             eval_dataset=None,
             model_dir=None,
             num_classes=None,
             ignore_index=255,
             iter_id=None):
    """Evaluate the checkpoint stored in `model_dir` on `eval_dataset`.

    The weights at `model_dir/model` are loaded into `model`, every sample of
    `eval_dataset` is predicted, the recorded transforms are undone on the
    prediction, and a streaming confusion matrix is updated.

    Args:
        model: Network called as `model(im)`; the first element of its result
            is used as the prediction.
        eval_dataset: Iterable with `len()` yielding `(im, im_info, label)`.
        model_dir: Directory that contains the saved `model` checkpoint.
        num_classes: Number of classes for the confusion matrix.
        ignore_index: Label value excluded from the metrics.
        iter_id: Training iteration, used in the debug log only.

    Returns:
        Tuple `(miou, macc)` taken from the confusion matrix.

    Raises:
        Exception: If `im_info` contains an entry that is neither 'resize'
            nor 'padding'.
    """
    ckpt_path = os.path.join(model_dir, 'model')
    para_state_dict, _ = fluid.load_dygraph(ckpt_path)
    model.set_dict(para_state_dict)
    model.eval()

    total_iters = len(eval_dataset)
    conf_mat = ConfusionMatrix(num_classes, streaming=True)

    logger.info(
        "Start to evaluating(total_samples={}, total_iters={})...".format(
            len(eval_dataset), total_iters))
    timer = Timer()
    timer.start()
    for step, (im, im_info, label) in tqdm.tqdm(
            enumerate(eval_dataset), total=total_iters):
        im = to_variable(im)
        pred, _ = model(im)
        pred = pred.numpy().astype('float32')
        pred = np.squeeze(pred)
        # Undo the recorded transforms in reverse order.
        for info in im_info[::-1]:
            if info[0] == 'resize':
                h, w = info[1][0], info[1][1]
                # `interpolation` must be a keyword: the third positional
                # parameter of cv2.resize is `dst`, not the interpolation flag.
                pred = cv2.resize(
                    pred, (w, h), interpolation=cv2.INTER_NEAREST)
            elif info[0] == 'padding':
                h, w = info[1][0], info[1][1]
                pred = pred[0:h, 0:w]
            else:
                raise Exception("Unexpected info '{}' in im_info".format(
                    info[0]))
        pred = pred[np.newaxis, :, :, np.newaxis]
        pred = pred.astype('int64')
        # NOTE(review): this is True for pixels that should be counted, yet it
        # is passed as `ignore` - confirm against ConfusionMatrix.calculate.
        mask = label != ignore_index
        conf_mat.calculate(pred=pred, label=label, ignore=mask)
        _, iou = conf_mat.mean_iou()

        time_iter = timer.elapsed_time()
        remain_iter = total_iters - step - 1
        logger.debug(
            "[EVAL] iter_id={}, iter={}/{}, iou={:.4f}, sec/iter={:.4f} | ETA {}"
            .format(iter_id, step + 1, total_iters, iou, time_iter,
                    calculate_eta(remain_iter, time_iter)))
        timer.restart()

    category_iou, miou = conf_mat.mean_iou()
    category_acc, macc = conf_mat.accuracy()
    logger.info("[EVAL] #Images={} mAcc={:.4f} mIoU={:.4f}".format(
        len(eval_dataset), macc, miou))
    logger.info("[EVAL] Category IoU: " + str(category_iou))
    logger.info("[EVAL] Category Acc: " + str(category_acc))
    logger.info("[EVAL] Kappa:{:.4f} ".format(conf_mat.kappa()))
    return miou, macc
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- encoding: utf-8 -*-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections.abc import Sequence
import inspect
class ComponentManager:
    """
    Registry that stores components (classes or functions) under their
    ``__name__`` so they can be looked up by name later.

    For example:

        >>> model_manager = ComponentManager()
        >>> class AlexNet: ...
        >>> class ResNet: ...
        >>> model_manager.add_component(AlexNet)
        >>> model_manager.add_component(ResNet)

        or pass a sequence alternatively:

        >>> model_manager.add_component([AlexNet, ResNet])
        >>> print(model_manager.components_dict)
        output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}

    Or, more simply, use it as a decorator directly above the declaration:

        >>> model_manager = ComponentManager()
        >>> @model_manager.add_component
        >>> class AlexNet: ...
        >>> @model_manager.add_component
        >>> class ResNet: ...
        >>> print(model_manager.components_dict)
        output: {'AlexNet': <class '__main__.AlexNet'>, 'ResNet': <class '__main__.ResNet'>}
    """

    def __init__(self):
        self._components_dict = dict()

    def __len__(self):
        return len(self._components_dict)

    def __repr__(self):
        return "{}:{}".format(self.__class__.__name__,
                              list(self._components_dict.keys()))

    def __getitem__(self, item):
        if item not in self._components_dict:
            raise KeyError("{} does not exist in the current {}".format(
                item, self))
        return self._components_dict[item]

    @property
    def components_dict(self):
        """The underlying name -> component mapping (not a copy)."""
        return self._components_dict

    def _add_single_component(self, component):
        """
        Add a single component into the corresponding manager

        Args:
            component (function | class): a new component

        Returns:
            None

        Raises:
            TypeError: if `component` is neither a class nor a function.
            KeyError: if a component with the same name is already registered.
        """
        # Currently only support class or function type
        if not (inspect.isclass(component) or inspect.isfunction(component)):
            raise TypeError("Expect class/function type, but received {}".format(
                type(component)))

        # Take the internal name of the component as its key
        component_name = component.__name__

        # Refuse to silently overwrite an earlier registration
        if component_name in self._components_dict:
            raise KeyError("{} exists already!".format(component_name))
        self._components_dict[component_name] = component

    def add_component(self, components):
        """
        Add component(s) into the corresponding manager

        Args:
            components (function | class | list | tuple): a single component
                or a sequence of components

        Returns:
            The `components` argument itself, unchanged. Returning it is what
            allows this method to be used as a decorator.
        """
        # A sequence is registered element by element
        if isinstance(components, Sequence):
            for component in components:
                self._add_single_component(component)
        else:
            self._add_single_component(components)

        return components
# Module-level registries, one for models and one for backbones.
MODELS = ComponentManager()
BACKBONES = ComponentManager()
\ No newline at end of file
...@@ -12,5 +12,15 @@ ...@@ -12,5 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .dataset import Dataset
from .optic_disc_seg import OpticDiscSeg from .optic_disc_seg import OpticDiscSeg
from .cityscapes import Cityscapes from .cityscapes import Cityscapes
from .voc import PascalVOC
from .ade import ADE20K
DATASETS = {
"OpticDiscSeg": OpticDiscSeg,
"Cityscapes": Cityscapes,
"PascalVOC": PascalVOC,
"ADE20K": ADE20K
}
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from PIL import Image
from .dataset import Dataset
from dygraph.utils.download import download_file_and_uncompress
# Local cache directory; passed to the downloader as both save and extract path.
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
# Archive fetched when ADE20K is constructed without a `dataset_root`.
URL = "http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip"
class ADE20K(Dataset):
    """ADE20K dataset `http://sceneparsing.csail.mit.edu/`.

    Args:
        dataset_root: The dataset directory. When None, the archive is
            downloaded (if `download` is True).
        mode: Which part of dataset to use, one of ('train', 'val'); matched
            case-insensitively. Default: 'train'.
        transforms: Transforms for image. Required.
        download: Whether to download dataset if `dataset_root` is None.

    Raises:
        Exception: on an invalid `mode`, missing `transforms`, a missing
            `dataset_root` with download disabled, or a non-existent
            `dataset_root`.
    """

    def __init__(self,
                 dataset_root=None,
                 mode='train',
                 transforms=None,
                 download=True):
        self.dataset_root = dataset_root
        self.transforms = transforms
        self.file_list = list()
        self.num_classes = 150

        if mode.lower() not in ['train', 'val']:
            raise Exception(
                "`mode` should be one of ('train', 'val') in ADE20K dataset, but got {}."
                .format(mode))
        # Validation is case-insensitive, so normalise once; every later
        # comparison (here and in __getitem__) uses the lower-case form.
        mode = mode.lower()
        self.mode = mode

        if self.transforms is None:
            raise Exception("`transforms` is necessary, but it is None.")

        if self.dataset_root is None:
            if not download:
                raise Exception(
                    "`dataset_root` not set and auto download disabled.")
            self.dataset_root = download_file_and_uncompress(
                url=URL,
                savepath=DATA_HOME,
                extrapath=DATA_HOME,
                extraname='ADEChallengeData2016')
        elif not os.path.exists(self.dataset_root):
            raise Exception('there is not `dataset_root`: {}.'.format(
                self.dataset_root))

        split = 'training' if mode == 'train' else 'validation'
        img_dir = os.path.join(self.dataset_root, 'images/{}'.format(split))
        grt_dir = os.path.join(self.dataset_root,
                               'annotations/{}'.format(split))

        # os.listdir returns entries in arbitrary order; sort so the sample
        # order is the same on every run and every machine.
        self.file_list = [[
            os.path.join(img_dir, img_name),
            os.path.join(grt_dir, img_name.replace('.jpg', '.png'))
        ] for img_name in sorted(os.listdir(img_dir))]

    def __getitem__(self, idx):
        """Return the sample at `idx`; the tuple layout depends on the mode.

        'val' yields `(im, im_info, label)` with the label read straight from
        the annotation file; any other mode yields `(im, label)` produced by
        the transforms. In both cases 1 is subtracted from the label.
        """
        image_path, grt_path = self.file_list[idx]
        if self.mode == 'test':
            # Not reachable through __init__, which only accepts train/val.
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            return im, im_info, image_path
        elif self.mode == 'val':
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            label = np.asarray(Image.open(grt_path))
            # NOTE(review): for an unsigned 8-bit label image, 0 - 1 wraps to
            # 255; presumably that is meant to hit the ignore index - confirm.
            label = label - 1
            label = label[np.newaxis, np.newaxis, :, :]
            return im, im_info, label
        else:
            im, im_info, label = self.transforms(im=image_path, label=grt_path)
            label = label - 1
            return im, label
...@@ -13,73 +13,62 @@ ...@@ -13,73 +13,62 @@
# limitations under the License. # limitations under the License.
import os import os
import glob
from paddle.fluid.io import Dataset from .dataset import Dataset
from utils.download import download_file_and_uncompress
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') class Cityscapes(Dataset):
URL = "https://paddleseg.bj.bcebos.com/dataset/cityscapes.tar" """Cityscapes dataset `https://www.cityscapes-dataset.com/`.
The folder structure is as follow:
cityscapes
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
Make sure there are **labelTrainIds.png in gtFine directory. If not, please run the conver_cityscapes.py in tools.
Args:
dataset_root: Cityscapes dataset directory.
mode: Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'.
transforms: Transforms for image.
"""
class Cityscapes(Dataset): def __init__(self, dataset_root, transforms=None, mode='train'):
def __init__(self, self.dataset_root = dataset_root
data_dir=None,
transforms=None,
mode='train',
download=True):
self.data_dir = data_dir
self.transforms = transforms self.transforms = transforms
self.file_list = list() self.file_list = list()
self.mode = mode self.mode = mode
self.num_classes = 19 self.num_classes = 19
if mode.lower() not in ['train', 'eval', 'test']: if mode.lower() not in ['train', 'val', 'test']:
raise Exception( raise Exception(
"mode should be 'train', 'eval' or 'test', but got {}.".format( "mode should be 'train', 'val' or 'test', but got {}.".format(
mode)) mode))
if self.transforms is None: if self.transforms is None:
raise Exception("transform is necessary, but it is None.") raise Exception("`transforms` is necessary, but it is None.")
self.data_dir = data_dir
if self.data_dir is None:
if not download:
raise Exception("data_file not set and auto download disabled.")
self.data_dir = download_file_and_uncompress(
url=URL, savepath=DATA_HOME, extrapath=DATA_HOME)
if mode == 'train':
file_list = os.path.join(self.data_dir, 'train.list')
elif mode == 'eval':
file_list = os.path.join(self.data_dir, 'val.list')
else:
file_list = os.path.join(self.data_dir, 'test.list')
with open(file_list, 'r') as f: img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
for line in f: grt_dir = os.path.join(self.dataset_root, 'gtFine')
items = line.strip().split() if self.dataset_root is None or not os.path.isdir(
if len(items) != 2: self.dataset_root) or not os.path.isdir(
if mode == 'train' or mode == 'eval': img_dir) or not os.path.isdir(grt_dir):
raise Exception( raise Exception(
"File list format incorrect! It should be" "The dataset is not Found or the folder structure is nonconfoumance."
" image_name label_name\\n") )
image_path = os.path.join(self.data_dir, items[0])
grt_path = None
else:
image_path = os.path.join(self.data_dir, items[0])
grt_path = os.path.join(self.data_dir, items[1])
self.file_list.append([image_path, grt_path])
def __getitem__(self, idx): grt_files = sorted(
image_path, grt_path = self.file_list[idx] glob.glob(
im, im_info, label = self.transforms(im=image_path, label=grt_path) os.path.join(grt_dir, mode, '*', '*_gtFine_labelTrainIds.png')))
if self.mode == 'train': img_files = sorted(
return im, label glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.png')))
elif self.mode == 'eval':
return im, label
if self.mode == 'test':
return im, im_info, image_path
def __len__(self): self.file_list = [[img_path, grt_path]
return len(self.file_list) for img_path, grt_path in zip(img_files, grt_files)]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle.fluid as fluid
import numpy as np
from PIL import Image
class Dataset(fluid.io.Dataset):
    """Pass in a custom dataset that conforms to the format.

    Args:
        dataset_root: The dataset directory; list entries are joined onto it.
        num_classes: Number of classes.
        mode: Which part of dataset to use, one of ('train', 'val', 'test');
            matched case-insensitively. Default: 'train'.
        train_list: The train dataset file. Necessary when `mode` is 'train'.
            The contents of train_list file are as follow:
            image1.jpg ground_truth1.png
            image2.jpg ground_truth2.png
        val_list: The evaluation dataset file. Necessary when `mode` is 'val'.
            The contents are the same as train_list.
        test_list: The test dataset file. Necessary when `mode` is 'test'.
            The annotation column is optional in this file.
        separator: The separator of dataset list. Default: ' '.
        transforms: Transforms for image. Required.

    Raises:
        Exception: on an invalid `mode`, missing `transforms`, a non-existent
            `dataset_root`, a missing or non-existent list file for the chosen
            mode, or a malformed line in a train/val list.
    """

    def __init__(self,
                 dataset_root,
                 num_classes,
                 mode='train',
                 train_list=None,
                 val_list=None,
                 test_list=None,
                 separator=' ',
                 transforms=None):
        self.dataset_root = dataset_root
        self.transforms = transforms
        self.file_list = list()
        self.num_classes = num_classes

        if mode.lower() not in ['train', 'val', 'test']:
            raise Exception(
                "mode should be 'train', 'val' or 'test', but got {}.".format(
                    mode))
        # Validation is case-insensitive, so normalise once; otherwise e.g.
        # 'Train' would pass the check and then be treated as 'test'.
        mode = mode.lower()
        self.mode = mode

        if self.transforms is None:
            raise Exception("`transforms` is necessary, but it is None.")

        if not os.path.exists(self.dataset_root):
            raise Exception('there is not `dataset_root`: {}.'.format(
                self.dataset_root))

        # Pick the list file that belongs to the requested mode.
        list_arg = '{}_list'.format(mode)
        file_list = {
            'train': train_list,
            'val': val_list,
            'test': test_list
        }[mode]
        if file_list is None:
            raise Exception(
                'When `mode` is "{}", `{}` is necessary, but it is None.'.
                format(mode, list_arg))
        elif not os.path.exists(file_list):
            raise Exception('`{}` is not found: {}'.format(
                list_arg, file_list))

        with open(file_list, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines carry no sample; skip instead of failing
                    # (train/val) or recording a bogus path (test).
                    continue
                items = line.split(separator)
                if len(items) != 2:
                    if mode == 'train' or mode == 'val':
                        raise Exception(
                            "File list format incorrect! In training or evaluation task it should be"
                            " image_name{}label_name\\n".format(separator))
                    image_path = os.path.join(self.dataset_root, items[0])
                    grt_path = None
                else:
                    image_path = os.path.join(self.dataset_root, items[0])
                    grt_path = os.path.join(self.dataset_root, items[1])
                self.file_list.append([image_path, grt_path])

    def __getitem__(self, idx):
        """Return the sample at `idx`; the tuple layout depends on the mode.

        'test' yields `(im, im_info, image_path)`, 'val' yields
        `(im, im_info, label)` with the label read directly from the
        annotation file, and any other mode yields `(im, label)` as produced
        by the transforms.
        """
        image_path, grt_path = self.file_list[idx]
        if self.mode == 'test':
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            return im, im_info, image_path
        elif self.mode == 'val':
            im, im_info, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            # The label is read without going through the transforms.
            label = np.asarray(Image.open(grt_path))
            label = label[np.newaxis, np.newaxis, :, :]
            return im, im_info, label
        else:
            im, im_info, label = self.transforms(im=image_path, label=grt_path)
            return im, label

    def __len__(self):
        return len(self.file_list)
...@@ -14,9 +14,8 @@ ...@@ -14,9 +14,8 @@
import os import os
from paddle.fluid.io import Dataset from .dataset import Dataset
from dygraph.utils.download import download_file_and_uncompress
from utils.download import download_file_and_uncompress
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip" URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"
...@@ -24,62 +23,52 @@ URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip" ...@@ -24,62 +23,52 @@ URL = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip"
class OpticDiscSeg(Dataset): class OpticDiscSeg(Dataset):
def __init__(self, def __init__(self,
data_dir=None, dataset_root=None,
transforms=None, transforms=None,
mode='train', mode='train',
download=True): download=True):
self.data_dir = data_dir self.dataset_root = dataset_root
self.transforms = transforms self.transforms = transforms
self.file_list = list() self.file_list = list()
self.mode = mode self.mode = mode
self.num_classes = 2 self.num_classes = 2
if mode.lower() not in ['train', 'eval', 'test']: if mode.lower() not in ['train', 'val', 'test']:
raise Exception( raise Exception(
"mode should be 'train', 'eval' or 'test', but got {}.".format( "`mode` should be 'train', 'val' or 'test', but got {}.".format(
mode)) mode))
if self.transforms is None: if self.transforms is None:
raise Exception("transform is necessary, but it is None.") raise Exception("`transforms` is necessary, but it is None.")
self.data_dir = data_dir if self.dataset_root is None:
if self.data_dir is None:
if not download: if not download:
raise Exception("data_file not set and auto download disabled.") raise Exception(
self.data_dir = download_file_and_uncompress( "`data_root` not set and auto download disabled.")
self.dataset_root = download_file_and_uncompress(
url=URL, savepath=DATA_HOME, extrapath=DATA_HOME) url=URL, savepath=DATA_HOME, extrapath=DATA_HOME)
elif not os.path.exists(self.dataset_root):
raise Exception('there is not `dataset_root`: {}.'.format(
self.dataset_root))
if mode == 'train': if mode == 'train':
file_list = os.path.join(self.data_dir, 'train_list.txt') file_list = os.path.join(self.dataset_root, 'train_list.txt')
elif mode == 'eval': elif mode == 'val':
file_list = os.path.join(self.data_dir, 'val_list.txt') file_list = os.path.join(self.dataset_root, 'val_list.txt')
else: else:
file_list = os.path.join(self.data_dir, 'test_list.txt') file_list = os.path.join(self.dataset_root, 'test_list.txt')
with open(file_list, 'r') as f: with open(file_list, 'r') as f:
for line in f: for line in f:
items = line.strip().split() items = line.strip().split()
if len(items) != 2: if len(items) != 2:
if mode == 'train' or mode == 'eval': if mode == 'train' or mode == 'val':
raise Exception( raise Exception(
"File list format incorrect! It should be" "File list format incorrect! It should be"
" image_name label_name\\n") " image_name label_name\\n")
image_path = os.path.join(self.data_dir, items[0]) image_path = os.path.join(self.dataset_root, items[0])
grt_path = None grt_path = None
else: else:
image_path = os.path.join(self.data_dir, items[0]) image_path = os.path.join(self.dataset_root, items[0])
grt_path = os.path.join(self.data_dir, items[1]) grt_path = os.path.join(self.dataset_root, items[1])
self.file_list.append([image_path, grt_path]) self.file_list.append([image_path, grt_path])
def __getitem__(self, idx):
image_path, grt_path = self.file_list[idx]
im, im_info, label = self.transforms(im=image_path, label=grt_path)
if self.mode == 'train':
return im, label
elif self.mode == 'eval':
return im, label
if self.mode == 'test':
return im, im_info, image_path
def __len__(self):
return len(self.file_list)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from .dataset import Dataset
from dygraph.utils.download import download_file_and_uncompress
# Local cache directory; passed to the downloader as both save and extract path.
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
# Archive fetched when PascalVOC is constructed without a `dataset_root`.
URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
class PascalVOC(Dataset):
    """Pascal VOC dataset `http://host.robots.ox.ac.uk/pascal/VOC/`. If you want to augment the dataset,
    please run the voc_augment.py in tools.

    Args:
        dataset_root: The dataset directory. When None, the archive is
            downloaded (if `download` is True).
        mode: Which part of dataset to use, one of
            ('train', 'trainval', 'trainaug', 'val'); matched
            case-insensitively. Default: 'train'.
        transforms: Transforms for image. Required.
        download: Whether to download dataset if dataset_root is None.

    Raises:
        Exception: on an invalid `mode`, missing `transforms`, a missing
            `dataset_root` with download disabled, a non-existent
            `dataset_root`, or a missing aug.txt in 'trainaug' mode.
    """

    def __init__(self,
                 dataset_root=None,
                 mode='train',
                 transforms=None,
                 download=True):
        self.dataset_root = dataset_root
        self.transforms = transforms
        self.file_list = list()
        self.num_classes = 21

        if mode.lower() not in ['train', 'trainval', 'trainaug', 'val']:
            raise Exception(
                "`mode` should be one of ('train', 'trainval', 'trainaug', 'val') in PascalVOC dataset, but got {}."
                .format(mode))
        # Validation is case-insensitive, so normalise once; otherwise e.g.
        # 'Train' would pass the check and leave `file_list` undefined below.
        mode = mode.lower()
        self.mode = mode

        if self.transforms is None:
            raise Exception("`transforms` is necessary, but it is None.")

        if self.dataset_root is None:
            if not download:
                raise Exception(
                    "`dataset_root` not set and auto download disabled.")
            self.dataset_root = download_file_and_uncompress(
                url=URL,
                savepath=DATA_HOME,
                extrapath=DATA_HOME,
                extraname='VOCdevkit')
        elif not os.path.exists(self.dataset_root):
            raise Exception('there is not `dataset_root`: {}.'.format(
                self.dataset_root))

        image_set_dir = os.path.join(self.dataset_root, 'VOC2012', 'ImageSets',
                                     'Segmentation')
        img_dir = os.path.join(self.dataset_root, 'VOC2012', 'JPEGImages')
        grt_dir = os.path.join(self.dataset_root, 'VOC2012',
                               'SegmentationClass')
        grt_dir_aug = os.path.join(self.dataset_root, 'VOC2012',
                                   'SegmentationClassAug')

        # 'trainaug' reads the regular train split plus the augmented list.
        list_name = {
            'train': 'train.txt',
            'val': 'val.txt',
            'trainval': 'trainval.txt',
            'trainaug': 'train.txt'
        }[mode]
        # Each source pairs a list file with the directory holding its labels.
        sources = [(os.path.join(image_set_dir, list_name), grt_dir)]
        if mode == 'trainaug':
            file_list_aug = os.path.join(image_set_dir, 'aug.txt')
            if not os.path.exists(file_list_aug):
                raise Exception(
                    "When `mode` is 'trainaug', Pascal Voc dataset should be augmented, "
                    "Please make sure voc_augment.py has been properly run when using this mode."
                )
            sources.append((file_list_aug, grt_dir_aug))

        for list_path, label_dir in sources:
            with open(list_path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        # A blank line would yield the bogus paths
                        # '.jpg' / '.png'; skip it.
                        continue
                    image_path = os.path.join(img_dir, line + '.jpg')
                    grt_path = os.path.join(label_dir, line + '.png')
                    self.file_list.append([image_path, grt_path])
...@@ -13,21 +13,15 @@ ...@@ -13,21 +13,15 @@
# limitations under the License. # limitations under the License.
import argparse import argparse
import os
from paddle.fluid.dygraph.base import to_variable
import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.dygraph.parallel import ParallelEnv
import cv2
import tqdm
from datasets import OpticDiscSeg, Cityscapes from dygraph.datasets import DATASETS
import transforms as T import dygraph.transforms as T
import models from dygraph.cvlibs import manager
import utils from dygraph.utils import get_environ_info
import utils.logging as logging from dygraph.core import infer
from utils import get_environ_info
def parse_args(): def parse_args():
...@@ -37,18 +31,25 @@ def parse_args(): ...@@ -37,18 +31,25 @@ def parse_args():
parser.add_argument( parser.add_argument(
'--model_name', '--model_name',
dest='model_name', dest='model_name',
help="Model type for traing, which is one of ('UNet')", help='Model type for testing, which is one of {}'.format(
str(list(manager.MODELS.components_dict.keys()))),
type=str, type=str,
default='UNet') default='UNet')
# params of dataset # params of infer
parser.add_argument( parser.add_argument(
'--dataset', '--dataset',
dest='dataset', dest='dataset',
help= help="The dataset you want to test, which is one of {}".format(
"The dataset you want to train, which is one of ('OpticDiscSeg', 'Cityscapes')", str(list(DATASETS.keys()))),
type=str, type=str,
default='OpticDiscSeg') default='OpticDiscSeg')
parser.add_argument(
'--dataset_root',
dest='dataset_root',
help="dataset root directory",
type=str,
default=None)
# params of prediction # params of prediction
parser.add_argument( parser.add_argument(
...@@ -80,74 +81,26 @@ def parse_args(): ...@@ -80,74 +81,26 @@ def parse_args():
return parser.parse_args() return parser.parse_args()
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def infer(model, test_dataset=None, model_dir=None, save_dir='output'):
ckpt_path = os.path.join(model_dir, 'model')
para_state_dict, opti_state_dict = fluid.load_dygraph(ckpt_path)
model.set_dict(para_state_dict)
model.eval()
added_saved_dir = os.path.join(save_dir, 'added')
pred_saved_dir = os.path.join(save_dir, 'prediction')
logging.info("Start to predict...")
for im, im_info, im_path in tqdm.tqdm(test_dataset):
im = im[np.newaxis, ...]
im = to_variable(im)
pred, _ = model(im, mode='test')
pred = pred.numpy()
pred = np.squeeze(pred).astype('uint8')
keys = list(im_info.keys())
for k in keys[::-1]:
if k == 'shape_before_resize':
h, w = im_info[k][0], im_info[k][1]
pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST)
elif k == 'shape_before_padding':
h, w = im_info[k][0], im_info[k][1]
pred = pred[0:h, 0:w]
im_file = im_path.replace(test_dataset.data_dir, '')
if im_file[0] == '/':
im_file = im_file[1:]
# save added image
added_image = utils.visualize(im_path, pred, weight=0.6)
added_image_path = os.path.join(added_saved_dir, im_file)
mkdir(added_image_path)
cv2.imwrite(added_image_path, added_image)
# save prediction
pred_im = utils.visualize(im_path, pred, weight=0.0)
pred_saved_path = os.path.join(pred_saved_dir, im_file)
mkdir(pred_saved_path)
cv2.imwrite(pred_saved_path, pred_im)
def main(args): def main(args):
env_info = get_environ_info() env_info = get_environ_info()
places = fluid.CUDAPlace(ParallelEnv().dev_id) \ places = fluid.CUDAPlace(ParallelEnv().dev_id) \
if env_info['place'] == 'cuda' and fluid.is_compiled_with_cuda() \ if env_info['Paddle compiled with cuda'] and env_info['GPUs used'] \
else fluid.CPUPlace() else fluid.CPUPlace()
if args.dataset.lower() == 'opticdiscseg': if args.dataset not in DATASETS:
dataset = OpticDiscSeg raise Exception('`--dataset` is invalid. it should be one of {}'.format(
elif args.dataset.lower() == 'cityscapes': str(list(DATASETS.keys()))))
dataset = Cityscapes dataset = DATASETS[args.dataset]
else:
raise Exception(
"The --dataset set wrong. It should be one of ('OpticDiscSeg', 'Cityscapes')"
)
with fluid.dygraph.guard(places): with fluid.dygraph.guard(places):
test_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()]) test_transforms = T.Compose([T.Resize(args.input_size), T.Normalize()])
test_dataset = dataset(transforms=test_transforms, mode='test') test_dataset = dataset(
dataset_root=args.dataset_root,
transforms=test_transforms,
mode='test')
if args.model_name == 'UNet': model = manager.MODELS[args.model_name](
model = models.UNet(num_classes=test_dataset.num_classes) num_classes=test_dataset.num_classes)
infer( infer(
model, model,
......
...@@ -12,4 +12,8 @@ ...@@ -12,4 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .architectures import *
from .unet import UNet from .unet import UNet
from .deeplab import *
from .fcn import *
from .pspnet import *
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import layer_utils
from .hrnet import *
from .resnet_vd import *
from .xception_deeplab import *
from .mobilenetv3 import *
此差异已折叠。
# -*- encoding: utf-8 -*-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.nn.functional as F
from paddle import fluid
from paddle.fluid import dygraph
from paddle.fluid.dygraph import Conv2D
from paddle.nn import SyncBatchNorm as BatchNorm
from paddle.nn.layer import activation
class ConvBnRelu(dygraph.Layer):
    """Convolution followed by batch normalization and ReLU.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels.
        filter_size (int): convolution kernel size.
        using_sep_conv (bool): when True the convolution is a
            ``DepthwiseConvBnRelu`` instead of a plain ``Conv2D``.
        **kwargs: forwarded unchanged to the selected convolution layer.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 using_sep_conv=False,
                 **kwargs):
        super(ConvBnRelu, self).__init__()
        # Both layer types take the same leading positional arguments, so
        # only the class needs to be selected.
        conv_cls = DepthwiseConvBnRelu if using_sep_conv else Conv2D
        self.conv = conv_cls(num_channels, num_filters, filter_size, **kwargs)
        self.batch_norm = BatchNorm(num_filters)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x`` and return the result."""
        return F.relu(self.batch_norm(self.conv(x)))
class ConvBn(dygraph.Layer):
    """Convolution followed by batch normalization (no activation).

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels.
        filter_size (int): convolution kernel size.
        **kwargs: forwarded unchanged to ``Conv2D``.
    """

    def __init__(self, num_channels, num_filters, filter_size, **kwargs):
        super(ConvBn, self).__init__()
        self.conv = Conv2D(num_channels, num_filters, filter_size, **kwargs)
        self.batch_norm = BatchNorm(num_filters)

    def forward(self, x):
        """Apply conv -> batch norm to ``x`` and return the result."""
        return self.batch_norm(self.conv(x))
class ConvReluPool(dygraph.Layer):
    """3x3 convolution, ReLU, then 2x2 max pooling with stride 2.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels.
    """

    def __init__(self, num_channels, num_filters):
        super(ConvReluPool, self).__init__()
        # Fixed 3x3 kernel with stride 1, padding 1 and no dilation.
        self.conv = Conv2D(
            num_channels,
            num_filters,
            filter_size=3,
            stride=1,
            padding=1,
            dilation=1)

    def forward(self, x):
        """Apply conv -> ReLU -> max pool to ``x`` and return the result."""
        activated = F.relu(self.conv(x))
        return fluid.layers.pool2d(
            activated, pool_size=2, pool_type="max", pool_stride=2)
class ConvBnReluUpsample(dygraph.Layer):
    """``ConvBnRelu`` followed by bilinear upsampling.

    The original constructor called ``ConvBnRelu(num_channels, num_filters)``
    without the required ``filter_size`` argument, so building this layer
    always raised ``TypeError``. ``filter_size`` is now an optional parameter
    that is forwarded together with any extra keyword arguments.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels.
        filter_size (int): convolution kernel size. Defaults to 1 so that,
            with no padding supplied, the convolution keeps the spatial size.
            NOTE(review): the intended kernel size is not visible in this
            file -- confirm the default against the callers.
        **kwargs: forwarded unchanged to ``ConvBnRelu`` (e.g. ``padding``).
    """

    def __init__(self, num_channels, num_filters, filter_size=1, **kwargs):
        super(ConvBnReluUpsample, self).__init__()
        self.conv_bn_relu = ConvBnRelu(num_channels, num_filters, filter_size,
                                       **kwargs)

    def forward(self, x, upsample_scale=2):
        """Run conv-bn-relu, then resize the result bilinearly.

        Args:
            x: input feature map; dimensions 2 and 3 are scaled
                (assumes NCHW layout -- TODO confirm).
            upsample_scale (int): factor applied to dimensions 2 and 3.

        Returns:
            The upsampled feature map.
        """
        x = self.conv_bn_relu(x)
        new_shape = [x.shape[2] * upsample_scale, x.shape[3] * upsample_scale]
        return fluid.layers.resize_bilinear(x, new_shape)
class DepthwiseConvBnRelu(dygraph.Layer):
    """Depthwise convolution + BN, then 1x1 pointwise convolution + BN + ReLU.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels of the pointwise conv.
        filter_size (int): kernel size of the depthwise convolution.
        **kwargs: forwarded unchanged to the depthwise ``ConvBn``.
    """

    def __init__(self, num_channels, num_filters, filter_size, **kwargs):
        super(DepthwiseConvBnRelu, self).__init__()
        # One group per input channel makes the convolution depthwise.
        self.depthwise_conv = ConvBn(
            num_channels,
            num_filters=num_channels,
            filter_size=filter_size,
            groups=num_channels,
            use_cudnn=False,
            **kwargs)
        # The attribute name keeps its original spelling ("piontwise"): it is
        # the registered sublayer name, so renaming it could change the
        # parameter names of saved models.
        self.piontwise_conv = ConvBnRelu(
            num_channels, num_filters, filter_size=1, groups=1)

    def forward(self, x):
        """Apply the depthwise stage, then the pointwise stage."""
        return self.piontwise_conv(self.depthwise_conv(x))
class Activation(fluid.dygraph.Layer):
    """Wrapper that builds an activation layer from its lowercase name.

    The name is matched against the lowercased entries of
    ``paddle.nn.layer.activation.__all__``; the matching class is
    instantiated with no arguments and applied in ``forward``. With
    ``act=None`` the layer returns its input unchanged.

    For example:
    >>> relu = Activation("relu")      # wraps activation.ReLU()
    >>> sigmoid = Activation("sigmoid")  # wraps activation.Sigmoid()
    >>> identity = Activation()        # forward returns its input
    >>> not_exit_one = Activation("not_exit_one")
    KeyError: "not_exit_one does not exist in the current dict_keys([...])"

    Args:
        act (str): the activation name in lowercase, or None for identity.

    Raises:
        KeyError: if ``act`` is not None and matches no known activation.
    """

    def __init__(self, act=None):
        super(Activation, self).__init__()
        self._act = act
        upper_act_names = activation.__all__
        # Map lowercase name -> real class name. The loop variable is not
        # called ``act``: in a list comprehension it would overwrite the
        # parameter on Python 2, where comprehension variables leak.
        act_dict = {name.lower(): name for name in upper_act_names}
        if act is not None:
            if act in act_dict:
                # Plain attribute lookup; no need to eval a built string.
                self.act_func = getattr(activation, act_dict[act])()
            else:
                raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys()))

    def forward(self, x):
        """Apply the wrapped activation, or return ``x`` when none is set."""
        if self._act is not None:
            return self.act_func(x)
        return x
\ No newline at end of file
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.models.architectures import layer_utils
from dygraph.cvlibs import manager
# Names exported by ``from <this module> import *``: one factory function
# per MobileNetV3 variant (small/large) and width multiplier.
__all__ = [
    "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
    "MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0",
    "MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35",
    "MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75",
    "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25"
]
def make_divisible(v, divisor=8, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``.

    Args:
        v (int|float): value to round (typically a scaled channel count).
        divisor (int): the result is a multiple of this value unless
            ``min_value`` wins and is not itself a multiple.
        min_value (int, optional): lower bound for the result; defaults to
            ``divisor``.

    Returns:
        int: the rounded value, bumped up by one ``divisor`` if rounding
        would otherwise lose more than 10% of ``v``.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, nearest_multiple)
    # Never shrink the value by more than 10%.
    if result < 0.9 * v:
        result += divisor
    return result
def get_padding_same(kernel_size, dilation_rate):
    """Compute the padding for a dilated convolution from its kernel extent.

    The output size of a convolution follows
        (F - (k + (k - 1) * (r - 1)) + 2 * p) / s + 1 = F_new
    where F is the input feature-map size, k the kernel size, r the dilation
    rate, p the padding, s the stride and F_new the output size. The padding
    returned here is half of (effective kernel extent - 1), rounded down.

    Args:
        kernel_size (int): kernel size k.
        dilation_rate (int): dilation rate r.

    Returns:
        padding_same (int): padding value.
    """
    # Extent covered by the kernel once the dilation gaps are included.
    effective_kernel = kernel_size + (kernel_size - 1) * (dilation_rate - 1)
    return (effective_kernel - 1) // 2
class MobileNetV3(fluid.dygraph.Layer):
    """MobileNetV3 network that also returns intermediate feature maps.

    Args:
        scale (float): width multiplier applied to channel counts through
            ``make_divisible``.
        model_name (str): ``"large"`` or ``"small"``; selects the block
            configuration table.
        class_dim (int): output size of the final ``Linear`` layer.
        output_stride (int, optional): when given, strides in the block table
            are replaced by dilation once the accumulated stride would exceed
            this value (see ``modify_bottle_params``).
        **kwargs: accepted but not used in this class.

    Raises:
        NotImplementedError: if ``model_name`` is neither "large" nor "small".
    """

    def __init__(self, scale=1.0, model_name="small", class_dim=1000, output_stride=None, **kwargs):
        super(MobileNetV3, self).__init__()
        inplanes = 16
        # Each cfg row is: kernel size, expansion channels, output channels,
        # use squeeze-excitation, activation name, stride.
        if model_name == "large":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, "relu", 1],
                [3, 64, 24, False, "relu", 2],
                [3, 72, 24, False, "relu", 1],  # output 1 -> out_index=2
                [5, 72, 40, True, "relu", 2],
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],  # output 2 -> out_index=5
                [3, 240, 80, False, "hard_swish", 2],
                [3, 200, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 480, 112, True, "hard_swish", 1],
                [3, 672, 112, True, "hard_swish", 1],  # output 3 -> out_index=11
                [5, 672, 160, True, "hard_swish", 2],
                [5, 960, 160, True, "hard_swish", 1],
                [5, 960, 160, True, "hard_swish", 1],  # output 4 -> out_index=14
            ]
            # Indices of the blocks whose outputs are collected in forward().
            self.out_indices = [2, 5, 11, 14]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, "relu", 2],  # output 1 -> out_index=0
                [3, 72, 24, False, "relu", 2],
                [3, 88, 24, False, "relu", 1],  # row index 2 (original note: output 2 -> out_index=3)
                [5, 96, 40, True, "hard_swish", 2],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 120, 48, True, "hard_swish", 1],
                [5, 144, 48, True, "hard_swish", 1],  # output 3 -> out_index=7
                [5, 288, 96, True, "hard_swish", 2],
                [5, 576, 96, True, "hard_swish", 1],
                [5, 576, 96, True, "hard_swish", 1],  # output 4 -> out_index=10
            ]
            # NOTE(review): the second entry is 3, but the row annotated as
            # "output 2" above is row index 2; row 3 is the stride-2 block
            # with 40 channels. Confirm which block is meant to be collected.
            self.out_indices = [0, 3, 7, 10]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError(
                "mode[{}_model] is not implemented!".format(model_name))
        ###################################################
        # modify stride and dilation based on output_stride
        # One dilation rate per cfg row, 1 (no dilation) by default.
        self.dilation_cfg = [1] * len(self.cfg)
        self.modify_bottle_params(output_stride=output_stride)
        ###################################################
        # Stem convolution: 3x3, stride 2, hard_swish.
        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=make_divisible(inplanes * scale),
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act="hard_swish",
            name="conv1")
        # Plain Python list used for iteration in forward(); each block is
        # also registered with add_sublayer below.
        self.block_list = []
        inplanes = make_divisible(inplanes * scale)
        for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
            ######################################
            # add dilation rate
            dilation_rate = self.dilation_cfg[i]
            ######################################
            self.block_list.append(
                ResidualUnit(
                    in_c=inplanes,
                    mid_c=make_divisible(scale * exp),
                    out_c=make_divisible(scale * c),
                    filter_size=k,
                    stride=s,
                    dilation=dilation_rate,
                    use_se=se,
                    act=nl,
                    name="conv" + str(i + 2)))
            self.add_sublayer(
                sublayer=self.block_list[-1], name="conv" + str(i + 2))
            # The next block consumes this block's output channels.
            inplanes = make_divisible(scale * c)
        self.last_second_conv = ConvBNLayer(
            in_c=inplanes,
            out_c=make_divisible(scale * self.cls_ch_squeeze),
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act="hard_swish",
            name="conv_last")
        self.pool = Pool2D(
            pool_type="avg", global_pooling=True, use_cudnn=False)
        self.last_conv = Conv2D(
            num_channels=make_divisible(scale * self.cls_ch_squeeze),
            num_filters=self.cls_ch_expand,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            param_attr=ParamAttr(name="last_1x1_conv_weights"),
            bias_attr=False)
        self.out = Linear(
            input_dim=self.cls_ch_expand,
            output_dim=class_dim,
            param_attr=ParamAttr("fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))

    def modify_bottle_params(self, output_stride=None):
        """Rewrite ``self.cfg`` strides and ``self.dilation_cfg`` in place.

        Starting from an accumulated stride of 2 (the stem convolution), the
        block strides are multiplied in order. Once the product exceeds
        ``output_stride`` the block's stride is set to 1 and its stride
        factor is folded into the dilation rate, which is then recorded for
        that block and carried forward to the following ones.

        Args:
            output_stride (int, optional): target output stride; ``None``
                leaves the configuration untouched.

        Raises:
            Exception: if ``output_stride`` is given and is odd.
        """
        if output_stride is not None and output_stride % 2 != 0:
            raise Exception("output stride must to be even number")
        if output_stride is not None:
            stride = 2
            rate = 1
            for i, _cfg in enumerate(self.cfg):
                # _cfg[-1] is read before it is overwritten below, so the
                # accumulated stride always uses the original table value.
                stride = stride * _cfg[-1]
                if stride > output_stride:
                    rate = rate * _cfg[-1]
                    self.cfg[i][-1] = 1
                self.dilation_cfg[i] = rate

    def forward(self, inputs, label=None, dropout_prob=0.2):
        """Run the network.

        Args:
            inputs: input tensor fed to the stem convolution.
            label: not used in this method.
            dropout_prob (float): probability passed to the dropout applied
                before the final ``Linear`` layer.

        Returns:
            tuple: ``(x, feat_list)`` where ``x`` is the output of the final
            ``Linear`` layer and ``feat_list`` holds the outputs of the
            blocks whose index is in ``self.out_indices``.
        """
        x = self.conv1(inputs)
        # A feature list saves each downsampling feature.
        feat_list = []
        for i, block in enumerate(self.block_list):
            x = block(x)
            if i in self.out_indices:
                feat_list.append(x)
            #print("block {}:".format(i),x.shape, self.dilation_cfg[i])
        x = self.last_second_conv(x)
        x = self.pool(x)
        x = self.last_conv(x)
        x = fluid.layers.hard_swish(x)
        x = fluid.layers.dropout(x=x, dropout_prob=dropout_prob)
        # Flatten to [dim 0, dim 1] before the Linear layer.
        x = fluid.layers.reshape(x, shape=[x.shape[0], x.shape[1]])
        x = self.out(x)
        return x, feat_list
class ConvBNLayer(fluid.dygraph.Layer):
    """Convolution + batch norm with an optional relu / hard_swish activation.

    Args:
        in_c (int): number of input channels.
        out_c (int): number of output channels.
        filter_size (int): convolution kernel size.
        stride (int): convolution stride.
        padding (int): convolution padding.
        dilation (int): convolution dilation rate.
        num_groups (int): number of convolution groups.
        if_act (bool): whether ``forward`` applies an activation.
        act (str, optional): ``"relu"`` or ``"hard_swish"``; only consulted
            when ``if_act`` is True.
        use_cudnn (bool): forwarded to ``Conv2D``.
        name (str): prefix for the parameter names.
    """

    def __init__(self,
                 in_c,
                 out_c,
                 filter_size,
                 stride,
                 padding,
                 dilation=1,
                 num_groups=1,
                 if_act=True,
                 act=None,
                 use_cudnn=True,
                 name=""):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = fluid.dygraph.Conv2D(
            num_channels=in_c,
            num_filters=out_c,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=num_groups,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False,
            use_cudnn=use_cudnn,
            act=None)
        # Batch-norm scale and offset are created with an L2 decay
        # coefficient of 0.0.
        self.bn = BatchNorm(
            num_features=out_c,
            weight_attr=ParamAttr(
                name=name + "_bn_scale",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            bias_attr=ParamAttr(
                name=name + "_bn_offset",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)))
        # Kept so the layer structure is unchanged; forward() does not use
        # it and selects the activation from ``self.act`` instead.
        self._act_op = layer_utils.Activation(act=None)

    def forward(self, x):
        """Apply conv -> batch norm -> optional activation.

        Raises:
            ValueError: if ``if_act`` is True and ``act`` is neither
                ``"relu"`` nor ``"hard_swish"``. The original code printed a
                message and called ``exit()``, which terminated the whole
                interpreter from inside a layer.
        """
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            if self.act == "relu":
                x = fluid.layers.relu(x)
            elif self.act == "hard_swish":
                x = fluid.layers.hard_swish(x)
            else:
                raise ValueError(
                    "The activation function is selected incorrectly: "
                    "got {!r}, expected 'relu' or 'hard_swish'.".format(
                        self.act))
        return x
class ResidualUnit(fluid.dygraph.Layer):
    """Inverted-residual block: 1x1 expand, depthwise conv, optional SE, 1x1 linear.

    Args:
        in_c (int): number of input channels.
        mid_c (int): number of channels after the expansion convolution.
        out_c (int): number of output channels.
        filter_size (int): kernel size of the depthwise convolution.
        stride (int): stride of the depthwise convolution.
        use_se (bool): whether to insert an ``SEModule`` after the depthwise
            convolution.
        dilation (int): dilation rate of the depthwise convolution.
        act (str, optional): activation name for the expand and depthwise
            convolutions.
        name (str): prefix for the parameter names.
    """

    def __init__(self,
                 in_c,
                 mid_c,
                 out_c,
                 filter_size,
                 stride,
                 use_se,
                 dilation=1,
                 act=None,
                 name=''):
        super(ResidualUnit, self).__init__()
        self.dilation = dilation
        self.if_se = use_se
        # The identity shortcut is only valid when the block changes neither
        # the resolution nor the channel count.
        self.if_shortcut = stride == 1 and in_c == out_c

        self.expand_conv = ConvBNLayer(
            in_c=in_c,
            out_c=mid_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + "_expand")

        # Padding is derived from the kernel size and dilation rate.
        depthwise_padding = get_padding_same(filter_size, dilation)
        self.bottleneck_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=mid_c,
            filter_size=filter_size,
            stride=stride,
            padding=depthwise_padding,
            dilation=dilation,
            num_groups=mid_c,
            if_act=True,
            act=act,
            name=name + "_depthwise")

        if self.if_se:
            self.mid_se = SEModule(mid_c, name=name + "_se")

        self.linear_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=out_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name=name + "_linear")

    def forward(self, inputs):
        """Run the block; add ``inputs`` back when the shortcut applies."""
        out = self.bottleneck_conv(self.expand_conv(inputs))
        if self.if_se:
            out = self.mid_se(out)
        out = self.linear_conv(out)
        if not self.if_shortcut:
            return out
        return fluid.layers.elementwise_add(inputs, out)
class SEModule(fluid.dygraph.Layer):
    """Squeeze-and-excitation gate built from global pooling and two 1x1 convs.

    Args:
        channel (int): number of input (and output) channels.
        reduction (int): the hidden 1x1 convolution has
            ``channel // reduction`` channels.
        name (str): prefix for the parameter names.
    """

    def __init__(self, channel, reduction=4, name=""):
        super(SEModule, self).__init__()
        squeezed_channels = channel // reduction
        self.avg_pool = fluid.dygraph.Pool2D(
            pool_type="avg", global_pooling=True, use_cudnn=False)
        # Reduce the channels; ReLU is applied inside the convolution.
        self.conv1 = fluid.dygraph.Conv2D(
            num_channels=channel,
            num_filters=squeezed_channels,
            filter_size=1,
            stride=1,
            padding=0,
            act="relu",
            param_attr=ParamAttr(name=name + "_1_weights"),
            bias_attr=ParamAttr(name=name + "_1_offset"))
        # Restore the channel count; hard_sigmoid is applied in forward().
        self.conv2 = fluid.dygraph.Conv2D(
            num_channels=squeezed_channels,
            num_filters=channel,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            param_attr=ParamAttr(name + "_2_weights"),
            bias_attr=ParamAttr(name=name + "_2_offset"))

    def forward(self, inputs):
        """Scale ``inputs`` by the gate computed from its pooled statistics."""
        pooled = self.avg_pool(inputs)
        gate = fluid.layers.hard_sigmoid(self.conv2(self.conv1(pooled)))
        return fluid.layers.elementwise_mul(x=inputs, y=gate, axis=0)
def MobileNetV3_small_x0_35(**kwargs):
    """Build the "small" MobileNetV3 with scale 0.35; kwargs go to the constructor."""
    return MobileNetV3(model_name="small", scale=0.35, **kwargs)
def MobileNetV3_small_x0_5(**kwargs):
    """Build the "small" MobileNetV3 with scale 0.5; kwargs go to the constructor."""
    return MobileNetV3(model_name="small", scale=0.5, **kwargs)
def MobileNetV3_small_x0_75(**kwargs):
    """Build the "small" MobileNetV3 with scale 0.75; kwargs go to the constructor."""
    return MobileNetV3(model_name="small", scale=0.75, **kwargs)
@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0(**kwargs):
    """Build the "small" MobileNetV3 with scale 1.0; registered in ``manager.BACKBONES``."""
    return MobileNetV3(model_name="small", scale=1.0, **kwargs)
def MobileNetV3_small_x1_25(**kwargs):
    """Build the "small" MobileNetV3 with scale 1.25; kwargs go to the constructor."""
    return MobileNetV3(model_name="small", scale=1.25, **kwargs)
def MobileNetV3_large_x0_35(**kwargs):
    """Build the "large" MobileNetV3 with scale 0.35; kwargs go to the constructor."""
    return MobileNetV3(model_name="large", scale=0.35, **kwargs)
def MobileNetV3_large_x0_5(**kwargs):
    """Build the "large" MobileNetV3 with scale 0.5; kwargs go to the constructor."""
    return MobileNetV3(model_name="large", scale=0.5, **kwargs)
def MobileNetV3_large_x0_75(**kwargs):
    """Build the "large" MobileNetV3 with scale 0.75; kwargs go to the constructor."""
    return MobileNetV3(model_name="large", scale=0.75, **kwargs)
@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0(**kwargs):
    """Build the "large" MobileNetV3 with scale 1.0; registered in ``manager.BACKBONES``."""
    return MobileNetV3(model_name="large", scale=1.0, **kwargs)
def MobileNetV3_large_x1_25(**kwargs):
    """Build the "large" MobileNetV3 with scale 1.25; kwargs go to the constructor."""
    return MobileNetV3(model_name="large", scale=1.25, **kwargs)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.utils import utils
from dygraph.models.architectures import layer_utils
from dygraph.cvlibs import manager
# Names exported by ``from <this module> import *``.
# NOTE(review): ``ResNet200_vd`` is defined in this module but is not listed
# here -- confirm whether that omission is intentional.
__all__ = [
    "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
]
class ConvBNLayer(fluid.dygraph.Layer):
    """Optional 2x2 average pooling, then convolution, batch norm and activation.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels.
        filter_size (int): convolution kernel size.
        stride (int): convolution stride.
        dilation (int): convolution dilation rate. When it is not 1 the
            convolution itself uses no padding.
        groups (int): number of convolution groups.
        is_vd_mode (bool): when True the input is average-pooled (2x2,
            stride 2) before the convolution.
        act (str, optional): activation name passed to
            ``layer_utils.Activation``.
        name (str): prefix for the parameter names; it is concatenated with
            string suffixes, so it must be a string.
    """

    def __init__(
            self,
            num_channels,
            num_filters,
            filter_size,
            stride=1,
            dilation=1,
            groups=1,
            is_vd_mode=False,
            act=None,
            name=None, ):
        super(ConvBNLayer, self).__init__()
        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = Pool2D(
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='avg',
            ceil_mode=True)
        # Pad inside the convolution only when it is not dilated.
        conv_padding = (filter_size - 1) // 2 if dilation == 1 else 0
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=conv_padding,
            dilation=dilation,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        # "conv1" maps to "bn_conv1"; any other name has its first three
        # characters replaced by "bn".
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        self._batch_norm = BatchNorm(
            num_filters,
            weight_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'))
        self._act_op = layer_utils.Activation(act=act)

    def forward(self, inputs):
        """Apply (optional pooling ->) conv -> batch norm -> activation."""
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        return self._act_op(self._batch_norm(self._conv(inputs)))
class BottleneckBlock(fluid.dygraph.Layer):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 (x4 channels) plus shortcut.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): base width; the block outputs ``num_filters * 4``
            channels.
        stride (int): stride of the 3x3 convolution.
        shortcut (bool): True to add the input directly; False to project it
            with a 1x1 ``ConvBNLayer`` first.
        if_first (bool): disables the pooled projection shortcut when True.
        dilation (int): dilation rate of the 3x3 convolution.
        name (str): prefix for the parameter names.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 if_first=False,
                 dilation=1,
                 name=None):
        super(BottleneckBlock, self).__init__()
        self.dilation = dilation
        self.shortcut = shortcut
        expanded_filters = num_filters * 4

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            dilation=dilation,
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=expanded_filters,
            filter_size=1,
            act=None,
            name=name + "_branch2c")

        if not shortcut:
            # The projection average-pools its input unless this is the
            # first block or the block keeps the resolution.
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=expanded_filters,
                filter_size=1,
                stride=1,
                is_vd_mode=not (if_first or stride == 1),
                name=name + "_branch1")

    def forward(self, inputs):
        """Run the bottleneck branch, add the shortcut and apply ReLU."""
        y = self.conv0(inputs)
        # A dilated 3x3 convolution is built without padding, so the
        # feature map is padded explicitly here by the dilation rate.
        if self.dilation > 1:
            pad = self.dilation
            y = fluid.layers.pad(y, [0, 0, 0, 0, pad, pad, pad, pad])
        branch = self.conv2(self.conv1(y))

        short = inputs if self.shortcut else self.short(inputs)
        summed = fluid.layers.elementwise_add(x=short, y=branch)
        relu_helper = LayerHelper(self.full_name(), act='relu')
        return relu_helper.append_activation(summed)
class BasicBlock(fluid.dygraph.Layer):
    """Basic residual block: two 3x3 convolutions plus shortcut.

    Args:
        num_channels (int): number of input channels.
        num_filters (int): number of output channels.
        stride (int): stride of the first 3x3 convolution.
        shortcut (bool): True to add the input directly; False to project it
            with a 1x1 ``ConvBNLayer`` first.
        if_first (bool): disables the pooled projection shortcut when True.
        name (str): prefix for the parameter names.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.shortcut = shortcut

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")

        if not shortcut:
            # The projection average-pools its input unless this is the
            # first block.
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters,
                filter_size=1,
                stride=1,
                is_vd_mode=not if_first,
                name=name + "_branch1")

    def forward(self, inputs):
        """Run both convolutions, add the shortcut and apply ReLU."""
        branch = self.conv1(self.conv0(inputs))
        short = inputs if self.shortcut else self.short(inputs)
        summed = fluid.layers.elementwise_add(x=short, y=branch)
        relu_helper = LayerHelper(self.full_name(), act='relu')
        return relu_helper.append_activation(summed)
class ResNet_vd(fluid.dygraph.Layer):
    """ResNet-vd network that also returns the output of every stage.

    Args:
        layers (int): network depth; one of 18, 34, 50, 101, 152, 200.
        class_dim (int): output size of the final ``Linear`` layer.
        output_stride (int, optional): 8 or 16 enables dilation in the later
            stages of the bottleneck variants (depth >= 50); any other value
            leaves ``dilation_dict`` as None.
        multi_grid (tuple): per-block dilation multipliers applied in the
            last stage of the bottleneck variants. It is indexed by block
            position, so it needs at least as many entries as that stage has
            blocks.
        **kwargs: accepted but not used in this class.
    """

    def __init__(self, layers=50, class_dim=1000, output_stride=None, multi_grid=(1, 2, 4), **kwargs):
        super(ResNet_vd, self).__init__()
        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)
        # Number of residual blocks in each of the four stages.
        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        # Input channels of the first block of each stage.
        num_channels = [64, 256, 512,
                        1024] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]
        # Maps stage index -> dilation rate for that stage.
        dilation_dict=None
        if output_stride == 8:
            dilation_dict = {2: 2, 3: 4}
        elif output_stride == 16:
            dilation_dict = {3: 2}
        # Stem: three 3x3 convolutions followed by a max pool.
        self.conv1_1 = ConvBNLayer(
            num_channels=3,
            num_filters=32,
            filter_size=3,
            stride=2,
            act='relu',
            name="conv1_1")
        self.conv1_2 = ConvBNLayer(
            num_channels=32,
            num_filters=32,
            filter_size=3,
            stride=1,
            act='relu',
            name="conv1_2")
        self.conv1_3 = ConvBNLayer(
            num_channels=32,
            num_filters=64,
            filter_size=3,
            stride=1,
            act='relu',
            name="conv1_3")
        self.pool2d_max = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
        # self.block_list = []
        # List of stages; each stage is a list of blocks registered through
        # add_sublayer.
        self.stage_list = []
        if layers >= 50:
            for block in range(len(depth)):
                # The first block of every stage uses a projection shortcut.
                shortcut = False
                block_list=[]
                for i in range(depth[block]):
                    # NOTE(review): depth 200 is not in this list, so its
                    # third stage (48 blocks) is named with chr(97 + i),
                    # which leaves the range a-z -- confirm this is intended.
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    ###############################################################################
                    # Add dilation rate for some segmentation tasks, if dilation_dict is not None.
                    dilation_rate = dilation_dict[block] if dilation_dict and block in dilation_dict else 1
                    # Actually block here is 'stage', and i is 'block' in 'stage'
                    # At the stage 4, expand the the dilation_rate using multi_grid, default (1, 2, 4)
                    # This multiplication also runs when dilation_dict is
                    # None, so the last stage is dilated by multi_grid even
                    # without an output_stride.
                    if block == 3:
                        dilation_rate = dilation_rate * multi_grid[i]
                    #print("stage {}, block {}: dilation rate".format(block, i), dilation_rate)
                    ###############################################################################
                    # Stride 2 is used only by the first block of stages 1-3,
                    # and only when that block is not dilated.
                    bottleneck_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BottleneckBlock(
                            num_channels=num_channels[block] if i == 0 else num_filters[block] * 4,
                            num_filters=num_filters[block],
                            stride=2 if i == 0 and block != 0 and dilation_rate == 1 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name,
                            dilation=dilation_rate))
                    block_list.append(bottleneck_block)
                    shortcut = True
                self.stage_list.append(block_list)
        else:
            # Basic-block variants (18/34 layers); no dilation is applied on
            # this path.
            for block in range(len(depth)):
                shortcut = False
                block_list=[]
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    basic_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BasicBlock(
                            num_channels=num_channels[block]
                            if i == 0 else num_filters[block],
                            num_filters=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    block_list.append(basic_block)
                    shortcut = True
                self.stage_list.append(block_list)
        self.pool2d_avg = Pool2D(
            pool_size=7, pool_type='avg', global_pooling=True)
        # Width of the pooled feature vector fed to the classifier.
        self.pool2d_avg_channels = num_channels[-1] * 2
        stdv = 1.0 / math.sqrt(self.pool2d_avg_channels * 1.0)
        self.out = Linear(
            self.pool2d_avg_channels,
            class_dim,
            param_attr=ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name="fc_0.w_0"),
            bias_attr=ParamAttr(name="fc_0.b_0"))

    def forward(self, inputs):
        """Run the network.

        Args:
            inputs: input tensor fed to the stem convolutions.

        Returns:
            tuple: ``(y, feat_list)`` where ``y`` is the output of the final
            ``Linear`` layer and ``feat_list`` holds one feature map per
            stage (the output of the stage's last block).
        """
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        y = self.pool2d_max(y)
        # A feature list saves the output feature map of each stage.
        feat_list = []
        for i, stage in enumerate(self.stage_list):
            for j, block in enumerate(stage):
                y = block(y)
                #print("stage {} block {}".format(i+1, j+1), y.shape)
            feat_list.append(y)
        y = self.pool2d_avg(y)
        y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_channels])
        y = self.out(y)
        return y, feat_list

    # Disabled in the original source: optional pretrained-weight loading.
    # def init_weight(self, pretrained_model=None):
    #     if pretrained_model is not None:
    #         if os.path.exists(pretrained_model):
    #             utils.load_pretrained_model(self, pretrained_model)
def ResNet18_vd(**args):
    """Build an 18-layer ``ResNet_vd``; keyword arguments go to the constructor."""
    return ResNet_vd(layers=18, **args)
def ResNet34_vd(**args):
    """Build a 34-layer ``ResNet_vd``; keyword arguments go to the constructor."""
    return ResNet_vd(layers=34, **args)
@manager.BACKBONES.add_component
def ResNet50_vd(**args):
    """Build a 50-layer ``ResNet_vd``; registered in ``manager.BACKBONES``."""
    return ResNet_vd(layers=50, **args)
@manager.BACKBONES.add_component
def ResNet101_vd(**args):
    """Build a 101-layer ``ResNet_vd``; registered in ``manager.BACKBONES``."""
    return ResNet_vd(layers=101, **args)
def ResNet152_vd(**args):
    """Build a 152-layer ``ResNet_vd``; keyword arguments go to the constructor."""
    return ResNet_vd(layers=152, **args)
def ResNet200_vd(**args):
    """Build a 200-layer ``ResNet_vd``; keyword arguments go to the constructor."""
    return ResNet_vd(layers=200, **args)
\ No newline at end of file
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.models.architectures import layer_utils
from dygraph.cvlibs import manager
# Names exported by ``from <this module> import *`` (presumably the factory
# functions defined further down in this module -- verify).
__all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"]
def check_data(data, number):
    """Normalize a per-block setting to a sequence of length ``number``.

    Args:
        data (int|sequence): a single integer to repeat, or a sequence that
            must already have ``number`` items.
        number (int): required length.

    Returns:
        list|sequence: ``[data] * number`` for an integer, otherwise ``data``
        itself (the same object, not a copy).

    Raises:
        AssertionError: if ``data`` is a sequence whose length is not
            ``number``.
    """
    # isinstance (rather than ``type(data) == int``) also accepts int
    # subclasses, which previously fell through to len() and failed.
    if isinstance(data, int):
        return [data] * number
    assert len(data) == number, \
        "expected {} items but got {}".format(number, len(data))
    return data
def check_stride(s, os):
    """Return True when stride ``s`` does not exceed the output stride ``os``."""
    return bool(s <= os)
def check_points(count, points):
    """Tell whether ``count`` is one of the requested ``points``.

    Args:
        count: the value to look for.
        points: ``None``, a list of values, or a single value.

    Returns:
        bool: False when ``points`` is None; membership when ``points`` is a
        list; equality with ``points`` otherwise (so a tuple is compared as
        a whole, not searched).
    """
    if points is None:
        return False
    if isinstance(points, list):
        return True if count in points else False
    return True if count == points else False
def gen_bottleneck_params(backbone='xception_65'):
    """Return the block configuration for an Xception backbone.

    Args:
        backbone (str): ``'xception_41'``, ``'xception_65'`` or
            ``'xception_71'``.

    Returns:
        dict: a new dict on every call with the keys ``"entry_flow"``,
        ``"middle_flow"`` and ``"exit_flow"``. Each value is a tuple of
        (number of blocks, stride(s), channel count(s)).

    Raises:
        Exception: if ``backbone`` is not one of the supported names.
    """
    # The variants differ only in the entry flow and in the number of
    # middle-flow blocks; the exit flow is shared by all three.
    if backbone == 'xception_65' or backbone == 'xception_41':
        entry_flow = (3, [2, 2, 2], [128, 256, 728])
        middle_blocks = 16 if backbone == 'xception_65' else 8
    elif backbone == 'xception_71':
        entry_flow = (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728])
        middle_blocks = 16
    else:
        raise Exception(
            "xception backbont only support xception_41/xception_65/xception_71"
        )
    return {
        "entry_flow": entry_flow,
        "middle_flow": (middle_blocks, 1, 728),
        "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]]),
    }
class ConvBNLayer(fluid.dygraph.Layer):
    """Bias-free convolution, batch norm and an optional activation.

    Args:
        input_channels (int): number of input channels.
        output_channels (int): number of output channels.
        filter_size (int): convolution kernel size.
        stride (int): convolution stride.
        padding (int): convolution padding.
        act (str, optional): activation name passed to
            ``layer_utils.Activation``.
        name (str): prefix for the parameter names; it is concatenated with
            string suffixes, so it must be a string.
    """

    def __init__(self,
                 input_channels,
                 output_channels,
                 filter_size,
                 stride=1,
                 padding=0,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self._conv = Conv2D(
            num_channels=input_channels,
            num_filters=output_channels,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            param_attr=ParamAttr(name=name + "/weights"),
            bias_attr=False)
        bn_prefix = name + "/BatchNorm/"
        self._bn = BatchNorm(
            num_features=output_channels,
            epsilon=1e-3,
            momentum=0.99,
            weight_attr=ParamAttr(name=bn_prefix + "gamma"),
            bias_attr=ParamAttr(name=bn_prefix + "beta"))
        self._act_op = layer_utils.Activation(act=act)

    def forward(self, inputs):
        """Apply conv -> batch norm -> activation and return the result."""
        normalized = self._bn(self._conv(inputs))
        return self._act_op(normalized)
class Seperate_Conv(fluid.dygraph.Layer):
    """Separable convolution: depthwise conv + BN + act, then 1x1 conv + BN + act.

    Args:
        input_channels (int): number of input channels.
        output_channels (int): number of output channels of the 1x1 conv.
        stride (int): stride of the depthwise convolution.
        filter (int): kernel size of the depthwise convolution.
        dilation (int): dilation rate of the depthwise convolution.
        act (str, optional): activation name applied after each batch norm.
        name (str): prefix for the parameter names; it is concatenated with
            string suffixes, so it must be a string.
    """

    def __init__(self,
                 input_channels,
                 output_channels,
                 stride,
                 filter,
                 dilation=1,
                 act=None,
                 name=None):
        super(Seperate_Conv, self).__init__()
        depthwise_prefix = name + "/depthwise/"
        pointwise_prefix = name + "/pointwise/"

        # Depthwise stage: one group per input channel; the padding is
        # scaled by the dilation rate.
        self._conv1 = Conv2D(
            num_channels=input_channels,
            num_filters=input_channels,
            filter_size=filter,
            stride=stride,
            groups=input_channels,
            padding=filter // 2 * dilation,
            dilation=dilation,
            param_attr=ParamAttr(name=depthwise_prefix + "weights"),
            bias_attr=False)
        self._bn1 = BatchNorm(
            input_channels,
            epsilon=1e-3,
            momentum=0.99,
            weight_attr=ParamAttr(name=depthwise_prefix + "BatchNorm/gamma"),
            bias_attr=ParamAttr(name=depthwise_prefix + "BatchNorm/beta"))
        self._act_op1 = layer_utils.Activation(act=act)

        # Pointwise stage: 1x1 convolution that mixes the channels.
        self._conv2 = Conv2D(
            input_channels,
            output_channels,
            1,
            stride=1,
            groups=1,
            padding=0,
            param_attr=ParamAttr(name=pointwise_prefix + "weights"),
            bias_attr=False)
        self._bn2 = BatchNorm(
            output_channels,
            epsilon=1e-3,
            momentum=0.99,
            weight_attr=ParamAttr(name=pointwise_prefix + "BatchNorm/gamma"),
            bias_attr=ParamAttr(name=pointwise_prefix + "BatchNorm/beta"))
        self._act_op2 = layer_utils.Activation(act=act)

    def forward(self, inputs):
        """Apply the depthwise stage, then the pointwise stage."""
        depthwise = self._act_op1(self._bn1(self._conv1(inputs)))
        return self._act_op2(self._bn2(self._conv2(depthwise)))
class Xception_Block(fluid.dygraph.Layer):
    """Three chained separable convolutions with an optional skip connection.

    Args:
        input_channels (int): number of input channels.
        output_channels (int|list): output channels of the three separable
            convolutions; an int is repeated three times.
        strides (int|list): strides of the three separable convolutions; an
            int is repeated three times.
        filter_size (int|list): kernel sizes of the three separable
            convolutions; an int is repeated three times.
        dilation (int): dilation rate shared by the three convolutions.
        skip_conv (bool): when True the skip path is a 1x1 ``ConvBNLayer``
            projection, otherwise the raw input.
        has_skip (bool): when False no skip connection is added.
        activation_fn_in_separable_conv (bool): when True ReLU is applied
            inside each separable convolution; when False ReLU is applied to
            the input of each one instead.
        name (str): prefix for the parameter names.
    """

    def __init__(self,
                 input_channels,
                 output_channels,
                 strides=1,
                 filter_size=3,
                 dilation=1,
                 skip_conv=True,
                 has_skip=True,
                 activation_fn_in_separable_conv=False,
                 name=None):
        super(Xception_Block, self).__init__()
        repeat_number = 3
        output_channels = check_data(output_channels, repeat_number)
        filter_size = check_data(filter_size, repeat_number)
        strides = check_data(strides, repeat_number)
        self.has_skip = has_skip
        self.skip_conv = skip_conv
        self.activation_fn_in_separable_conv = activation_fn_in_separable_conv

        # The two configurations differ only in whether the separable
        # convolutions carry their own ReLU (None is Seperate_Conv's default).
        sep_act = "relu" if activation_fn_in_separable_conv else None
        self._conv1 = Seperate_Conv(
            input_channels,
            output_channels[0],
            stride=strides[0],
            filter=filter_size[0],
            act=sep_act,
            dilation=dilation,
            name=name + "/separable_conv1")
        self._conv2 = Seperate_Conv(
            output_channels[0],
            output_channels[1],
            stride=strides[1],
            filter=filter_size[1],
            act=sep_act,
            dilation=dilation,
            name=name + "/separable_conv2")
        self._conv3 = Seperate_Conv(
            output_channels[1],
            output_channels[2],
            stride=strides[2],
            filter=filter_size[2],
            act=sep_act,
            dilation=dilation,
            name=name + "/separable_conv3")

        if has_skip and skip_conv:
            # Projection for the skip path, using the last stride and the
            # last channel count.
            self._short = ConvBNLayer(
                input_channels,
                output_channels[-1],
                1,
                stride=strides[-1],
                padding=0,
                name=name + "/shortcut")

    def forward(self, inputs):
        """Run the three separable convolutions and add the skip path if any."""
        layer_helper = LayerHelper(self.full_name(), act='relu')
        if self.activation_fn_in_separable_conv:
            x = self._conv3(self._conv2(self._conv1(inputs)))
        else:
            # Pre-activation: ReLU is applied to the input of every conv.
            x = inputs
            for sep_conv in (self._conv1, self._conv2, self._conv3):
                x = sep_conv(layer_helper.append_activation(x))

        if self.has_skip is False:
            return x
        skip = self._short(inputs) if self.skip_conv else inputs
        return fluid.layers.elementwise_add(x, skip)
class XceptionDeeplab(fluid.dygraph.Layer):
    """Xception backbone assembled from a stem plus entry/middle/exit flows.

    The stem is two ``ConvBNLayer`` instances; the entry, middle and exit
    flows are stacks of ``Xception_Block`` whose block counts, strides and
    channel widths come from ``gen_bottleneck_params(backbone)``.

    Args:
        backbone: Backbone identifier passed to ``gen_bottleneck_params``
            and used as the prefix of every sublayer name.
        output_stride: Limit handed to ``check_stride`` together with the
            accumulated stride; a block keeps its configured stride only
            while ``check_stride`` accepts it, otherwise it uses stride 1.
        class_dim: Output size of the final ``Linear`` layer.
        **kwargs: Accepted for call-site compatibility and not used.
    """

    def __init__(self, backbone, output_stride=16, class_dim=1000, **kwargs):
        super(XceptionDeeplab, self).__init__()

        bottleneck_params = gen_bottleneck_params(backbone)
        self.backbone = backbone

        # Stem: the first convolution has stride 2, which is why the
        # accumulated stride below starts at 2.
        self._conv1 = ConvBNLayer(
            3,
            32,
            3,
            stride=2,
            padding=1,
            act="relu",
            name=self.backbone + "/entry_flow/conv1")
        self._conv2 = ConvBNLayer(
            32,
            64,
            3,
            stride=1,
            padding=1,
            act="relu",
            name=self.backbone + "/entry_flow/conv2")

        # Reference notes kept from the original author:
        #   bottleneck_params = {
        #       "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
        #       "middle_flow": (16, 1, 728),
        #       "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
        #   }
        #   output_stride == 16: entry_block3_stride = 2,
        #       middle_block_dilation = 1, exit_block_dilations = (1, 2)
        #   output_stride == 8:  entry_block3_stride = 1,
        #       middle_block_dilation = 2, exit_block_dilations = (2, 4)
        # NOTE(review): only the second exit block receives a dilation
        # (fixed at 2) below; the per-output_stride dilations listed in the
        # notes are not applied anywhere in this class - confirm intent.

        # ---- entry flow ----
        self.block_num = bottleneck_params["entry_flow"][0]
        self.strides = bottleneck_params["entry_flow"][1]
        self.chns = bottleneck_params["entry_flow"][2]
        self.strides = check_data(self.strides, self.block_num)
        self.chns = check_data(self.chns, self.block_num)

        self.entry_flow = []
        self.middle_flow = []

        self.stride = 2
        self.output_stride = output_stride
        s = self.stride  # accumulated stride so far

        for i in range(self.block_num):
            stride = self.strides[i] if check_stride(
                s * self.strides[i], self.output_stride) else 1
            block_name = self.backbone + "/entry_flow/block" + str(i + 1)
            xception_block = self.add_sublayer(
                block_name,
                Xception_Block(
                    input_channels=64 if i == 0 else self.chns[i - 1],
                    output_channels=self.chns[i],
                    # Use the clamped per-block stride. The previous code
                    # passed self.stride (always 2), so the clamp computed
                    # above was ignored and the tracked stride `s` disagreed
                    # with the real downsampling (e.g. output_stride == 8).
                    strides=[1, 1, stride],
                    name=block_name))
            self.entry_flow.append(xception_block)
            s = s * stride
        self.stride = s

        # ---- middle flow ----
        self.block_num = bottleneck_params["middle_flow"][0]
        self.strides = bottleneck_params["middle_flow"][1]
        self.chns = bottleneck_params["middle_flow"][2]
        self.strides = check_data(self.strides, self.block_num)
        self.chns = check_data(self.chns, self.block_num)

        s = self.stride
        for i in range(self.block_num):
            stride = self.strides[i] if check_stride(
                s * self.strides[i], self.output_stride) else 1
            block_name = self.backbone + "/middle_flow/block" + str(i + 1)
            xception_block = self.add_sublayer(
                block_name,
                Xception_Block(
                    input_channels=728,
                    output_channels=728,
                    # Clamped stride, consistent with the entry and exit
                    # flows and with the bookkeeping of `s`.
                    strides=[1, 1, stride],
                    skip_conv=False,
                    name=block_name))
            self.middle_flow.append(xception_block)
            s = s * stride
        self.stride = s

        # ---- exit flow ----
        self.block_num = bottleneck_params["exit_flow"][0]
        self.strides = bottleneck_params["exit_flow"][1]
        self.chns = bottleneck_params["exit_flow"][2]
        self.strides = check_data(self.strides, self.block_num)
        self.chns = check_data(self.chns, self.block_num)

        s = self.stride
        stride = self.strides[0] if check_stride(
            s * self.strides[0], self.output_stride) else 1
        self._exit_flow_1 = Xception_Block(
            728,
            self.chns[0], [1, 1, stride],
            name=self.backbone + "/exit_flow/block1")
        s = s * stride

        stride = self.strides[1] if check_stride(
            s * self.strides[1], self.output_stride) else 1
        self._exit_flow_2 = Xception_Block(
            self.chns[0][-1],
            self.chns[1], [1, 1, stride],
            dilation=2,
            has_skip=False,
            activation_fn_in_separable_conv=True,
            name=self.backbone + "/exit_flow/block2")
        s = s * stride
        self.stride = s

        # Classification head: dropout, global average pooling, linear.
        self._drop = Dropout(p=0.5)
        self._pool = Pool2D(pool_type="avg", global_pooling=True)
        self._fc = Linear(
            self.chns[1][-1],
            class_dim,
            param_attr=ParamAttr(name="fc_weights"),
            bias_attr=ParamAttr(name="fc_bias"))

    def forward(self, inputs):
        """Run the backbone and the classification head.

        Args:
            inputs: Input passed to the first stem convolution.

        Returns:
            A tuple ``(x, feat_list)``. ``x`` is the output of the final
            linear layer. ``feat_list`` holds two intermediate feature maps:
            the output of the first entry-flow block, then the output of the
            second exit-flow block.
        """
        x = self._conv1(inputs)
        x = self._conv2(x)

        feat_list = []
        for i, ef in enumerate(self.entry_flow):
            x = ef(x)
            if i == 0:
                # Output of the first entry block is kept as a feature.
                feat_list.append(x)

        for mf in self.middle_flow:
            x = mf(x)

        x = self._exit_flow_1(x)
        x = self._exit_flow_2(x)
        feat_list.append(x)

        x = self._drop(x)
        x = self._pool(x)
        # Drop axes 2 and 3 of the pooled tensor before the linear layer.
        x = fluid.layers.squeeze(x, axes=[2, 3])
        x = self._fc(x)
        return x, feat_list
def Xception41_deeplab(**args):
    """Build an ``XceptionDeeplab`` for the 'xception_41' backbone.

    Args:
        **args: Keyword arguments forwarded unchanged to ``XceptionDeeplab``.

    Returns:
        The constructed ``XceptionDeeplab`` instance.
    """
    return XceptionDeeplab('xception_41', **args)
@manager.BACKBONES.add_component
def Xception65_deeplab(**args):
    """Build an ``XceptionDeeplab`` for the "xception_65" backbone.

    The function is passed through ``manager.BACKBONES.add_component``.

    Args:
        **args: Keyword arguments forwarded unchanged to ``XceptionDeeplab``.

    Returns:
        The constructed ``XceptionDeeplab`` instance.
    """
    return XceptionDeeplab("xception_65", **args)
def Xception71_deeplab(**args):
    """Build an ``XceptionDeeplab`` for the "xception_71" backbone.

    Args:
        **args: Keyword arguments forwarded unchanged to ``XceptionDeeplab``.

    Returns:
        The constructed ``XceptionDeeplab`` instance.
    """
    return XceptionDeeplab("xception_71", **args)
\ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
...@@ -12,8 +12,9 @@ ...@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from . import logging from . import logger
from . import download from . import download
from .metrics import ConfusionMatrix from .metrics import ConfusionMatrix
from .utils import * from .utils import *
from .timer import Timer, calculate_eta from .timer import Timer, calculate_eta
from .get_environ_info import get_environ_info
...@@ -85,8 +85,8 @@ def _uncompress_file(filepath, extrapath, delete_file, print_progress): ...@@ -85,8 +85,8 @@ def _uncompress_file(filepath, extrapath, delete_file, print_progress):
for total_num, index, rootpath in handler(filepath, extrapath): for total_num, index, rootpath in handler(filepath, extrapath):
if print_progress: if print_progress:
done = int(50 * float(index) / total_num) done = int(50 * float(index) / total_num)
progress("[%-50s] %.2f%%" % progress(
('=' * done, float(100 * index) / total_num)) "[%-50s] %.2f%%" % ('=' * done, float(100 * index) / total_num))
if print_progress: if print_progress:
progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
...@@ -132,4 +132,4 @@ def download_file_and_uncompress(url, ...@@ -132,4 +132,4 @@ def download_file_and_uncompress(url,
print_progress) print_progress)
savename = os.path.join(extrapath, savename) savename = os.path.join(extrapath, savename)
shutil.move(savename, extraname) shutil.move(savename, extraname)
return savename return extraname
此差异已折叠。
此差异已折叠。
此差异已折叠。
...@@ -26,7 +26,7 @@ from loss import multi_dice_loss ...@@ -26,7 +26,7 @@ from loss import multi_dice_loss
from loss import multi_bce_loss from loss import multi_bce_loss
from lovasz_losses import lovasz_hinge from lovasz_losses import lovasz_hinge
from lovasz_losses import lovasz_softmax from lovasz_losses import lovasz_softmax
from models.modeling import deeplab, unet, icnet, pspnet, hrnet, fast_scnn from models.modeling import deeplab, unet, icnet, pspnet, hrnet, fast_scnn, ocrnet
class ModelPhase(object): class ModelPhase(object):
...@@ -85,6 +85,8 @@ def seg_model(image, class_num): ...@@ -85,6 +85,8 @@ def seg_model(image, class_num):
logits = hrnet.hrnet(image, class_num) logits = hrnet.hrnet(image, class_num)
elif model_name == 'fast_scnn': elif model_name == 'fast_scnn':
logits = fast_scnn.fast_scnn(image, class_num) logits = fast_scnn.fast_scnn(image, class_num)
elif model_name == 'ocrnet':
logits = ocrnet.ocrnet(image, class_num)
else: else:
raise Exception( raise Exception(
"unknow model name, only support unet, deeplabv3p, icnet, pspnet, hrnet, fast_scnn" "unknow model name, only support unet, deeplabv3p, icnet, pspnet, hrnet, fast_scnn"
......
...@@ -352,6 +352,8 @@ def resnet_vd(input): ...@@ -352,6 +352,8 @@ def resnet_vd(input):
else: else:
raise Exception("deeplab only support stride 8 or 16") raise Exception("deeplab only support stride 8 or 16")
lr_mult_list = cfg.MODEL.DEEPLAB.BACKBONE_LR_MULT_LIST lr_mult_list = cfg.MODEL.DEEPLAB.BACKBONE_LR_MULT_LIST
if lr_mult_list is None:
lr_mult_list = [1.0, 1.0, 1.0, 1.0, 1.0]
model = resnet_vd_backbone( model = resnet_vd_backbone(
layers, stem='deeplab', lr_mult_list=lr_mult_list) layers, stem='deeplab', lr_mult_list=lr_mult_list)
data, decode_shortcuts = model.net( data, decode_shortcuts = model.net(
......
此差异已折叠。
此差异已折叠。
...@@ -248,7 +248,10 @@ cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS = [40, 80, 160] ...@@ -248,7 +248,10 @@ cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS = [40, 80, 160]
# HRNET STAGE4 设置 # HRNET STAGE4 设置
cfg.MODEL.HRNET.STAGE4.NUM_MODULES = 3 cfg.MODEL.HRNET.STAGE4.NUM_MODULES = 3
cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS = [40, 80, 160, 320] cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS = [40, 80, 160, 320]
########################## OCNET模型配置 ######################################
cfg.MODEL.OCR.OCR_MID_CHANNELS = 512
cfg.MODEL.OCR.OCR_KEY_CHANNELS = 256
########################## 预测部署模型配置 ################################### ########################## 预测部署模型配置 ###################################
# 预测保存的模型名称 # 预测保存的模型名称
cfg.FREEZE.MODEL_FILENAME = '__model__' cfg.FREEZE.MODEL_FILENAME = '__model__'
......
此差异已折叠。
...@@ -2,4 +2,4 @@ pre-commit ...@@ -2,4 +2,4 @@ pre-commit
yapf == 0.26.0 yapf == 0.26.0
flake8 flake8
pyyaml >= 5.1 pyyaml >= 5.1
visualdl == 2.0.0b4 visualdl >= 2.0.0
...@@ -145,8 +145,11 @@ PaddleSeg在AI Studio平台上提供了在线体验的DeepLabv3+图像分割教 ...@@ -145,8 +145,11 @@ PaddleSeg在AI Studio平台上提供了在线体验的DeepLabv3+图像分割教
|mobilenetv2-0-25_bn_imagenet|MobileNetV2|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv2 <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.25 <br> MODEL.DEFAULT_NORM_TYPE: bn| |mobilenetv2-0-25_bn_imagenet|MobileNetV2|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv2 <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 0.25 <br> MODEL.DEFAULT_NORM_TYPE: bn|
|xception41_imagenet|Xception41|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_41 <br> MODEL.DEFAULT_NORM_TYPE: bn| |xception41_imagenet|Xception41|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_41 <br> MODEL.DEFAULT_NORM_TYPE: bn|
|xception65_imagenet|Xception65|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn| |xception65_imagenet|Xception65|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn|
|resnet50_vd_imagenet|ResNet50_vd|bn|ImageNet|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: resnet50_vd <br> MODEL.DEFAULT_NORM_TYPE: bn|
|deeplabv3p_mobilenetv2-1-0_bn_coco|MobileNetV2|bn|COCO|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv2 <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEEPLAB.ENCODER_WITH_ASPP: False <br> MODEL.DEEPLAB.ENABLE_DECODER: False <br> MODEL.DEFAULT_NORM_TYPE: bn| |deeplabv3p_mobilenetv2-1-0_bn_coco|MobileNetV2|bn|COCO|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv2 <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEEPLAB.ENCODER_WITH_ASPP: False <br> MODEL.DEEPLAB.ENABLE_DECODER: False <br> MODEL.DEFAULT_NORM_TYPE: bn|
|**deeplabv3p_xception65_bn_coco**|Xception65|bn|COCO|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn | |**deeplabv3p_xception65_bn_coco**|Xception65|bn|COCO|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn |
|deeplabv3p_mobilenetv2-1-0_bn_cityscapes|MobileNetV2|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv2 <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEEPLAB.ENCODER_WITH_ASPP: False <br> MODEL.DEEPLAB.ENABLE_DECODER: False <br> MODEL.DEFAULT_NORM_TYPE: bn| |deeplabv3p_mobilenetv2-1-0_bn_cityscapes|MobileNetV2|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv2 <br> MODEL.DEEPLAB.DEPTH_MULTIPLIER: 1.0 <br> MODEL.DEEPLAB.ENCODER_WITH_ASPP: False <br> MODEL.DEEPLAB.ENABLE_DECODER: False <br> MODEL.DEFAULT_NORM_TYPE: bn|
|deeplabv3p_mobilenetv3_large_cityscapes|MobileNetV3|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: mobilenetv3_large <br> MODEL.DEFAULT_NORM_TYPE: bn|
|deeplabv3p_xception65_gn_cityscapes|Xception65|gn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: gn| |deeplabv3p_xception65_gn_cityscapes|Xception65|gn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: gn|
|deeplabv3p_xception65_bn_cityscapes|Xception65|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn| |deeplabv3p_xception65_bn_cityscapes|Xception65|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: xception_65 <br> MODEL.DEFAULT_NORM_TYPE: bn|
|deeplabv3p_resnet50_vd_cityscapes|resnet50_vd|bn|Cityscapes|MODEL.MODEL_NAME: deeplabv3p <br> MODEL.DEEPLAB.BACKBONE: resnet50_vd <br> MODEL.DEFAULT_NORM_TYPE: bn|
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册