Commit c4148bae authored by LielinJiang

polish code

Parent bd72b8b6
@@ -88,8 +88,6 @@ AUG:
    RICH_CROP:
        ENABLE: False
BATCH_SIZE: 4
MEAN: [0.5, 0.5, 0.5]
STD: [0.5, 0.5, 0.5]
TEST:
    TEST_MODEL: "./saved_model/lanenet/final/"
TRAIN:
@@ -123,10 +121,10 @@ CUDA_VISIBLE_DEVICES=0 python -u eval.py --use_gpu --cfg configs/lanenet.yaml
```
## 7. Visualization
We provide a trained model: click this [link](https://paddleseg.bj.bcebos.com/models/lanenet_vgg_tusimple.tar), download it, place it under ```./pretrained_models/```, and run the following command for visualization.
First download the file needed to convert between the front view and the bird's-eye view: click this [link](https://paddleseg.bj.bcebos.com/resources/tusimple_ipm_remap.tar), download it and place it under ```./utils```. We also provide a trained model: click this [link](https://paddleseg.bj.bcebos.com/models/lanenet_vgg_tusimple.tar), download it, place it under ```./pretrained_models/```, and run the following command for visualization.
```shell
CUDA_VISIBLE_DEVICES=0 python -u ./vis.py --cfg configs/lanenet.yaml --use_gpu --vis_dir vis_result \
TEST.TEST_MODEL pretrained_models/LaneNet_vgg_tusimple/ \
TEST.TEST_MODEL pretrained_models/LaneNet_vgg_tusimple/
```
Example visualization results:
......
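For readers who prefer scripting the two downloads above, here is a minimal Python sketch (not part of this commit); it assumes network access and that each archive unpacks into the layout the commands expect (e.g. `pretrained_models/LaneNet_vgg_tusimple/`).

```python
# Minimal download helper sketch, mirroring the two links in the README above.
import os
import tarfile
import urllib.request

def fetch_and_extract(url, target_dir):
    """Download a .tar archive and extract it into target_dir."""
    os.makedirs(target_dir, exist_ok=True)
    filename, _ = urllib.request.urlretrieve(url)  # download to a temp file
    with tarfile.open(filename) as tar:
        tar.extractall(path=target_dir)

fetch_and_extract("https://paddleseg.bj.bcebos.com/resources/tusimple_ipm_remap.tar",
                  "./utils")
fetch_and_extract("https://paddleseg.bj.bcebos.com/models/lanenet_vgg_tusimple.tar",
                  "./pretrained_models")
```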
@@ -14,8 +14,6 @@ AUG:
    ENABLE: False
BATCH_SIZE: 4
MEAN: [0.5, 0.5, 0.5]
STD: [0.5, 0.5, 0.5]
DATALOADER:
    BUF_SIZE: 256
@@ -51,5 +49,4 @@ SOLVER:
    LR_POLICY: "poly"
    OPTIMIZER: "sgd"
    WEIGHT_DECAY: 0.001
CROSS_ENTROPY_WEIGHT: 'lanenet'
@@ -18,7 +18,8 @@ import cv2
import numpy as np
from utils.config import cfg
from models.model_builder import ModelPhase
from pdseg.data_aug import get_random_scale, randomly_scale_image_and_label, random_rotation, \
    rand_scale_aspect, hsv_color_jitter, rand_crop


def resize(img, grt=None, grt_instance=None, mode=ModelPhase.TRAIN):
    """
@@ -80,358 +81,3 @@ def resize(img, grt=None, grt_instance=None, mode=ModelPhase.TRAIN):
            cfg.AUG.AUG_METHOD))
    return img, grt, grt_instance
def get_random_scale(min_scale_factor, max_scale_factor, step_size):
"""
Draw a random value from the range min_scale_factor to max_scale_factor, with interval step_size
Args:
min_scale_factor(float): lower bound of the random scale, greater than 0
max_scale_factor(float): upper bound of the random scale, not smaller than the lower bound
step_size(float): scale interval, non-negative; if 0, return any value within [min_scale_factor, max_scale_factor]
Returns:
a random scale value
"""
if min_scale_factor < 0 or min_scale_factor > max_scale_factor:
raise ValueError('Unexpected value of min_scale_factor.')
if min_scale_factor == max_scale_factor:
return min_scale_factor
if step_size == 0:
return np.random.uniform(min_scale_factor, max_scale_factor)
num_steps = int((max_scale_factor - min_scale_factor) / step_size + 1)
scale_factors = np.linspace(min_scale_factor, max_scale_factor,
num_steps).tolist()
np.random.shuffle(scale_factors)
return scale_factors[0]
def randomly_scale_image_and_label(image, label=None, scale=1.0):
"""
Resize the image and the label map by a given ratio; if scale is 1, return them unchanged
Args:
image(numpy.ndarray): input image
label(numpy.ndarray): label map, default None
scale(float): resize ratio, non-negative, default 1.0
Returns:
the resized image and label map
"""
if scale == 1.0:
return image, label
height = image.shape[0]
width = image.shape[1]
new_height = int(height * scale + 0.5)
new_width = int(width * scale + 0.5)
new_image = cv2.resize(
image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
if label is not None:
height = label.shape[0]
width = label.shape[1]
new_height = int(height * scale + 0.5)
new_width = int(width * scale + 0.5)
new_label = cv2.resize(
label, (new_width, new_height), interpolation=cv2.INTER_NEAREST)
return new_image, new_label
def random_rotation(crop_img, crop_seg, rich_crop_max_rotation, mean_value):
"""
Randomly rotate the image and the label map
Args:
crop_img(numpy.ndarray): input image
crop_seg(numpy.ndarray): label map
rich_crop_max_rotation(int): maximum rotation angle, 0-90
mean_value(list): mean values used to fill the extra area introduced by the rotation
Returns:
the rotated image and label map
"""
ignore_index = cfg.DATASET.IGNORE_INDEX
if rich_crop_max_rotation > 0:
(h, w) = crop_img.shape[:2]
do_rotation = np.random.uniform(-rich_crop_max_rotation,
rich_crop_max_rotation)
pc = (w // 2, h // 2)
r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0)
cos = np.abs(r[0, 0])
sin = np.abs(r[0, 1])
nw = int((h * sin) + (w * cos))
nh = int((h * cos) + (w * sin))
(cx, cy) = pc
r[0, 2] += (nw / 2) - cx
r[1, 2] += (nh / 2) - cy
dsize = (nw, nh)
crop_img = cv2.warpAffine(
crop_img,
r,
dsize=dsize,
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=mean_value)
crop_seg = cv2.warpAffine(
crop_seg,
r,
dsize=dsize,
flags=cv2.INTER_NEAREST,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(ignore_index, ignore_index, ignore_index))
return crop_img, crop_seg
def rand_scale_aspect(crop_img,
crop_seg,
rich_crop_min_scale=0,
rich_crop_aspect_ratio=0):
"""
Crop a region with a random aspect ratio from the input image and label map, then resize it back to the original size
Args:
crop_img(numpy.ndarray): input image
crop_seg(numpy.ndarray): label map
rich_crop_min_scale(float): minimum ratio of the cropped area to the original area, 0-1; default 0 returns the original image
rich_crop_aspect_ratio(float): aspect-ratio range of the crop, non-negative; default 0 returns the original image
Returns:
the cropped image and label map, resized back to the original size
"""
if rich_crop_min_scale == 0 or rich_crop_aspect_ratio == 0:
return crop_img, crop_seg
else:
img_height = crop_img.shape[0]
img_width = crop_img.shape[1]
for i in range(0, 10):
area = img_height * img_width
target_area = area * np.random.uniform(rich_crop_min_scale, 1.0)
aspectRatio = np.random.uniform(rich_crop_aspect_ratio,
1.0 / rich_crop_aspect_ratio)
dw = int(np.sqrt(target_area * 1.0 * aspectRatio))
dh = int(np.sqrt(target_area * 1.0 / aspectRatio))
if (np.random.randint(10) < 5):
tmp = dw
dw = dh
dh = tmp
if (dh < img_height and dw < img_width):
h1 = np.random.randint(0, img_height - dh)
w1 = np.random.randint(0, img_width - dw)
crop_img = crop_img[h1:(h1 + dh), w1:(w1 + dw), :]
crop_seg = crop_seg[h1:(h1 + dh), w1:(w1 + dw)]
crop_img = cv2.resize(
crop_img, (img_width, img_height),
interpolation=cv2.INTER_LINEAR)
crop_seg = cv2.resize(
crop_seg, (img_width, img_height),
interpolation=cv2.INTER_NEAREST)
break
return crop_img, crop_seg
def saturation_jitter(cv_img, jitter_range):
"""
Adjust the saturation of the image
Args:
cv_img(numpy.ndarray): input image
jitter_range(float): adjustment strength, 0-1
Returns:
the image with adjusted saturation
"""
greyMat = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
greyMat = greyMat[:, :, None] * np.ones(3, dtype=int)[None, None, :]
cv_img = cv_img.astype(np.float32)
cv_img = cv_img * (1 - jitter_range) + jitter_range * greyMat
cv_img = np.where(cv_img > 255, 255, cv_img)
cv_img = cv_img.astype(np.uint8)
return cv_img
def brightness_jitter(cv_img, jitter_range):
"""
Adjust the brightness of the image
Args:
cv_img(numpy.ndarray): input image
jitter_range(float): adjustment strength, 0-1
Returns:
the image with adjusted brightness
"""
cv_img = cv_img.astype(np.float32)
cv_img = cv_img * (1.0 - jitter_range)
cv_img = np.where(cv_img > 255, 255, cv_img)
cv_img = cv_img.astype(np.uint8)
return cv_img
def contrast_jitter(cv_img, jitter_range):
"""
Adjust the contrast of the image
Args:
cv_img(numpy.ndarray): input image
jitter_range(float): adjustment strength, 0-1
Returns:
the image with adjusted contrast
"""
greyMat = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
mean = np.mean(greyMat)
cv_img = cv_img.astype(np.float32)
cv_img = cv_img * (1 - jitter_range) + jitter_range * mean
cv_img = np.where(cv_img > 255, 255, cv_img)
cv_img = cv_img.astype(np.uint8)
return cv_img
def random_jitter(cv_img, saturation_range, brightness_range, contrast_range):
"""
Adjust brightness, saturation and contrast; a random ratio is drawn within each range and the three effects are applied in random order
Args:
cv_img(numpy.ndarray): input image
saturation_range(float): saturation adjustment range, 0-1
brightness_range(float): brightness adjustment range, 0-1
contrast_range(float): contrast adjustment range, 0-1
Returns:
the image with adjusted brightness, saturation and contrast
"""
saturation_ratio = np.random.uniform(-saturation_range, saturation_range)
brightness_ratio = np.random.uniform(-brightness_range, brightness_range)
contrast_ratio = np.random.uniform(-contrast_range, contrast_range)
order = [1, 2, 3]
np.random.shuffle(order)
for i in range(3):
if order[i] == 0:
cv_img = saturation_jitter(cv_img, saturation_ratio)
if order[i] == 1:
cv_img = brightness_jitter(cv_img, brightness_ratio)
if order[i] == 2:
cv_img = contrast_jitter(cv_img, contrast_ratio)
return cv_img
def hsv_color_jitter(crop_img,
brightness_jitter_ratio=0,
saturation_jitter_ratio=0,
contrast_jitter_ratio=0):
"""
Adjust brightness, saturation and contrast of the image
Args:
crop_img(numpy.ndarray): input image
brightness_jitter_ratio(float): maximum brightness adjustment, 0-1, default 0
saturation_jitter_ratio(float): maximum saturation adjustment, 0-1, default 0
contrast_jitter_ratio(float): maximum contrast adjustment, 0-1, default 0
Returns:
the image with adjusted brightness, saturation and contrast
"""
if brightness_jitter_ratio > 0 or \
saturation_jitter_ratio > 0 or \
contrast_jitter_ratio > 0:
crop_img = random_jitter(crop_img, saturation_jitter_ratio,
brightness_jitter_ratio, contrast_jitter_ratio)
return crop_img
def rand_crop(crop_img, crop_seg, mode=ModelPhase.TRAIN):
"""
Randomly crop the image and the label map. If the crop size is larger than the original size, the image and label are first padded with the mean value and the ignore value respectively, then cropped; if the crop size equals the original size, the originals are returned; if it is smaller, they are cropped directly
Args:
crop_img(numpy.ndarray): input image
crop_seg(numpy.ndarray): label map
mode(string): phase, default train; in eval, infer or visualization phases the crop size must be no smaller than the original image size
Returns:
the cropped image and label map
"""
img_height = crop_img.shape[0]
img_width = crop_img.shape[1]
if ModelPhase.is_train(mode):
crop_width = cfg.TRAIN_CROP_SIZE[0]
crop_height = cfg.TRAIN_CROP_SIZE[1]
else:
crop_width = cfg.EVAL_CROP_SIZE[0]
crop_height = cfg.EVAL_CROP_SIZE[1]
if not ModelPhase.is_train(mode):
if (crop_height < img_height or crop_width < img_width):
raise Exception(
"Crop size({},{}) must large than img size({},{}) when in EvalPhase."
.format(crop_width, crop_height, img_width, img_height))
if img_height == crop_height and img_width == crop_width:
return crop_img, crop_seg
else:
pad_height = max(crop_height - img_height, 0)
pad_width = max(crop_width - img_width, 0)
if (pad_height > 0 or pad_width > 0):
crop_img = cv2.copyMakeBorder(
crop_img,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=cfg.DATASET.PADDING_VALUE)
if crop_seg is not None:
crop_seg = cv2.copyMakeBorder(
crop_seg,
0,
pad_height,
0,
pad_width,
cv2.BORDER_CONSTANT,
value=cfg.DATASET.IGNORE_INDEX)
img_height = crop_img.shape[0]
img_width = crop_img.shape[1]
if crop_height > 0 and crop_width > 0:
h_off = np.random.randint(img_height - crop_height + 1)
w_off = np.random.randint(img_width - crop_width + 1)
crop_img = crop_img[h_off:(crop_height + h_off), w_off:(
w_off + crop_width), :]
if crop_seg is not None:
crop_seg = crop_seg[h_off:(crop_height + h_off), w_off:(
w_off + crop_width)]
return crop_img, crop_seg
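Since this commit switches the module to the shared `pdseg.data_aug` implementations of the helpers removed above, a minimal usage sketch (not part of the commit, assuming `pdseg` is importable on `sys.path`) looks like this:

```python
# Exercise a few of the shared augmentation helpers on dummy data.
import numpy as np
from pdseg.data_aug import (get_random_scale, randomly_scale_image_and_label,
                            rand_scale_aspect, hsv_color_jitter)

img = np.random.randint(0, 256, (256, 512, 3), dtype=np.uint8)  # dummy BGR image
lab = np.random.randint(0, 2, (256, 512), dtype=np.uint8)       # dummy label map

scale = get_random_scale(0.75, 1.25, 0.25)          # one of 0.75, 1.0, 1.25
img, lab = randomly_scale_image_and_label(img, lab, scale=scale)
img, lab = rand_scale_aspect(img, lab,
                             rich_crop_min_scale=0.5,
                             rich_crop_aspect_ratio=0.33)
img = hsv_color_jitter(img,
                       brightness_jitter_ratio=0.5,
                       saturation_jitter_ratio=0.5,
                       contrast_jitter_ratio=0.5)
print(img.shape, lab.shape)
```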
@@ -25,6 +25,9 @@ import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(cur_path)[0])[0]
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
SEG_PATH = os.path.join(LOCAL_PATH, "../../../")
sys.path.append(SEG_PATH)
sys.path.append(root_path)

import time
@@ -37,7 +40,7 @@ import paddle
import paddle.fluid as fluid
from utils.config import cfg
from utils.timer import Timer, calculate_eta
from pdseg.utils.timer import Timer, calculate_eta
from models.model_builder import build_model
from models.model_builder import ModelPhase
from reader import LaneNetDataset
......
@@ -17,71 +17,6 @@ import paddle.fluid as fluid
import numpy as np
from utils.config import cfg
def softmax_with_loss(logit, label, ignore_mask=None, num_classes=2, weight=None):
ignore_mask = fluid.layers.cast(ignore_mask, 'float32')
label = fluid.layers.elementwise_min(
label, fluid.layers.assign(np.array([num_classes - 1], dtype=np.int32)))
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
logit = fluid.layers.reshape(logit, [-1, num_classes])
label = fluid.layers.reshape(label, [-1, 1])
label = fluid.layers.cast(label, 'int64')
ignore_mask = fluid.layers.reshape(ignore_mask, [-1, 1])
if weight is None:
loss, probs = fluid.layers.softmax_with_cross_entropy(
logit,
label,
ignore_index=cfg.DATASET.IGNORE_INDEX,
return_softmax=True)
else:
label_one_hot = fluid.layers.one_hot(input=label, depth=num_classes)
if isinstance(weight, list):
assert len(weight) == num_classes, "weight length must equal num of classes"
weight = fluid.layers.assign(np.array([weight], dtype='float32'))
elif isinstance(weight, fluid.layers.Variable):
pass
else:
tmp = []
total_num = fluid.layers.cast(fluid.layers.shape(label)[0], 'float32')
for i in range(num_classes):
cls_pixel_num = fluid.layers.reduce_sum(label_one_hot[:, i])
ratio = total_num / (cls_pixel_num + 1)
tmp.append(ratio)
weight = fluid.layers.concat(tmp)
weight = weight / fluid.layers.reduce_sum(weight) * num_classes
weight = fluid.layers.reshape(weight, [1, num_classes])
weighted_label_one_hot = fluid.layers.elementwise_mul(label_one_hot, weight)
probs = fluid.layers.softmax(logit)
loss = fluid.layers.cross_entropy(
probs,
weighted_label_one_hot,
soft_label=True,
ignore_index=cfg.DATASET.IGNORE_INDEX)
weighted_label_one_hot.stop_gradient = True
loss = loss * ignore_mask
avg_loss = fluid.layers.mean(loss) / fluid.layers.mean(ignore_mask)
label.stop_gradient = True
ignore_mask.stop_gradient = True
return avg_loss
def multi_softmax_with_loss(logits, label, ignore_mask=None, num_classes=2, weight=None):
if isinstance(logits, tuple):
avg_loss = 0
for i, logit in enumerate(logits):
logit_label = fluid.layers.resize_nearest(label, logit.shape[2:])
logit_mask = (logit_label.astype('int32') !=
cfg.DATASET.IGNORE_INDEX).astype('int32')
loss = softmax_with_loss(logit, logit_label, logit_mask,
num_classes)
avg_loss += cfg.MODEL.MULTI_LOSS_WEIGHT[i] * loss
else:
avg_loss = softmax_with_loss(logits, label, ignore_mask, num_classes, weight)
return avg_loss
def unsorted_segment_sum(data, segment_ids, unique_labels, feature_dims):
    zeros = fluid.layers.fill_constant_batch_size_like(unique_labels, shape=[1, feature_dims],
......
@@ -14,5 +14,4 @@
# limitations under the License.
import models.modeling
import models.libs
#import models.backbone
import models.backbone
# coding: utf8
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
from utils.config import cfg
import contextlib
bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0)
name_scope = ""
@contextlib.contextmanager
def scope(name):
global name_scope
bk = name_scope
name_scope = name_scope + name + '/'
yield
name_scope = bk
def max_pool(input, kernel, stride, padding):
data = fluid.layers.pool2d(
input,
pool_size=kernel,
pool_type='max',
pool_stride=stride,
pool_padding=padding)
return data
def avg_pool(input, kernel, stride, padding=0):
data = fluid.layers.pool2d(
input,
pool_size=kernel,
pool_type='avg',
pool_stride=stride,
pool_padding=padding)
return data
def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
N, C, H, W = input.shape
if C % G != 0:
# print "group can not divide channle:", C, G
for d in range(10):
for t in [d, -d]:
if G + t <= 0: continue
if C % (G + t) == 0:
G = G + t
break
if C % G == 0:
# print "use group size:", G
break
assert C % G == 0
x = fluid.layers.group_norm(
input,
groups=G,
param_attr=param_attr,
bias_attr=bias_attr,
name=name_scope + 'group_norm')
return x
def bn(*args, **kargs):
if cfg.MODEL.DEFAULT_NORM_TYPE == 'bn':
with scope('BatchNorm'):
return fluid.layers.batch_norm(
*args,
epsilon=cfg.MODEL.DEFAULT_EPSILON,
momentum=cfg.MODEL.BN_MOMENTUM,
param_attr=fluid.ParamAttr(
name=name_scope + 'gamma', regularizer=bn_regularizer),
bias_attr=fluid.ParamAttr(
name=name_scope + 'beta', regularizer=bn_regularizer),
moving_mean_name=name_scope + 'moving_mean',
moving_variance_name=name_scope + 'moving_variance',
**kargs)
elif cfg.MODEL.DEFAULT_NORM_TYPE == 'gn':
with scope('GroupNorm'):
return group_norm(
args[0],
cfg.MODEL.DEFAULT_GROUP_NUMBER,
eps=cfg.MODEL.DEFAULT_EPSILON,
param_attr=fluid.ParamAttr(
name=name_scope + 'gamma', regularizer=bn_regularizer),
bias_attr=fluid.ParamAttr(
name=name_scope + 'beta', regularizer=bn_regularizer))
else:
raise Exception("Unsupport norm type:" + cfg.MODEL.DEFAULT_NORM_TYPE)
def bn_relu(data):
return fluid.layers.relu(bn(data))
def relu(data):
return fluid.layers.relu(data)
def conv(*args, **kargs):
kargs['param_attr'] = name_scope + 'weights'
if 'bias_attr' in kargs and kargs['bias_attr']:
kargs['bias_attr'] = fluid.ParamAttr(
name=name_scope + 'biases',
regularizer=None,
initializer=fluid.initializer.ConstantInitializer(value=0.0))
else:
kargs['bias_attr'] = False
return fluid.layers.conv2d(*args, **kargs)
def deconv(*args, **kargs):
kargs['param_attr'] = name_scope + 'weights'
if 'bias_attr' in kargs and kargs['bias_attr']:
kargs['bias_attr'] = name_scope + 'biases'
else:
kargs['bias_attr'] = False
return fluid.layers.conv2d_transpose(*args, **kargs)
def separate_conv(input, channel, stride, filter, dilation=1, act=None):
param_attr = fluid.ParamAttr(
name=name_scope + 'weights',
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0),
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33))
with scope('depthwise'):
input = conv(
input,
input.shape[1],
filter,
stride,
groups=input.shape[1],
padding=(filter // 2) * dilation,
dilation=dilation,
use_cudnn=False,
param_attr=param_attr)
input = bn(input)
if act: input = act(input)
param_attr = fluid.ParamAttr(
name=name_scope + 'weights',
regularizer=None,
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06))
with scope('pointwise'):
input = conv(
input, channel, 1, 1, groups=1, padding=0, param_attr=param_attr)
input = bn(input)
if act: input = act(input)
return input
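For reference, the `scope` context manager in this file builds hierarchical parameter names by appending to the global `name_scope`; the following stand-alone sketch (not part of the commit) replays just that logic:

```python
# Illustration of how nested scope() calls compose parameter names.
import contextlib

name_scope = ""

@contextlib.contextmanager
def scope(name):
    global name_scope
    bk = name_scope
    name_scope = name_scope + name + '/'
    yield
    name_scope = bk

with scope('encoder'):
    with scope('conv1'):
        print(name_scope + 'weights')   # -> encoder/conv1/weights
print(repr(name_scope))                  # scope is restored -> ''
```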
@@ -15,15 +15,15 @@
import sys
sys.path.append("..")
import struct
import importlib
import paddle.fluid as fluid
from paddle.fluid.proto.framework_pb2 import VarType
import solver
from pdseg import solver
from utils.config import cfg
from loss import multi_softmax_with_loss
from pdseg.loss import multi_softmax_with_loss
from loss import discriminative_loss
from models.modeling import lanenet


class ModelPhase(object):
    """
@@ -67,36 +67,15 @@ class ModelPhase(object):
        return False


def map_model_name(model_name):
    name_dict = {
        "lanenet": "lanenet.lanenet"
    }
    if model_name in name_dict.keys():
        return name_dict[model_name]
    else:
        raise Exception(
            "unknow model name, only support unet, deeplabv3p, icnet")


def seg_model(image, class_num):
    model_name = cfg.MODEL.MODEL_NAME
    if model_name == 'lanenet':
        logits = lanenet.lanenet(image, class_num)
    else:
        raise Exception(
            "unknow model name, only support unet, deeplabv3p, icnet, pspnet, hrnet"
        )
    return logits
def get_func(func_name):
"""Helper to return a function object by name. func_name must identify a
function in this module or the path to a function relative to the base
'modeling' module.
"""
if func_name == '':
return None
try:
parts = func_name.split('.')
# Refers to a function in this module
if len(parts) == 1:
return globals()[parts[0]]
# Otherwise, assume we're referencing a module under modeling
module_name = 'models.' + '.'.join(parts[:-1])
module = importlib.import_module(module_name)
return getattr(module, parts[-1])
except Exception:
print('Failed to find function: {}'.format(func_name))
return module
def softmax(logit):
@@ -105,6 +84,7 @@ def softmax(logit):
    logit = fluid.layers.transpose(logit, [0, 3, 1, 2])
    return logit


def sigmoid_to_softmax(logit):
    """
    one channel to two channel
@@ -151,14 +131,12 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
            iterable=False,
            use_double_buffer=True)

    model_name = map_model_name(cfg.MODEL.MODEL_NAME)
    model_func = get_func("modeling." + model_name)
    loss_type = cfg.SOLVER.LOSS
    if not isinstance(loss_type, list):
        loss_type = list(loss_type)

    logits = model_func(image, class_num)
    logits = seg_model(image, class_num)

    if ModelPhase.is_train(phase):
        loss_valid = False
@@ -170,11 +148,9 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
                image_shape[1:], 0.5, 3.0, 1.0, 1.0, 0.001)
        if "softmax_loss" in loss_type:
            if isinstance(cfg.SOLVER.CROSS_ENTROPY_WEIGHT, str) and \
                    cfg.SOLVER.CROSS_ENTROPY_WEIGHT == 'lanenet':
                weight = get_dynamic_weight(label)
            else:
                weight = cfg.SOLVER.CROSS_ENTROPY_WEIGHT
            weight = None
            if cfg.MODEL.MODEL_NAME == 'lanenet':
                weight = get_dynamic_weight(label)
            seg_loss = multi_softmax_with_loss(logits, label, mask, class_num, weight)
            loss_valid = True
            valid_loss.append("softmax_loss")
......
@@ -20,10 +20,11 @@ import paddle.fluid as fluid
from utils.config import cfg
from models.libs.model_libs import scope, name_scope
from models.libs.model_libs import bn, bn_relu, relu
from models.libs.model_libs import conv, max_pool, deconv
from models.backbone.vgg import VGGNet as vgg_backbone
from pdseg.models.libs.model_libs import scope, name_scope
from pdseg.models.libs.model_libs import bn, bn_relu, relu
from pdseg.models.libs.model_libs import conv, max_pool, deconv
from pdseg.models.backbone.vgg import VGGNet as vgg_backbone
#from models.backbone.vgg import VGGNet as vgg_backbone

# Bottleneck type
REGULAR = 1
@@ -412,7 +413,13 @@ def decoder(input, num_classes):
def encoder(input):
    if 'vgg' in cfg.MODEL.LANENET.BACKBONE:
        model = vgg_backbone(layers=16)
        output = model.net(input)
        #output = model.net(input)
        _, encode_feature_dict = model.net(input, end_points=13, decode_points=[7, 10, 13])
        output = {}
        output['pool3'] = encode_feature_dict[7]
        output['pool4'] = encode_feature_dict[10]
        output['pool5'] = encode_feature_dict[13]
    elif 'enet' in cfg.MODEL.LANET.BACKBONE:
        with scope('LaneNetBase'):
            initial = iniatial_block(input)
......
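The new `encoder` branch above relies on the reworked `VGGNet.net` interface, where `end_points` and `decode_points` are cumulative `conv_block` counts; for VGG16 (`nums = [2, 2, 3, 3, 3]`) the counts 7, 10 and 13 correspond to pool3, pool4 and pool5. A usage sketch follows (not part of the commit, assuming Paddle and `pdseg` are available):

```python
# Build a VGG16 backbone and pull out the pool3/pool4/pool5 feature maps.
import paddle.fluid as fluid
from pdseg.models.backbone.vgg import VGGNet

image = fluid.layers.data(name='image', shape=[3, 256, 512], dtype='float32')
model = VGGNet(layers=16)
final_feat, short_cuts = model.net(image, end_points=13, decode_points=[7, 10, 13])
pool3, pool4, pool5 = short_cuts[7], short_cuts[10], short_cuts[13]
```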
@@ -22,301 +22,19 @@ import codecs
import numpy as np
import cv2
import data_aug as aug
from utils.config import cfg
import data_aug as aug
from pdseg.data_utils import GeneratorEnqueuer
from models.model_builder import ModelPhase
import copy


def cv2_imread(file_path, flag=cv2.IMREAD_COLOR):
    # resolve cv2.imread open Chinese file path issues on Windows Platform.
    return cv2.imdecode(np.fromfile(file_path, dtype=np.uint8), flag)


class SegDataset(object):
class LaneNetDataset():
def __init__(self,
file_list,
data_dir,
shuffle=False,
mode=ModelPhase.TRAIN):
self.mode = mode
self.shuffle = shuffle
self.data_dir = data_dir
self.shuffle_seed = 0
# NOTE: Please ensure file list was save in UTF-8 coding format
with codecs.open(file_list, 'r', 'utf-8') as flist:
self.lines = [line.strip() for line in flist]
self.all_lines = copy.deepcopy(self.lines)
if shuffle and cfg.NUM_TRAINERS > 1:
np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines)
elif shuffle:
np.random.shuffle(self.lines)
def generator(self):
if self.shuffle and cfg.NUM_TRAINERS > 1:
np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines)
num_lines = len(self.all_lines) // cfg.NUM_TRAINERS
self.lines = self.all_lines[num_lines * cfg.TRAINER_ID: num_lines * (cfg.TRAINER_ID + 1)]
self.shuffle_seed += 1
elif self.shuffle:
np.random.shuffle(self.lines)
for line in self.lines:
yield self.process_image(line, self.data_dir, self.mode)
def sharding_generator(self, pid=0, num_processes=1):
"""
Use line id as shard key for multiprocess io
It's a normal generator if pid=0, num_processes=1
"""
for index, line in enumerate(self.lines):
# Use index and pid to shard file list
if index % num_processes == pid:
yield self.process_image(line, self.data_dir, self.mode)
def batch_reader(self, batch_size):
br = self.batch(self.reader, batch_size)
for batch in br:
yield batch[0], batch[1], batch[2]
def multiprocess_generator(self, max_queue_size=32, num_processes=8):
# Re-shuffle file list
if self.shuffle and cfg.NUM_TRAINERS > 1:
np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines)
num_lines = len(self.all_lines) // self.num_trainers
self.lines = self.all_lines[num_lines * self.trainer_id: num_lines * (self.trainer_id + 1)]
self.shuffle_seed += 1
elif self.shuffle:
np.random.shuffle(self.lines)
# Create multiple sharding generators according to num_processes for multiple processes
generators = []
for pid in range(num_processes):
generators.append(self.sharding_generator(pid, num_processes))
try:
enqueuer = GeneratorEnqueuer(generators)
enqueuer.start(max_queue_size=max_queue_size, workers=num_processes)
while True:
generator_out = None
while enqueuer.is_running():
if not enqueuer.queue.empty():
generator_out = enqueuer.queue.get(timeout=5)
break
else:
time.sleep(0.01)
if generator_out is None:
break
yield generator_out
finally:
if enqueuer is not None:
enqueuer.stop()
def batch(self, reader, batch_size, is_test=False, drop_last=False):
def batch_reader(is_test=False, drop_last=drop_last):
if is_test:
imgs, grts, img_names, valid_shapes, org_shapes = [], [], [], [], []
for img, grt, img_name, valid_shape, org_shape in reader():
imgs.append(img)
grts.append(grt)
img_names.append(img_name)
valid_shapes.append(valid_shape)
org_shapes.append(org_shape)
if len(imgs) == batch_size:
yield np.array(imgs), np.array(
grts), img_names, np.array(valid_shapes), np.array(
org_shapes)
imgs, grts, img_names, valid_shapes, org_shapes = [], [], [], [], []
if not drop_last and len(imgs) > 0:
yield np.array(imgs), np.array(grts), img_names, np.array(
valid_shapes), np.array(org_shapes)
else:
imgs, labs, ignore = [], [], []
bs = 0
for img, lab, ig in reader():
imgs.append(img)
labs.append(lab)
ignore.append(ig)
bs += 1
if bs == batch_size:
yield np.array(imgs), np.array(labs), np.array(ignore)
bs = 0
imgs, labs, ignore = [], [], []
if not drop_last and bs > 0:
yield np.array(imgs), np.array(labs), np.array(ignore)
return batch_reader(is_test, drop_last)
def load_image(self, line, src_dir, mode=ModelPhase.TRAIN):
# original image cv2.imread flag setting
cv2_imread_flag = cv2.IMREAD_COLOR
if cfg.DATASET.IMAGE_TYPE == "rgba":
# If use RBGA 4 channel ImageType, use IMREAD_UNCHANGED flags to
# reserver alpha channel
cv2_imread_flag = cv2.IMREAD_UNCHANGED
parts = line.strip().split(cfg.DATASET.SEPARATOR)
if len(parts) != 2:
if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL:
raise Exception("File list format incorrect! It should be"
" image_name{}label_name\\n".format(
cfg.DATASET.SEPARATOR))
img_name, grt_name = parts[0], None
else:
img_name, grt_name = parts[0], parts[1]
img_path = os.path.join(src_dir, img_name)
img = cv2_imread(img_path, cv2_imread_flag)
if grt_name is not None:
grt_path = os.path.join(src_dir, grt_name)
grt = cv2_imread(grt_path, cv2.IMREAD_GRAYSCALE)
else:
grt = None
if img is None:
raise Exception(
"Empty image, src_dir: {}, img: {} & lab: {}".format(
src_dir, img_path, grt_path))
img_height = img.shape[0]
img_width = img.shape[1]
if grt is not None:
grt_height = grt.shape[0]
grt_width = grt.shape[1]
if img_height != grt_height or img_width != grt_width:
raise Exception(
"source img and label img must has the same size")
else:
if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL:
raise Exception(
"Empty image, src_dir: {}, img: {} & lab: {}".format(
src_dir, img_path, grt_path))
if len(img.shape) < 3:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
img_channels = img.shape[2]
if img_channels < 3:
raise Exception("PaddleSeg only supports gray, rgb or rgba image")
if img_channels != cfg.DATASET.DATA_DIM:
raise Exception(
"Input image channel({}) is not match cfg.DATASET.DATA_DIM({}), img_name={}"
.format(img_channels, cfg.DATASET.DATADIM, img_name))
if img_channels != len(cfg.MEAN):
raise Exception(
"img name {}, img chns {} mean size {}, size unequal".format(
img_name, img_channels, len(cfg.MEAN)))
if img_channels != len(cfg.STD):
raise Exception(
"img name {}, img chns {} std size {}, size unequal".format(
img_name, img_channels, len(cfg.STD)))
return img, grt, img_name, grt_name
def normalize_image(self, img):
""" 像素归一化后减均值除方差 """
img = img.transpose((2, 0, 1)).astype('float32') / 255.0
img_mean = np.array(cfg.MEAN).reshape((len(cfg.MEAN), 1, 1))
img_std = np.array(cfg.STD).reshape((len(cfg.STD), 1, 1))
img -= img_mean
img /= img_std
return img
def process_image(self, line, data_dir, mode):
""" process_image """
img, grt, grt_instance, img_name, grt_name = self.load_image(
line, data_dir, mode=mode)
if mode == ModelPhase.TRAIN:
img, grt = aug.resize(img, grt, mode)
if cfg.AUG.RICH_CROP.ENABLE:
if cfg.AUG.RICH_CROP.BLUR:
if cfg.AUG.RICH_CROP.BLUR_RATIO <= 0:
n = 0
elif cfg.AUG.RICH_CROP.BLUR_RATIO >= 1:
n = 1
else:
n = int(1.0 / cfg.AUG.RICH_CROP.BLUR_RATIO)
if n > 0:
if np.random.randint(0, n) == 0:
radius = np.random.randint(3, 10)
if radius % 2 != 1:
radius = radius + 1
if radius > 9:
radius = 9
img = cv2.GaussianBlur(img, (radius, radius), 0, 0)
img, grt = aug.random_rotation(
img,
grt,
rich_crop_max_rotation=cfg.AUG.RICH_CROP.MAX_ROTATION,
mean_value=cfg.DATASET.PADDING_VALUE)
img, grt = aug.rand_scale_aspect(
img,
grt,
rich_crop_min_scale=cfg.AUG.RICH_CROP.MIN_AREA_RATIO,
rich_crop_aspect_ratio=cfg.AUG.RICH_CROP.ASPECT_RATIO)
img = aug.hsv_color_jitter(
img,
brightness_jitter_ratio=cfg.AUG.RICH_CROP.
BRIGHTNESS_JITTER_RATIO,
saturation_jitter_ratio=cfg.AUG.RICH_CROP.
SATURATION_JITTER_RATIO,
contrast_jitter_ratio=cfg.AUG.RICH_CROP.
CONTRAST_JITTER_RATIO)
if cfg.AUG.FLIP:
if cfg.AUG.FLIP_RATIO <= 0:
n = 0
elif cfg.AUG.FLIP_RATIO >= 1:
n = 1
else:
n = int(1.0 / cfg.AUG.FLIP_RATIO)
if n > 0:
if np.random.randint(0, n) == 0:
img = img[::-1, :, :]
grt = grt[::-1, :]
if cfg.AUG.MIRROR:
if np.random.randint(0, 2) == 1:
img = img[:, ::-1, :]
grt = grt[:, ::-1]
img, grt = aug.rand_crop(img, grt, mode=mode)
elif ModelPhase.is_eval(mode):
img, grt = aug.resize(img, grt, mode=mode)
img, grt = aug.rand_crop(img, grt, mode=mode)
elif ModelPhase.is_visual(mode):
org_shape = [img.shape[0], img.shape[1]]
img, grt = aug.resize(img, grt, mode=mode)
valid_shape = [img.shape[0], img.shape[1]]
img, grt = aug.rand_crop(img, grt, mode=mode)
else:
raise ValueError("Dataset mode={} Error!".format(mode))
# Normalize image
img = self.normalize_image(img)
if ModelPhase.is_train(mode) or ModelPhase.is_eval(mode):
grt = np.expand_dims(np.array(grt).astype('int32'), axis=0)
ignore = (grt != cfg.DATASET.IGNORE_INDEX).astype('int32')
if ModelPhase.is_train(mode):
return (img, grt, ignore)
elif ModelPhase.is_eval(mode):
return (img, grt, ignore)
elif ModelPhase.is_visual(mode):
return (img, grt, img_name, valid_shape, org_shape)
class LaneNetDataset(SegDataset):
def __init__(self,
             file_list,
             data_dir,
......
# coding: utf8
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle.fluid as fluid
import numpy as np
import importlib
from utils.config import cfg
class Solver(object):
def __init__(self, main_prog, start_prog):
total_images = cfg.DATASET.TRAIN_TOTAL_IMAGES
self.weight_decay = cfg.SOLVER.WEIGHT_DECAY
self.momentum = cfg.SOLVER.MOMENTUM
self.momentum2 = cfg.SOLVER.MOMENTUM2
self.step_per_epoch = total_images // cfg.BATCH_SIZE
if total_images % cfg.BATCH_SIZE != 0:
self.step_per_epoch += 1
self.total_step = cfg.SOLVER.NUM_EPOCHS * self.step_per_epoch
self.main_prog = main_prog
self.start_prog = start_prog
def lr_warmup(self, learning_rate, warmup_steps, start_lr, end_lr):
linear_step = end_lr - start_lr
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate_warmup")
global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
with fluid.layers.control_flow.Switch() as switch:
with switch.case(global_step < warmup_steps):
decayed_lr = start_lr + linear_step * (global_step / warmup_steps)
fluid.layers.tensor.assign(decayed_lr, lr)
with switch.default():
fluid.layers.tensor.assign(learning_rate, lr)
return lr
def piecewise_decay(self):
gamma = cfg.SOLVER.GAMMA
bd = [self.step_per_epoch * e for e in cfg.SOLVER.DECAY_EPOCH]
lr = [cfg.SOLVER.LR * (gamma**i) for i in range(len(bd) + 1)]
decayed_lr = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
return decayed_lr
def poly_decay(self):
power = cfg.SOLVER.POWER
decayed_lr = fluid.layers.polynomial_decay(
cfg.SOLVER.LR, self.total_step, end_learning_rate=0, power=power)
return decayed_lr
def cosine_decay(self):
decayed_lr = fluid.layers.cosine_decay(
cfg.SOLVER.LR, self.step_per_epoch, cfg.SOLVER.NUM_EPOCHS)
return decayed_lr
def get_lr(self, lr_policy):
if lr_policy.lower() == 'poly':
decayed_lr = self.poly_decay()
elif lr_policy.lower() == 'piecewise':
decayed_lr = self.piecewise_decay()
elif lr_policy.lower() == 'cosine':
decayed_lr = self.cosine_decay()
else:
raise Exception(
"unsupport learning decay policy! only support poly,piecewise,cosine"
)
if cfg.SOLVER.LR_WARMUP:
start_lr = 0
end_lr = cfg.SOLVER.LR
warmup_steps = cfg.SOLVER.LR_WARMUP_STEPS
decayed_lr = self.lr_warmup(decayed_lr, warmup_steps, start_lr, end_lr)
return decayed_lr
def sgd_optimizer(self, lr_policy, loss):
decayed_lr = self.get_lr(lr_policy)
optimizer = fluid.optimizer.Momentum(
learning_rate=decayed_lr,
momentum=self.momentum,
regularization=fluid.regularizer.L2Decay(
regularization_coeff=self.weight_decay),
)
optimizer.minimize(loss)
return decayed_lr
def adam_optimizer(self, lr_policy, loss):
decayed_lr = self.get_lr(lr_policy)
optimizer = fluid.optimizer.Adam(
learning_rate=decayed_lr,
beta1=self.momentum,
beta2=self.momentum2,
regularization=fluid.regularizer.L2Decay(
regularization_coeff=self.weight_decay),
)
optimizer.minimize(loss)
return decayed_lr
def optimise(self, loss):
lr_policy = cfg.SOLVER.LR_POLICY
opt = cfg.SOLVER.OPTIMIZER
if opt.lower() == 'adam':
return self.adam_optimizer(lr_policy, loss)
elif opt.lower() == 'sgd':
return self.sgd_optimizer(lr_policy, loss)
else:
raise Exception(
"unsupport optimizer solver, only support adam and sgd")
@@ -25,6 +25,8 @@ import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(cur_path)[0])[0]
SEG_PATH = os.path.join(cur_path, "../../../")
sys.path.append(SEG_PATH)
sys.path.append(root_path)

import argparse
@@ -34,7 +36,7 @@ import numpy as np
import paddle.fluid as fluid
from utils.config import cfg
from utils.timer import Timer, calculate_eta
from pdseg.utils.timer import Timer, calculate_eta
from reader import LaneNetDataset
from models.model_builder import build_model
from models.model_builder import ModelPhase
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A simple attribute dictionary used for representing configuration options."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
import codecs
from ast import literal_eval
import yaml
import six
class SegConfig(dict):
def __init__(self, *args, **kwargs):
super(SegConfig, self).__init__(*args, **kwargs)
self.immutable = False
def __setattr__(self, key, value, create_if_not_exist=True):
if key in ["immutable"]:
self.__dict__[key] = value
return
t = self
keylist = key.split(".")
for k in keylist[:-1]:
t = t.__getattr__(k, create_if_not_exist)
t.__getattr__(keylist[-1], create_if_not_exist)
t[keylist[-1]] = value
def __getattr__(self, key, create_if_not_exist=True):
if key in ["immutable"]:
return self.__dict__[key]
if not key in self:
if not create_if_not_exist:
raise KeyError
self[key] = SegConfig()
return self[key]
def __setitem__(self, key, value):
#
if self.immutable:
raise AttributeError(
'Attempted to set "{}" to "{}", but SegConfig is immutable'.
format(key, value))
#
if isinstance(value, six.string_types):
try:
value = literal_eval(value)
except ValueError:
pass
except SyntaxError:
pass
super(SegConfig, self).__setitem__(key, value)
def update_from_segconfig(self, other):
if isinstance(other, dict):
other = SegConfig(other)
assert isinstance(other, SegConfig)
diclist = [("", other)]
while len(diclist):
prefix, tdic = diclist[0]
diclist = diclist[1:]
for key, value in tdic.items():
key = "{}.{}".format(prefix, key) if prefix else key
if isinstance(value, dict):
diclist.append((key, value))
continue
try:
self.__setattr__(key, value, create_if_not_exist=False)
except KeyError:
raise KeyError('Non-existent config key: {}'.format(key))
def check_and_infer(self):
if self.DATASET.IMAGE_TYPE in ['rgb', 'gray']:
self.DATASET.DATA_DIM = 3
elif self.DATASET.IMAGE_TYPE in ['rgba']:
self.DATASET.DATA_DIM = 4
else:
raise KeyError(
'DATASET.IMAGE_TYPE config error, only support `rgb`, `gray` and `rgba`'
)
if self.MEAN is not None:
self.DATASET.PADDING_VALUE = [x*255.0 for x in self.MEAN]
if not self.TRAIN_CROP_SIZE:
raise ValueError(
'TRAIN_CROP_SIZE is empty! Please set a pair of values in format (width, height)'
)
if not self.EVAL_CROP_SIZE:
raise ValueError(
'EVAL_CROP_SIZE is empty! Please set a pair of values in format (width, height)'
)
# Ensure file list is use UTF-8 encoding
train_sets = codecs.open(self.DATASET.TRAIN_FILE_LIST, 'r', 'utf-8').readlines()
val_sets = codecs.open(self.DATASET.VAL_FILE_LIST, 'r', 'utf-8').readlines()
test_sets = codecs.open(self.DATASET.TEST_FILE_LIST, 'r', 'utf-8').readlines()
self.DATASET.TRAIN_TOTAL_IMAGES = len(train_sets)
self.DATASET.VAL_TOTAL_IMAGES = len(val_sets)
self.DATASET.TEST_TOTAL_IMAGES = len(test_sets)
if self.MODEL.MODEL_NAME == 'icnet' and \
len(self.MODEL.MULTI_LOSS_WEIGHT) != 3:
self.MODEL.MULTI_LOSS_WEIGHT = [1.0, 0.4, 0.16]
def update_from_list(self, config_list):
if len(config_list) % 2 != 0:
raise ValueError(
"Command line options config format error! Please check it: {}".
format(config_list))
for key, value in zip(config_list[0::2], config_list[1::2]):
try:
self.__setattr__(key, value, create_if_not_exist=False)
except KeyError:
raise KeyError('Non-existent config key: {}'.format(key))
def update_from_file(self, config_file):
with codecs.open(config_file, 'r', 'utf-8') as file:
dic = yaml.load(file, Loader=yaml.FullLoader)
self.update_from_segconfig(dic)
def set_immutable(self, immutable):
self.immutable = immutable
for value in self.values():
if isinstance(value, SegConfig):
value.set_immutable(immutable)
def is_immutable(self):
return self.immutable
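The `SegConfig` class listed above (now imported from `pdseg.utils.collect`) is an attribute dictionary with dotted-key access; a small usage sketch (not part of the commit, assuming `pdseg` is on `sys.path`):

```python
# Dotted-key access and command-line style updates with SegConfig.
from pdseg.utils.collect import SegConfig

cfg = SegConfig()
cfg.SOLVER.LR = 0.01          # nested keys are created on the fly
cfg.BATCH_SIZE = 4
cfg.update_from_list(['SOLVER.LR', '0.001'])   # string values go through literal_eval
print(cfg.SOLVER.LR, cfg.BATCH_SIZE)            # -> 0.001 4
cfg.set_immutable(True)       # further writes now raise AttributeError
```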
@@ -15,7 +15,17 @@
from __future__ import print_function
from __future__ import unicode_literals

from utils.collect import SegConfig

import os
import sys

# LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
# PDSEG_PATH = os.path.join(LOCAL_PATH, "../../../", "pdseg")
# print(PDSEG_PATH)
# sys.path.insert(0, PDSEG_PATH)
# print(sys.path)
from pdseg.utils.collect import SegConfig
import numpy as np

cfg = SegConfig()
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
def calculate_eta(remaining_step, speed):
if remaining_step < 0:
remaining_step = 0
remaining_time = int(remaining_step / speed)
result = "{:0>2}:{:0>2}:{:0>2}"
arr = []
for i in range(2, -1, -1):
arr.append(int(remaining_time / 60**i))
remaining_time %= 60**i
return result.format(*arr)
class Timer(object):
""" Simple timer class for measuring time consuming """
def __init__(self):
self._start_time = 0.0
self._end_time = 0.0
self._elapsed_time = 0.0
self._is_running = False
def start(self):
self._is_running = True
self._start_time = time.time()
def restart(self):
self.start()
def stop(self):
self._is_running = False
self._end_time = time.time()
def elapsed_time(self):
self._end_time = time.time()
self._elapsed_time = self._end_time - self._start_time
if not self.is_running:
return 0.0
return self._elapsed_time
@property
def is_running(self):
return self._is_running
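A quick worked example (not part of the commit) of the `calculate_eta` helper above, which formats a remaining-step count and a speed in steps per second as HH:MM:SS; the function body is copied verbatim so the snippet runs on its own:

```python
# Copied from the timer module above.
def calculate_eta(remaining_step, speed):
    if remaining_step < 0:
        remaining_step = 0
    remaining_time = int(remaining_step / speed)
    result = "{:0>2}:{:0>2}:{:0>2}"
    arr = []
    for i in range(2, -1, -1):
        arr.append(int(remaining_time / 60**i))
        remaining_time %= 60**i
    return result.format(*arr)

print(calculate_eta(3700, 1.0))   # 3700 s remaining -> 01:01:40
print(calculate_eta(450, 2.5))    # 180 s remaining  -> 00:03:00
```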
This diff has been collapsed.
@@ -25,7 +25,10 @@ import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
root_path = os.path.split(os.path.split(cur_path)[0])[0]
SEG_PATH = os.path.join(cur_path, "../../../")
sys.path.append(SEG_PATH)
sys.path.append(root_path)

import matplotlib
matplotlib.use('Agg')
import time
@@ -84,44 +87,6 @@ def makedirs(directory):
        os.makedirs(directory)
def get_color_map(num_classes):
""" Returns the color map for visualizing the segmentation mask,
which can support arbitrary number of classes.
Args:
num_classes: Number of classes
Returns:
The color map
"""
#color_map = num_classes * 3 * [0]
color_map = num_classes * [[0, 0, 0]]
for i in range(0, num_classes):
j = 0
color_map[i] = [0, 0, 0]
lab = i
while lab:
color_map[i][0] |= (((lab >> 0) & 1) << (7 - j))
color_map[i][1] |= (((lab >> 1) & 1) << (7 - j))
color_map[i][2] |= (((lab >> 2) & 1) << (7 - j))
j += 1
lab >>= 3
return color_map
def colorize(image, shape, color_map):
"""
Convert segment result to color image.
"""
color_map = np.array(color_map).astype("uint8")
# Use OpenCV LUT for color mapping
c1 = cv2.LUT(image, color_map[:, 0])
c2 = cv2.LUT(image, color_map[:, 1])
c3 = cv2.LUT(image, color_map[:, 2])
color_res = np.dstack((c1, c2, c3))
return color_res
def to_png_fn(fn, name=""): def to_png_fn(fn, name=""):
""" """
Append png as filename postfix Append png as filename postfix
...@@ -224,7 +189,6 @@ def visualize(cfg, ...@@ -224,7 +189,6 @@ def visualize(cfg,
plt.imshow(binary_seg_image * 255, cmap='gray') plt.imshow(binary_seg_image * 255, cmap='gray')
plt.show() plt.show()
# from collections import
cv2.imwrite(pred_binary_fn, np.array(binary_seg_image * 255).astype(np.uint8)) cv2.imwrite(pred_binary_fn, np.array(binary_seg_image * 255).astype(np.uint8))
cv2.imwrite(pred_lane_fn, postprocess_result['source_image']) cv2.imwrite(pred_lane_fn, postprocess_result['source_image'])
cv2.imwrite(pred_instance_fn, mask_image) cv2.imwrite(pred_instance_fn, mask_image)
......
@@ -40,9 +40,8 @@ def softmax_with_loss(logit, label, ignore_mask=None, num_classes=2, weight=None
        if isinstance(weight, list):
            assert len(weight) == num_classes, "weight length must equal num of classes"
            weight = fluid.layers.assign(np.array([weight], dtype='float32'))
        elif isinstance(weight, fluid.layers.Variable):
            pass
        else:
        elif isinstance(weight, str):
            assert weight.lower() == 'dynamic', 'if weight is string, must be dynamic!'
            tmp = []
            total_num = fluid.layers.cast(fluid.layers.shape(label)[0], 'float32')
            for i in range(num_classes):
@@ -51,6 +50,10 @@ def softmax_with_loss(logit, label, ignore_mask=None, num_classes=2, weight=None
                tmp.append(ratio)
            weight = fluid.layers.concat(tmp)
            weight = weight / fluid.layers.reduce_sum(weight) * num_classes
        elif isinstance(weight, fluid.layers.Variable):
            pass
        else:
            raise ValueError('Expect weight is a list, string or Variable, but receive {}'.format(type(weight)))
        weight = fluid.layers.reshape(weight, [1, num_classes])
        weighted_label_one_hot = fluid.layers.elementwise_mul(label_one_hot, weight)
        probs = fluid.layers.softmax(logit)
......
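The 'dynamic' branch added above weights each class by `total_pixels / (class_pixels + 1)` and then rescales the weights to sum to `num_classes`; a NumPy sketch (not part of the commit) of the same computation:

```python
# Dynamic class weights for an imbalanced label map.
import numpy as np

def dynamic_class_weights(label, num_classes):
    label = label.reshape(-1)
    total = float(label.size)
    counts = np.array([(label == c).sum() for c in range(num_classes)], dtype=np.float64)
    weight = total / (counts + 1)                 # rare classes get larger ratios
    return weight / weight.sum() * num_classes    # normalize to sum to num_classes

lab = np.zeros((4, 4), dtype=np.int64)
lab[0, 0] = 1                            # 15 background pixels, 1 lane pixel
print(dynamic_class_weights(lab, 2))     # the rare class receives the larger weight
```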
@@ -14,5 +14,3 @@
# limitations under the License.
import models.modeling
import models.libs
import models.backbone
# coding: utf8
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
@@ -20,14 +21,26 @@ import paddle
import paddle.fluid as fluid
from paddle.fluid import ParamAttr

__all__ = ["VGGNet", "VGG11", "VGG13", "VGG16", "VGG19"]
__all__ = ["VGGNet"]


def check_points(count, points):
    if points is None:
        return False
    else:
        if isinstance(points, list):
            return (True if count in points else False)
        else:
            return (True if count == points else False)


class VGGNet():
    def __init__(self, layers=16):
        self.layers = layers

    def net(self, input, class_dim=1000):
    def net(self, input, class_dim=1000, end_points=None, decode_points=None):
        short_cuts = dict()
        layers_count = 0
        layers = self.layers
        vgg_spec = {
            11: ([1, 1, 2, 2, 2]),
@@ -38,40 +51,18 @@ class VGGNet():
        assert layers in vgg_spec.keys(), \
            "supported layers are {} but input layer is {}".format(vgg_spec.keys(), layers)

        fetures_dict = {}
        nums = vgg_spec[layers]
        conv1 = self.conv_block(input, 64, nums[0], name="conv1_")
        conv2 = self.conv_block(conv1, 128, nums[1], name="conv2_")
        conv3 = self.conv_block(conv2, 256, nums[2], name="conv3_")
        conv4 = self.conv_block(conv3, 512, nums[3], name="conv4_")
        conv5 = self.conv_block(conv4, 512, nums[4], name="conv5_")
        fetures_dict['pool3'] = conv3
        fetures_dict['pool4'] = conv4
        fetures_dict['pool5'] = conv5
        return fetures_dict
        fc_dim = 4096
        fc_name = ["fc6", "fc7", "fc8"]
        fc1 = fluid.layers.fc(
            input=conv5,
            size=fc_dim,
            act='relu',
            param_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_weights"),
            bias_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_offset"))
        fc1 = fluid.layers.dropout(x=fc1, dropout_prob=0.5)
        fc2 = fluid.layers.fc(
            input=fc1,
            size=fc_dim,
            act='relu',
            param_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_weights"),
            bias_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_offset"))
        fc2 = fluid.layers.dropout(x=fc2, dropout_prob=0.5)
        out = fluid.layers.fc(
            input=fc2,
            size=class_dim,
            param_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_weights"),
            bias_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_offset"))
        return out
        channels = [64, 128, 256, 512, 512]
        conv = input
        for i in range(len(nums)):
            conv = self.conv_block(conv, channels[i], nums[i], name="conv" + str(i + 1) + "_")
            layers_count += nums[i]
            if check_points(layers_count, decode_points):
                short_cuts[layers_count] = conv
            if check_points(layers_count, end_points):
                return conv, short_cuts

        return conv

    def conv_block(self, input, num_filter, groups, name=None):
        conv = input
@@ -88,23 +79,3 @@ class VGGNet():
                bias_attr=False)
        return fluid.layers.pool2d(
            input=conv, pool_size=2, pool_type='max', pool_stride=2)
def VGG11():
model = VGGNet(layers=11)
return model
def VGG13():
model = VGGNet(layers=13)
return model
def VGG16():
model = VGGNet(layers=16)
return model
def VGG19():
model = VGGNet(layers=19)
return model
@@ -158,8 +158,9 @@ cfg.SOLVER.LOSS = ["softmax_loss"]
cfg.SOLVER.LR_WARMUP = False
# number of warmup iterations
cfg.SOLVER.LR_WARMUP_STEPS = 2000
# cross entropy weight; if not None, the class weights are adjusted dynamically according to the number of pixels of each class in every batch.
# A static weight can also be set, e.g. with 3 classes the weights could be [0.1, 2.0, 0.9]
# cross entropy weight; defaults to None. If set to 'dynamic', the class weights are adjusted
# dynamically according to the number of pixels of each class in every batch.
# A static weight (a list) can also be set, e.g. with 3 classes the weights could be [0.1, 2.0, 0.9]
cfg.SOLVER.CROSS_ENTROPY_WEIGHT = None
########################## Test configuration ###########################################
# path of the model to test
......