未验证 提交 5b6f4c01 编写于 作者: B Bubbliiiing 提交者: GitHub

Add files via upload

上级 1efeefe3
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# test.txt和val.txt里面没有内容是正常的。训练不会使用到。
#----------------------------------------------------------------------#
import os
import random
# Fix the RNG seed so the random module produces the same sequence on every run.
random.seed(0)
# Directory holding the VOC XML annotation files.
xmlfilepath=r'./VOCdevkit/VOC2007/Annotations'
# Directory under which the ImageSets/Main split lists are kept.
saveBasePath=r"./VOCdevkit/VOC2007/ImageSets/Main/"
#----------------------------------------------------------------------#
#   To add a test set, change trainval_percent.
#   train_percent does not need to be changed.
#----------------------------------------------------------------------#
trainval_percent=1
train_percent=1
......
......@@ -3,18 +3,21 @@
# 具体视频教程可查看
# https://www.bilibili.com/video/BV1zE411u7Vw
#----------------------------------------------------#
from yolo import YOLO
from PIL import Image
from keras.layers import Input
from keras.applications.imagenet_utils import preprocess_input
from keras import backend as K
from utils.utils import letterbox_image
from nets.yolo4 import yolo_body,yolo_eval
from tqdm import tqdm
import colorsys
import numpy as np
import os
import numpy as np
from keras import backend as K
from keras.applications.imagenet_utils import preprocess_input
from keras.layers import Input
from PIL import Image
from tqdm import tqdm
from nets.yolo4 import yolo_body, yolo_eval
from utils.utils import letterbox_image
from yolo import YOLO
class mAP_YOLO(YOLO):
#---------------------------------------------------#
# 获得所有的分类
......@@ -25,12 +28,16 @@ class mAP_YOLO(YOLO):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
# 计算anchor数量
#---------------------------------------------------#
# 计算先验框的数量和种类的数量
#---------------------------------------------------#
num_anchors = len(self.anchors)
num_classes = len(self.class_names)
# 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
# 否则先构建模型再载入
#---------------------------------------------------------#
# 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
# 否则先构建模型再载入
#---------------------------------------------------------#
try:
self.yolo_model = load_model(model_path, compile=False)
except:
......@@ -58,6 +65,10 @@ class mAP_YOLO(YOLO):
self.input_image_shape = K.placeholder(shape=(2, ))
#---------------------------------------------------------#
# 在yolo_eval函数中,我们会对预测结果进行后处理
# 后处理的内容包括,解码、非极大抑制、门限筛选等
#---------------------------------------------------------#
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
num_classes, self.input_image_shape, max_boxes = self.max_boxes,
score_threshold = self.score, iou_threshold = self.iou)
......@@ -68,21 +79,27 @@ class mAP_YOLO(YOLO):
#---------------------------------------------------#
def detect_image(self, image_id, image):
f = open("./input/detection-results/"+image_id+".txt","w")
# 调整图片使其符合输入要求
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
new_image_size = (self.model_image_size[1],self.model_image_size[0])
boxed_image = letterbox_image(image, new_image_size)
image_data = np.array(boxed_image, dtype='float32')
image_data /= 255.
image_data = np.expand_dims(image_data, 0) # Add batch dimension.
# 预测结果
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
image_data = np.expand_dims(image_data, 0)
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
})
K.learning_phase(): 0})
for i, c in enumerate(out_classes):
predicted_class = self.class_names[int(c)]
......
import numpy as np
import xml.etree.ElementTree as ET
import glob
import random
import xml.etree.ElementTree as ET
import numpy as np
def cas_iou(box,cluster):
x = np.minimum(cluster[:,0],box[0])
......@@ -61,6 +63,9 @@ def load_data(path):
tree = ET.parse(xml_file)
height = int(tree.findtext('./size/height'))
width = int(tree.findtext('./size/width'))
if height<=0 or width<=0:
continue
# 对于每一个目标都获得它的宽高
for obj in tree.iter('object'):
xmin = int(float(obj.findtext('bndbox/xmin'))) / width
......@@ -103,4 +108,4 @@ if __name__ == '__main__':
else:
x_y = ", %d,%d" % (data[i][0], data[i][1])
f.write(x_y)
f.close()
\ No newline at end of file
f.close()
from functools import wraps
from keras import backend as K
from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, Layer
from keras.layers import (Add, Concatenate, Conv2D, Layer, MaxPooling2D,
UpSampling2D, ZeroPadding2D)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
......@@ -21,8 +23,11 @@ class Mish(Layer):
def compute_output_shape(self, input_shape):
    """Return ``input_shape`` unchanged as this layer's output shape."""
    return input_shape
#--------------------------------------------------#
# 单次卷积
# 单次卷积DarknetConv2D
# 正则化系数为5e-4
# 如果步长为2则自己设定padding方式。
#--------------------------------------------------#
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
......@@ -32,7 +37,7 @@ def DarknetConv2D(*args, **kwargs):
return Conv2D(*args, **darknet_conv_kwargs)
#---------------------------------------------------#
# 卷积块
# 卷积块 -> 卷积 + 标准化 + 激活函数
# DarknetConv2D + BatchNormalization + Mish
#---------------------------------------------------#
def DarknetConv2D_BN_Mish(*args, **kwargs):
......@@ -43,36 +48,48 @@ def DarknetConv2D_BN_Mish(*args, **kwargs):
BatchNormalization(),
Mish())
#---------------------------------------------------#
#--------------------------------------------------------------------#
# CSPdarknet的结构块
# 存在一个大残差边
# 这个大残差边绕过了很多的残差结构
#---------------------------------------------------#
# 首先利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩
# 然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构
# 主干部分会对num_blocks进行循环,循环内部是残差结构。
# 对于整个CSPdarknet的结构块,就是一个大残差块+内部多个小残差块
#--------------------------------------------------------------------#
def resblock_body(x, num_filters, num_blocks, all_narrow=True):
    """Build one CSPdarknet stage: downsample, then a bypass branch plus a residual trunk.

    Args:
        x: Input feature tensor.
        num_filters: Channel count of the downsampling convolution and of the
            final 1x1 fusion convolution.
        num_blocks: Number of residual units stacked on the trunk branch.
        all_narrow: When true, both branches use ``num_filters // 2`` channels;
            otherwise they use ``num_filters``.

    Returns:
        The output tensor of the final 1x1 convolution applied to the
        concatenation of the trunk and bypass branches.
    """
    half_width = num_filters // 2
    branch_width = half_width if all_narrow else num_filters

    # Pad one row on top and one column on the left, then apply a 3x3 conv
    # with stride 2 to shrink height and width.
    # NOTE(review): the padding mode used for strided convs is decided inside
    # DarknetConv2D, which is not visible here - confirm.
    downsampled = ZeroPadding2D(((1, 0), (1, 0)))(x)
    downsampled = DarknetConv2D_BN_Mish(num_filters, (3, 3), strides=(2, 2))(downsampled)

    # Large bypass branch: a single 1x1 conv that skips every residual unit.
    shortcut = DarknetConv2D_BN_Mish(branch_width, (1, 1))(downsampled)

    # Trunk branch: 1x1 conv followed by num_blocks residual units, each a
    # 1x1 conv (channel mixing) and a 3x3 conv (feature extraction) added back
    # onto the trunk.
    trunk = DarknetConv2D_BN_Mish(branch_width, (1, 1))(downsampled)
    for _ in range(num_blocks):
        residual = compose(
            DarknetConv2D_BN_Mish(half_width, (1, 1)),
            DarknetConv2D_BN_Mish(branch_width, (3, 3)))(trunk)
        trunk = Add()([trunk, residual])
    trunk = DarknetConv2D_BN_Mish(branch_width, (1, 1))(trunk)

    # Stack the bypass branch back onto the trunk, then fuse the channels.
    merged = Concatenate()([trunk, shortcut])
    return DarknetConv2D_BN_Mish(num_filters, (1, 1))(merged)
#---------------------------------------------------#
# darknet53 的主体部分
# CSPdarknet53 的主体部分
# 输入为一张416x416x3的图片
# 输出为三个有效特征层
#---------------------------------------------------#
def darknet_body(x):
x = DarknetConv2D_BN_Mish(32, (3,3))(x)
......
......@@ -12,20 +12,31 @@ def box_ciou(b1, b2):
-------
ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
"""
# 求出预测框左上角右下角
#-----------------------------------------------------------#
# 求出预测框左上角右下角
# b1_mins (batch, feat_w, feat_h, anchor_num, 2)
# b1_maxes (batch, feat_w, feat_h, anchor_num, 2)
#-----------------------------------------------------------#
b1_xy = b1[..., :2]
b1_wh = b1[..., 2:4]
b1_wh_half = b1_wh/2.
b1_mins = b1_xy - b1_wh_half
b1_maxes = b1_xy + b1_wh_half
# 求出真实框左上角右下角
#-----------------------------------------------------------#
# 求出真实框左上角右下角
# b2_mins (batch, feat_w, feat_h, anchor_num, 2)
# b2_maxes (batch, feat_w, feat_h, anchor_num, 2)
#-----------------------------------------------------------#
b2_xy = b2[..., :2]
b2_wh = b2[..., 2:4]
b2_wh_half = b2_wh/2.
b2_mins = b2_xy - b2_wh_half
b2_maxes = b2_xy + b2_wh_half
# 求真实框和预测框所有的iou
#-----------------------------------------------------------#
# 求真实框和预测框所有的iou
# iou (batch, feat_w, feat_h, anchor_num)
#-----------------------------------------------------------#
intersect_mins = K.maximum(b1_mins, b2_mins)
intersect_maxes = K.minimum(b1_maxes, b2_maxes)
intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
......@@ -33,21 +44,27 @@ def box_ciou(b1, b2):
b1_area = b1_wh[..., 0] * b1_wh[..., 1]
b2_area = b2_wh[..., 0] * b2_wh[..., 1]
union_area = b1_area + b2_area - intersect_area
iou = intersect_area / K.maximum(union_area,K.epsilon())
iou = intersect_area / K.maximum(union_area, K.epsilon())
# 计算中心的差距
#-----------------------------------------------------------#
# 计算中心的差距
# center_distance (batch, feat_w, feat_h, anchor_num)
#-----------------------------------------------------------#
center_distance = K.sum(K.square(b1_xy - b2_xy), axis=-1)
# 找到包裹两个框的最小框的左上角和右下角
enclose_mins = K.minimum(b1_mins, b2_mins)
enclose_maxes = K.maximum(b1_maxes, b2_maxes)
enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.0)
# 计算对角线距离
#-----------------------------------------------------------#
# 计算对角线距离
# enclose_diagonal (batch, feat_w, feat_h, anchor_num)
#-----------------------------------------------------------#
enclose_diagonal = K.sum(K.square(enclose_wh), axis=-1)
ciou = iou - 1.0 * (center_distance) / K.maximum(enclose_diagonal ,K.epsilon())
v = 4*K.square(tf.math.atan2(b1_wh[..., 0], K.maximum(b1_wh[..., 1],K.epsilon())) - tf.math.atan2(b2_wh[..., 0], K.maximum(b2_wh[..., 1],K.epsilon()))) / (math.pi * math.pi)
v = 4 * K.square(tf.math.atan2(b1_wh[..., 0], K.maximum(b1_wh[..., 1], K.epsilon())) - tf.math.atan2(b2_wh[..., 0], K.maximum(b2_wh[..., 1],K.epsilon()))) / (math.pi * math.pi)
alpha = v / K.maximum((1.0 - iou + v), K.epsilon())
ciou = ciou - alpha * v
ciou = K.expand_dims(ciou, -1)
ciou = tf.where(tf.is_nan(ciou), tf.zeros_like(ciou), ciou)
return ciou
import numpy as np
import tensorflow as tf
from keras import backend as K
from nets.ious import box_ciou
#---------------------------------------------------#
......@@ -10,17 +11,22 @@ def _smooth_labels(y_true, label_smoothing):
num_classes = tf.cast(K.shape(y_true)[-1], dtype=K.floatx())
label_smoothing = K.constant(label_smoothing, dtype=K.floatx())
return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes
#---------------------------------------------------#
# 将预测值的每个特征层调成真实值
#---------------------------------------------------#
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
num_anchors = len(anchors)
# [1, 1, 1, num_anchors, 2]
#---------------------------------------------------#
# [1, 1, 1, num_anchors, 2]
#---------------------------------------------------#
anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
# 获得x,y的网格
# (13, 13, 1, 2)
grid_shape = K.shape(feats)[1:3] # height, width
#---------------------------------------------------#
# 获得x,y的网格
# (13, 13, 1, 2)
#---------------------------------------------------#
grid_shape = K.shape(feats)[1:3]
grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
[1, grid_shape[1], 1, 1])
grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
......@@ -28,22 +34,34 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
grid = K.concatenate([grid_x, grid_y])
grid = K.cast(grid, K.dtype(feats))
# (batch_size,13,13,3,85)
#---------------------------------------------------#
# 将预测结果调整成(batch_size,13,13,3,85)
# 85可拆分成4 + 1 + 80
# 4代表的是中心宽高的调整参数
# 1代表的是框的置信度
# 80代表的是种类的置信度
#---------------------------------------------------#
feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
#---------------------------------------------------#
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
#---------------------------------------------------#
box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
box_confidence = K.sigmoid(feats[..., 4:5])
box_class_probs = K.sigmoid(feats[..., 5:])
# 在计算loss的时候返回如下参数
#---------------------------------------------------------------------#
# 在计算loss的时候返回grid, feats, box_xy, box_wh
# 在预测的时候返回box_xy, box_wh, box_confidence, box_class_probs
#---------------------------------------------------------------------#
if calc_loss == True:
return grid, feats, box_xy, box_wh
return box_xy, box_wh, box_confidence, box_class_probs
#---------------------------------------------------#
# 用于计算每个预测框与真实框的iou
#---------------------------------------------------#
......@@ -77,108 +95,162 @@ def box_iou(b1, b2):
return iou
#---------------------------------------------------#
# loss值计算
#---------------------------------------------------#
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False, normalize=True):
    """Compute the YOLOv4 training loss (CIoU location + confidence + class terms).

    Args:
        args: Sequence holding the model outputs followed by the ground-truth
            tensors, i.e. ``[*model_body.output, *y_true]``; it is split in two
            at ``num_layers``.
        anchors: Anchor sizes, indexable with a list of indices
            (``anchors[anchor_mask[l]]``) - presumably an ``(N, 2)`` numpy
            array; confirm against the caller.
        num_classes: Number of object classes.
        ignore_thresh: Cells without an object only count as negatives when
            their best IoU with any ground-truth box is below this value.
        label_smoothing: Smoothing factor applied to the class targets; a
            falsy value disables smoothing.
        print_loss: Currently unused; kept for interface compatibility.
        normalize: If true, divide the summed loss by the number of positive
            samples; otherwise divide it by the batch size.

    Returns:
        A scalar tensor with the total loss.
    """
    # Number of feature layers (three anchors per layer).
    num_layers = len(anchors)//3

    #---------------------------------------------------------------------------------------------------#
    #   Split predictions from ground truth.
    #   Per the original notes (416x416 input, 80 classes), y_true has per-layer shapes
    #   (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85). The raw outputs are reshaped to
    #   (m,h,w,num_anchors,num_classes+5) inside yolo_head.
    #---------------------------------------------------------------------------------------------------#
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    #-----------------------------------------------------------#
    #   Anchor indices used by each feature layer. Per the original notes:
    #   13x13 layer -> [142, 110], [192, 243], [459, 401]
    #   26x26 layer -> [36, 75], [76, 55], [72, 146]
    #   52x52 layer -> [12, 16], [19, 36], [40, 28]
    #-----------------------------------------------------------#
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]

    # Network input size, derived as 32x the spatial size of the first output
    # (e.g. 13 * 32 = 416).
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    loss = 0
    num_pos = 0
    #-----------------------------------------------------------#
    #   m is the batch size; mf is the same value as a float.
    #-----------------------------------------------------------#
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        #-----------------------------------------------------------#
        #   Objectness target: 1 where a cell/anchor holds an object.
        #   e.g. (m,13,13,3,1) for the first layer.
        #-----------------------------------------------------------#
        object_mask = y_true[l][..., 4:5]
        #-----------------------------------------------------------#
        #   Class targets, e.g. (m,13,13,3,80).
        #-----------------------------------------------------------#
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)

        #-----------------------------------------------------------#
        #   Decode this layer's output. yolo_head returns:
        #   grid      grid coordinates
        #   raw_pred  reshaped but otherwise unprocessed predictions
        #   pred_xy   decoded box centres
        #   pred_wh   decoded box sizes
        #-----------------------------------------------------------#
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
            anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)

        #-----------------------------------------------------------#
        #   Decoded predicted boxes (x, y, w, h), e.g. (m,13,13,3,4).
        #-----------------------------------------------------------#
        pred_box = K.concatenate([pred_xy, pred_wh])

        #-----------------------------------------------------------#
        #   Build the per-image ignore mask in a dynamically sized
        #   TensorArray, one entry per image of the batch.
        #-----------------------------------------------------------#
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of image b, shape (n, 4).
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            # IoU of every predicted box against every ground-truth box,
            # e.g. (13,13,3,n).
            iou = box_iou(pred_box[b], true_box)
            # Best overlap of each predicted box with any ground-truth box.
            best_iou = K.max(iou, axis=-1)
            # 1 where the best overlap is below ignore_thresh: such predictions
            # may serve as negatives. Predictions at or above the threshold are
            # already close to a real box and are left out of the negative loss.
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask

        # Run loop_body once per image in the batch.
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])

        # Stack to (m,h,w,anchors), then add a trailing axis to match object_mask.
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        #-----------------------------------------------------------#
        #   Weight small boxes more heavily: the larger w*h of the
        #   ground-truth box, the smaller the scale.
        #   NOTE(review): assumes w and h in y_true are normalised to [0, 1] - confirm.
        #-----------------------------------------------------------#
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        #-----------------------------------------------------------#
        #   CIoU location loss, only on cells that hold an object.
        #-----------------------------------------------------------#
        raw_true_box = y_true[l][...,0:4]
        ciou = box_ciou(pred_box, raw_true_box)
        ciou_loss = object_mask * box_loss_scale * (1 - ciou)

        #------------------------------------------------------------------------------#
        #   Confidence loss:
        #   - cells with an object: cross-entropy against target 1;
        #   - cells without an object: cross-entropy against target 0, but only
        #     where ignore_mask is 1 (best_iou < ignore_thresh).
        #------------------------------------------------------------------------------#
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) + \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        # Class loss, only on cells that hold an object.
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)

        # Sum each term, replacing NaN entries with zero first. Normalisation
        # happens once, after the loop.
        location_loss = K.sum(tf.where(tf.is_nan(ciou_loss), tf.zeros_like(ciou_loss), ciou_loss))
        confidence_loss = K.sum(tf.where(tf.is_nan(confidence_loss), tf.zeros_like(confidence_loss), confidence_loss))
        class_loss = K.sum(tf.where(tf.is_nan(class_loss), tf.zeros_like(class_loss), class_loss))

        #-----------------------------------------------------------#
        #   Count positives in this layer (at least 1, so the final
        #   division can never be by zero).
        #-----------------------------------------------------------#
        num_pos += tf.maximum(K.sum(K.cast(object_mask, tf.float32)), 1)
        loss += location_loss + confidence_loss + class_loss

    # Normalise exactly once: by the positive count or by the batch size.
    if normalize:
        loss = loss / num_pos
    else:
        loss = loss / mf
    return loss
......@@ -3,17 +3,21 @@ from functools import wraps
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D
from keras.layers import (Add, Concatenate, Conv2D, MaxPooling2D, UpSampling2D,
ZeroPadding2D)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from nets.CSPdarknet53 import darknet_body
from utils.utils import compose
from nets.CSPdarknet53 import darknet_body
#--------------------------------------------------#
# 单次卷积
# 单次卷积DarknetConv2D
# 正则化系数为5e-4
# 如果步长为2则自己设定padding方式。
#--------------------------------------------------#
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
......@@ -23,7 +27,7 @@ def DarknetConv2D(*args, **kwargs):
return Conv2D(*args, **darknet_conv_kwargs)
#---------------------------------------------------#
# 卷积块
# 卷积块 -> 卷积 + 标准化 + 激活函数
# DarknetConv2D + BatchNormalization + LeakyReLU
#---------------------------------------------------#
def DarknetConv2D_BN_Leaky(*args, **kwargs):
......@@ -35,7 +39,7 @@ def DarknetConv2D_BN_Leaky(*args, **kwargs):
LeakyReLU(alpha=0.1))
#---------------------------------------------------#
# 特征层->最后的输出
# 进行五次卷积
#---------------------------------------------------#
def make_five_convs(x, num_filters):
# 五次卷积
......@@ -47,14 +51,19 @@ def make_five_convs(x, num_filters):
return x
#---------------------------------------------------#
# 特征层->最后的输出
# Panet网络的构建,并且获得预测结果
#---------------------------------------------------#
def yolo_body(inputs, num_anchors, num_classes):
# 生成darknet53的主干模型
#---------------------------------------------------#
# 生成CSPdarknet53的主干模型
# 获得三个有效特征层,他们的shape分别是:
# 52,52,256
# 26,26,512
# 13,13,1024
#---------------------------------------------------#
feat1,feat2,feat3 = darknet_body(inputs)
# 第一个特征层
# y1=(batch_size,13,13,3,85)
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5 = DarknetConv2D_BN_Leaky(512, (1,1))(feat3)
P5 = DarknetConv2D_BN_Leaky(1024, (3,3))(P5)
P5 = DarknetConv2D_BN_Leaky(512, (1,1))(P5)
......@@ -67,38 +76,60 @@ def yolo_body(inputs, num_anchors, num_classes):
P5 = DarknetConv2D_BN_Leaky(1024, (3,3))(P5)
P5 = DarknetConv2D_BN_Leaky(512, (1,1))(P5)
# 13,13,512 -> 13,13,256 -> 26,26,256
P5_upsample = compose(DarknetConv2D_BN_Leaky(256, (1,1)), UpSampling2D(2))(P5)
# 26,26,512 -> 26,26,256
P4 = DarknetConv2D_BN_Leaky(256, (1,1))(feat2)
# 26,26,256 + 26,26,256 -> 26,26,512
P4 = Concatenate()([P4, P5_upsample])
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4 = make_five_convs(P4,256)
# 26,26,256 -> 26,26,128 -> 52,52,128
P4_upsample = compose(DarknetConv2D_BN_Leaky(128, (1,1)), UpSampling2D(2))(P4)
# 52,52,256 -> 52,52,128
P3 = DarknetConv2D_BN_Leaky(128, (1,1))(feat1)
# 52,52,128 + 52,52,128 -> 52,52,256
P3 = Concatenate()([P3, P4_upsample])
P3 = make_five_convs(P3,128)
# 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
P3 = make_five_convs(P3,128)
#---------------------------------------------------#
# 第三个特征层
# y3=(batch_size,52,52,3,85)
#---------------------------------------------------#
P3_output = DarknetConv2D_BN_Leaky(256, (3,3))(P3)
P3_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P3_output)
#26,26 output
# 52,52,128 -> 26,26,256
P3_downsample = ZeroPadding2D(((1,0),(1,0)))(P3)
P3_downsample = DarknetConv2D_BN_Leaky(256, (3,3), strides=(2,2))(P3_downsample)
# 26,26,256 + 26,26,256 -> 26,26,512
P4 = Concatenate()([P3_downsample, P4])
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4 = make_five_convs(P4,256)
#---------------------------------------------------#
# 第二个特征层
# y2=(batch_size,26,26,3,85)
#---------------------------------------------------#
P4_output = DarknetConv2D_BN_Leaky(512, (3,3))(P4)
P4_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P4_output)
#13,13 output
# 26,26,256 -> 13,13,512
P4_downsample = ZeroPadding2D(((1,0),(1,0)))(P4)
P4_downsample = DarknetConv2D_BN_Leaky(512, (3,3), strides=(2,2))(P4_downsample)
# 13,13,512 + 13,13,512 -> 13,13,1024
P5 = Concatenate()([P4_downsample, P5])
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5 = make_five_convs(P5,512)
#---------------------------------------------------#
# 第一个特征层
# y1=(batch_size,13,13,3,85)
#---------------------------------------------------#
P5_output = DarknetConv2D_BN_Leaky(1024, (3,3))(P5)
P5_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P5_output)
......@@ -109,12 +140,16 @@ def yolo_body(inputs, num_anchors, num_classes):
#---------------------------------------------------#
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
num_anchors = len(anchors)
# [1, 1, 1, num_anchors, 2]
#---------------------------------------------------#
# [1, 1, 1, num_anchors, 2]
#---------------------------------------------------#
anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
# 获得x,y的网格
# (13,13, 1, 2)
grid_shape = K.shape(feats)[1:3] # height, width
#---------------------------------------------------#
# 获得x,y的网格
# (13, 13, 1, 2)
#---------------------------------------------------#
grid_shape = K.shape(feats)[1:3]
grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
[1, grid_shape[1], 1, 1])
grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
......@@ -122,18 +157,29 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
grid = K.concatenate([grid_x, grid_y])
grid = K.cast(grid, K.dtype(feats))
# (batch_size,13,13,3,85)
#---------------------------------------------------#
# 将预测结果调整成(batch_size,13,13,3,85)
# 85可拆分成4 + 1 + 80
# 4代表的是中心宽高的调整参数
# 1代表的是框的置信度
# 80代表的是种类的置信度
#---------------------------------------------------#
feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
#---------------------------------------------------#
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
#---------------------------------------------------#
box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
box_confidence = K.sigmoid(feats[..., 4:5])
box_class_probs = K.sigmoid(feats[..., 5:])
# 在计算loss的时候返回如下参数
#---------------------------------------------------------------------#
# 在计算loss的时候返回grid, feats, box_xy, box_wh
# 在预测的时候返回box_xy, box_wh, box_confidence, box_class_probs
#---------------------------------------------------------------------#
if calc_loss == True:
return grid, feats, box_xy, box_wh
return box_xy, box_wh, box_confidence, box_class_probs
......@@ -142,6 +188,9 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
# 对box进行调整,使其符合真实图片的样子
#---------------------------------------------------#
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
#-----------------------------------------------------------------#
# 把y轴放前面是因为方便预测框和图像的宽高进行相乘
#-----------------------------------------------------------------#
box_yx = box_xy[..., ::-1]
box_hw = box_wh[..., ::-1]
......@@ -149,6 +198,10 @@ def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
image_shape = K.cast(image_shape, K.dtype(box_yx))
new_shape = K.round(image_shape * K.min(input_shape/image_shape))
#-----------------------------------------------------------------#
# 这里求出来的offset是图像有效区域相对于图像左上角的偏移情况
# new_shape指的是宽高缩放情况
#-----------------------------------------------------------------#
offset = (input_shape-new_shape)/2./input_shape
scale = input_shape/new_shape
......@@ -171,14 +224,24 @@ def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
# 获取每个box和它的得分
#---------------------------------------------------#
def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
# 将预测值调成真实值
# box_xy对应框的中心点
# box_wh对应框的宽和高
# -1,13,13,3,2; -1,13,13,3,2; -1,13,13,3,1; -1,13,13,3,80
#-----------------------------------------------------------------#
# 将预测值调成真实值
# box_xy : -1,13,13,3,2;
# box_wh : -1,13,13,3,2;
# box_confidence : -1,13,13,3,1;
# box_class_probs : -1,13,13,3,80;
#-----------------------------------------------------------------#
box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape)
# 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的box_xy, box_wh是相对于有灰条的图像的
# 我们需要对其进行修改,去除灰条的部分。
# 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax
#-----------------------------------------------------------------#
boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
# 获得得分和box
#-----------------------------------------------------------------#
# 获得最终得分和框的位置
#-----------------------------------------------------------------#
boxes = K.reshape(boxes, [-1, 4])
box_scores = box_confidence * box_class_probs
box_scores = K.reshape(box_scores, [-1, num_classes])
......@@ -194,42 +257,63 @@ def yolo_eval(yolo_outputs,
max_boxes=20,
score_threshold=.6,
iou_threshold=.5):
# 获得特征层的数量
#---------------------------------------------------#
# 获得特征层的数量,有效特征层的数量为3
#---------------------------------------------------#
num_layers = len(yolo_outputs)
# 特征层1对应的anchor是678
# 特征层2对应的anchor是345
# 特征层3对应的anchor是012
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
#-----------------------------------------------------------#
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
#-----------------------------------------------------------#
# 这里获得的是输入图片的大小,一般是416x416
#-----------------------------------------------------------#
input_shape = K.shape(yolo_outputs[0])[1:3] * 32
boxes = []
box_scores = []
# 对每个特征层进行处理
#-----------------------------------------------------------#
# 对每个特征层进行处理
#-----------------------------------------------------------#
for l in range(num_layers):
_boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
boxes.append(_boxes)
box_scores.append(_box_scores)
# 将每个特征层的结果进行堆叠
#-----------------------------------------------------------#
# 将每个特征层的结果进行堆叠
#-----------------------------------------------------------#
boxes = K.concatenate(boxes, axis=0)
box_scores = K.concatenate(box_scores, axis=0)
#-----------------------------------------------------------#
# 判断得分是否大于score_threshold
#-----------------------------------------------------------#
mask = box_scores >= score_threshold
max_boxes_tensor = K.constant(max_boxes, dtype='int32')
boxes_ = []
scores_ = []
classes_ = []
for c in range(num_classes):
# 取出所有box_scores >= score_threshold的框,和成绩
#-----------------------------------------------------------#
# 取出所有box_scores >= score_threshold的框,和成绩
#-----------------------------------------------------------#
class_boxes = tf.boolean_mask(boxes, mask[:, c])
class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
# 非极大抑制,去掉box重合程度高的那一些
#-----------------------------------------------------------#
# 非极大抑制
# 保留一定区域内得分最大的框
#-----------------------------------------------------------#
nms_index = tf.image.non_max_suppression(
class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
# 获取非极大抑制后的结果
# 下列三个分别是
# 框的位置,得分与种类
#-----------------------------------------------------------#
# 获取非极大抑制后的结果
# 下列三个分别是
# 框的位置,得分与种类
#-----------------------------------------------------------#
class_boxes = K.gather(class_boxes, nms_index)
class_box_scores = K.gather(class_box_scores, nms_index)
classes = K.ones_like(class_box_scores, 'int32') * c
......
from yolo import YOLO
'''
predict.py有几个注意点
1、无法进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。
2、如果想要保存,利用r_image.save("img.jpg")即可保存。
3、如果想要获得框的坐标,可以进入detect_image函数,读取top,left,bottom,right这四个值。
4、如果想要截取下目标,可以利用获取到的top,left,bottom,right这四个值在原图上利用矩阵的方式进行截取。
'''
from keras.layers import Input
from PIL import Image
from nets.yolo4 import yolo_body
from yolo import YOLO
yolo = YOLO()
while True:
......
......@@ -3,11 +3,14 @@
# map测试请看get_dr_txt.py、get_gt_txt.py
# 和get_map.py
#--------------------------------------------#
from nets.yolo4 import yolo_body
from keras.layers import Input
inputs = Input([416,416,3])
model = yolo_body(inputs,3,80)
model.summary()
for i,layer in enumerate(model.layers):
print(i,layer.name)
from nets.yolo4 import yolo_body
if __name__ == "__main__":
inputs = Input([416, 416, 3])
model = yolo_body(inputs, 3, 80)
model.summary()
# for i,layer in enumerate(model.layers):
# print(i,layer.name)
import keras.backend as K
import numpy as np
import tensorflow as tf
import keras.backend as K
from keras.backend.tensorflow_backend import set_session
from keras.callbacks import (EarlyStopping, ModelCheckpoint, ReduceLROnPlateau,
TensorBoard)
from keras.layers import Input, Lambda
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from nets.yolo4 import yolo_body
from nets.loss import yolo_loss
from keras.backend.tensorflow_backend import set_session
from utils.utils import get_random_data,get_random_data_with_Mosaic,rand,WarmUpCosineDecayScheduler
from nets.yolo4 import yolo_body
from utils.utils import (WarmUpCosineDecayScheduler, get_random_data,
get_random_data_with_Mosaic, rand)
#---------------------------------------------------#
......@@ -31,8 +34,7 @@ def get_anchors(anchors_path):
#---------------------------------------------------#
# 训练数据生成器
#---------------------------------------------------#
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False):
'''data generator for fit_generator'''
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False, random=True):
n = len(annotation_lines)
i = 0
flag = True
......@@ -47,11 +49,11 @@ def data_generator(annotation_lines, batch_size, input_shape, anchors, num_class
image, box = get_random_data_with_Mosaic(annotation_lines[i:i+4], input_shape)
i = (i+1) % n
else:
image, box = get_random_data(annotation_lines[i], input_shape)
image, box = get_random_data(annotation_lines[i], input_shape, random=random)
i = (i+1) % n
flag = bool(1-flag)
else:
image, box = get_random_data(annotation_lines[i], input_shape)
image, box = get_random_data(annotation_lines[i], input_shape, random=random)
i = (i+1) % n
image_data.append(image)
box_data.append(box)
......@@ -60,7 +62,6 @@ def data_generator(annotation_lines, batch_size, input_shape, anchors, num_class
y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
yield [image_data, *y_true], np.zeros(batch_size)
#---------------------------------------------------#
# 读入xml文件,并输出y_true
#---------------------------------------------------#
......@@ -68,80 +69,130 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
# 一共有三个特征层数
num_layers = len(anchors)//3
# 先验框
# 678为 142,110, 192,243, 459,401
# 345为 36,75, 76,55, 72,146
# 012为 12,16, 19,36, 40,28
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
#-----------------------------------------------------------#
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
#-----------------------------------------------------------#
# 获得框的坐标和图片的大小
#-----------------------------------------------------------#
true_boxes = np.array(true_boxes, dtype='float32')
input_shape = np.array(input_shape, dtype='int32') # 416,416
# 读出xy轴,读出长宽
# 中心点(m,n,2)
input_shape = np.array(input_shape, dtype='int32')
#-----------------------------------------------------------#
# 通过计算获得真实框的中心和宽高
# 中心点(m,n,2) 宽高(m,n,2)
#-----------------------------------------------------------#
boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
# 计算比例
#-----------------------------------------------------------#
# 将真实框归一化到小数形式
#-----------------------------------------------------------#
true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
# m张图
# m为图片数量,grid_shapes为网格的shape
m = true_boxes.shape[0]
# 得到网格的shape为13,13;26,26;52,52
grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(num_layers)]
# y_true的格式为(m,13,13,3,85)(m,26,26,3,85)(m,52,52,3,85)
#-----------------------------------------------------------#
# y_true的格式为(m,13,13,3,85)(m,26,26,3,85)(m,52,52,3,85)
#-----------------------------------------------------------#
y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
dtype='float32') for l in range(num_layers)]
# [1,9,2]
#-----------------------------------------------------------#
# [9,2] -> [1,9,2]
#-----------------------------------------------------------#
anchors = np.expand_dims(anchors, 0)
anchor_maxes = anchors / 2.
anchor_mins = -anchor_maxes
# 长宽要大于0才有效
#-----------------------------------------------------------#
# 长宽要大于0才有效
#-----------------------------------------------------------#
valid_mask = boxes_wh[..., 0]>0
for b in range(m):
# 对每一张图进行处理
wh = boxes_wh[b, valid_mask[b]]
if len(wh)==0: continue
# [n,1,2]
#-----------------------------------------------------------#
# [n,2] -> [n,1,2]
#-----------------------------------------------------------#
wh = np.expand_dims(wh, -2)
box_maxes = wh / 2.
box_mins = -box_maxes
# 计算真实框和哪个先验框最契合
#-----------------------------------------------------------#
# 计算所有真实框和先验框的交并比
# intersect_area [n,9]
# box_area [n,1]
# anchor_area [1,9]
# iou [n,9]
#-----------------------------------------------------------#
intersect_mins = np.maximum(box_mins, anchor_mins)
intersect_maxes = np.minimum(box_maxes, anchor_maxes)
intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
box_area = wh[..., 0] * wh[..., 1]
anchor_area = anchors[..., 0] * anchors[..., 1]
iou = intersect_area / (box_area + anchor_area - intersect_area)
# 维度是(n) 感谢 消尽不死鸟 的提醒
#-----------------------------------------------------------#
# 维度是[n,] 感谢 消尽不死鸟 的提醒
#-----------------------------------------------------------#
best_anchor = np.argmax(iou, axis=-1)
for t, n in enumerate(best_anchor):
#-----------------------------------------------------------#
# 找到每个真实框所属的特征层
#-----------------------------------------------------------#
for l in range(num_layers):
if n in anchor_mask[l]:
# floor用于向下取整
i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
# 找到真实框在特征层l中第b幅图像对应的位置
#-----------------------------------------------------------#
# floor用于向下取整,找到真实框所属的特征层对应的x、y轴坐标
#-----------------------------------------------------------#
i = np.floor(true_boxes[b,t,0] * grid_shapes[l][1]).astype('int32')
j = np.floor(true_boxes[b,t,1] * grid_shapes[l][0]).astype('int32')
#-----------------------------------------------------------#
# k指的是当前这个特征点的第k个先验框
#-----------------------------------------------------------#
k = anchor_mask[l].index(n)
c = true_boxes[b,t, 4].astype('int32')
y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4]
#-----------------------------------------------------------#
# c指的是当前这个真实框的种类
#-----------------------------------------------------------#
c = true_boxes[b, t, 4].astype('int32')
#-----------------------------------------------------------#
# y_true的shape为(m,13,13,3,85)(m,26,26,3,85)(m,52,52,3,85)
# 最后的85可以拆分成4+1+80,4代表的是框的中心与宽高、
# 1代表的是置信度、80代表的是种类
#-----------------------------------------------------------#
y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
y_true[l][b, j, i, k, 4] = 1
y_true[l][b, j, i, k, 5+c] = 1
return y_true
#----------------------------------------------------#
# 检测精度mAP和pr曲线计算参考视频
# https://www.bilibili.com/video/BV1zE411u7Vw
#----------------------------------------------------#
if __name__ == "__main__":
# 标签的位置
#----------------------------------------------------#
# 获得图片路径和标签
#----------------------------------------------------#
annotation_path = '2007_train.txt'
# 获取classes和anchor的位置
#------------------------------------------------------#
# 训练后的模型保存的位置,保存在logs文件夹里面
#------------------------------------------------------#
log_dir = 'logs/'
#----------------------------------------------------#
# classes和anchor的路径,非常重要
# 训练前一定要修改classes_path,使其对应自己的数据集
#----------------------------------------------------#
classes_path = 'model_data/voc_classes.txt'
anchors_path = 'model_data/yolo_anchors.txt'
#------------------------------------------------------#
......@@ -150,58 +201,81 @@ if __name__ == "__main__":
# 预测的东西都不一样了自然维度不匹配
#------------------------------------------------------#
weights_path = 'model_data/yolo4_weight.h5'
# 获得classes和anchor
#------------------------------------------------------#
# 训练用图片大小
# 一般在416x416和608x608选择
#------------------------------------------------------#
input_shape = (416,416)
#------------------------------------------------------#
# 是否对损失进行归一化
#------------------------------------------------------#
normalize = True
#----------------------------------------------------#
# 获取classes和anchor
#----------------------------------------------------#
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
# 一共有多少类
#------------------------------------------------------#
# 一共有多少类和多少先验框
#------------------------------------------------------#
num_classes = len(class_names)
num_anchors = len(anchors)
# 训练后的模型保存的位置
log_dir = 'logs/'
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
input_shape = (416,416)
#------------------------------------------------------#
# Yolov4的tricks应用
# mosaic 马赛克数据增强 True or False
# Cosine_scheduler 余弦退火学习率 True or False
# label_smoothing 标签平滑 0.01以下一般 如0.01、0.005
#------------------------------------------------------#
mosaic = True
Cosine_scheduler = False
label_smoothing = 0
# 清除session
K.clear_session()
# 输入的图像为
#------------------------------------------------------#
# 创建yolo模型
#------------------------------------------------------#
image_input = Input(shape=(None, None, 3))
h, w = input_shape
# 创建yolo模型
print('Create YOLOv4 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
model_body = yolo_body(image_input, num_anchors//3, num_classes)
# 载入预训练权重
#------------------------------------------------------#
# 载入预训练权重
#------------------------------------------------------#
print('Load weights {}.'.format(weights_path))
model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
# y_true为13,13,3,85
# 26,26,3,85
# 52,52,3,85
#------------------------------------------------------#
# 在这个地方设置损失,将网络的输出结果传入loss函数
# 把整个模型的输出作为loss
#------------------------------------------------------#
y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \
num_anchors//3, num_classes+5)) for l in range(3)]
# 输入为*model_body.input, *y_true
# 输出为model_loss
loss_input = [*model_body.output, *y_true]
model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing})(loss_input)
arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5,
'label_smoothing': label_smoothing, 'normalize': normalize})(loss_input)
model = Model([model_body.input, *y_true], model_loss)
# 训练参数设置
#-------------------------------------------------------------------------------#
# 训练参数的设置
# logging表示tensorboard的保存地址
# checkpoint用于设置权值保存的细节,period用于修改多少epoch保存一次
# reduce_lr用于设置学习率下降的方式
# early_stopping用于设定早停,val_loss多次不下降自动结束训练,表示模型基本收敛
#-------------------------------------------------------------------------------#
logging = TensorBoard(log_dir=log_dir)
checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
monitor='val_loss', save_weights_only=True, save_best_only=False, period=1)
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
# 0.1用于验证,0.9用于训练
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
# 当前划分方式下,验证集和训练集的比例为1:9
#----------------------------------------------------------------------#
val_split = 0.1
with open(annotation_path) as f:
lines = f.readlines()
......@@ -211,6 +285,10 @@ if __name__ == "__main__":
num_val = int(len(lines)*val_split)
num_train = len(lines) - num_val
freeze_layers = 249
for i in range(freeze_layers): model_body.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
#------------------------------------------------------#
# 主干特征提取网络特征通用,冻结训练可以加快训练速度
# 也可以在训练初期防止权值被破坏。
......@@ -219,18 +297,12 @@ if __name__ == "__main__":
# Epoch总训练世代
# 提示OOM或者显存不足请调小Batch_size
#------------------------------------------------------#
freeze_layers = 249
for i in range(freeze_layers): model_body.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
# 调整非主干模型first
if True:
Init_epoch = 0
Freeze_epoch = 50
# batch_size大小,每次喂入多少数据
batch_size = 8
# 最大学习率
learning_rate_base = 1e-3
if Cosine_scheduler:
# 预热期
warmup_epoch = int((Freeze_epoch-Init_epoch)*0.2)
......@@ -252,9 +324,9 @@ if __name__ == "__main__":
model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True),
steps_per_epoch=max(1, num_train//batch_size),
validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False),
validation_steps=max(1, num_val//batch_size),
epochs=Freeze_epoch,
initial_epoch=Init_epoch,
......@@ -263,15 +335,12 @@ if __name__ == "__main__":
for i in range(freeze_layers): model_body.layers[i].trainable = True
# 解冻后训练
if True:
Freeze_epoch = 50
Epoch = 100
# batch_size大小,每次喂入多少数据
batch_size = 2
# 最大学习率
learning_rate_base = 1e-4
if Cosine_scheduler:
# 预热期
warmup_epoch = int((Epoch-Freeze_epoch)*0.2)
......@@ -293,9 +362,9 @@ if __name__ == "__main__":
model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True),
steps_per_epoch=max(1, num_train//batch_size),
validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False),
validation_steps=max(1, num_val//batch_size),
epochs=Epoch,
initial_epoch=Freeze_epoch,
......
"""Miscellaneous utility functions."""
from functools import reduce
import numpy as np
import cv2
import keras
import keras.backend as K
from functools import reduce
import numpy as np
from matplotlib.colors import hsv_to_rgb, rgb_to_hsv
from PIL import Image
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
import cv2
def compose(*funcs):
if funcs:
......@@ -101,8 +101,8 @@ def merge_bboxes(bboxes, cutx, cuty):
def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue=.1, sat=1.5, val=1.5):
'''random preprocessing for real-time data augmentation'''
h, w = input_shape
min_offset_x = 0.4
min_offset_y = 0.4
min_offset_x = 0.3
min_offset_y = 0.3
scale_low = 1-min(min_offset_x,min_offset_y)
scale_high = scale_low+0.2
......@@ -112,6 +112,7 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue
place_x = [0,0,int(w*min_offset_x),int(w*min_offset_x)]
place_y = [0,int(h*min_offset_y),int(h*min_offset_y),0]
for line in annotation_line:
# 每一行进行分割
line_content = line.split()
......@@ -163,7 +164,6 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue
new_image.paste(image, (dx, dy))
image_data = np.array(new_image)/255
index = index + 1
box_data = []
# 对box进行重新处理
......@@ -183,8 +183,6 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue
image_datas.append(image_data)
box_datas.append(box_data)
# 将图片分割,放在一起
cutx = np.random.randint(int(w*min_offset_x), int(w*(1 - min_offset_x)))
cuty = np.random.randint(int(h*min_offset_y), int(h*(1 - min_offset_y)))
......@@ -206,7 +204,7 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue
return new_image, box_data
def get_random_data(annotation_line, input_shape, max_boxes=100, jitter=.3, hue=.1, sat=1.5, val=1.5):
def get_random_data(annotation_line, input_shape, max_boxes=100, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
'''random preprocessing for real-time data augmentation'''
line = annotation_line.split()
image = Image.open(line[0])
......@@ -214,6 +212,36 @@ def get_random_data(annotation_line, input_shape, max_boxes=100, jitter=.3, hue=
h, w = input_shape
box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
if not random:
# resize image
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
dx = (w-nw)//2
dy = (h-nh)//2
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image, np.float32)/255
# correct boxes
box_data = np.zeros((max_boxes,5))
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
if len(box)>max_boxes: box = box[:max_boxes]
box_data[:len(box)] = box
return image_data, box_data
# 对图像进行缩放并且进行长和宽的扭曲
new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
scale = rand(.25, 2)
......
#-------------------------------------#
# 调用摄像头检测
# 调用摄像头或者视频进行检测
# 调用摄像头直接运行即可
# 调用视频可以将cv2.VideoCapture()指定路径
# 视频的保存并不难,可以百度一下看看
#-------------------------------------#
import time
import cv2
import numpy as np
from keras.layers import Input
from yolo import YOLO
from PIL import Image
import numpy as np
import cv2
import time
from yolo import YOLO
yolo = YOLO()
# 调用摄像头
capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4")
#-------------------------------------#
# 调用摄像头
# capture=cv2.VideoCapture("1.mp4")
#-------------------------------------#
capture=cv2.VideoCapture(0)
fps = 0.0
while(True):
......@@ -20,10 +29,8 @@ while(True):
frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
# 转变成Image
frame = Image.fromarray(np.uint8(frame))
# 进行检测
frame = np.array(yolo.detect_image(frame))
# RGBtoBGR满足opencv显示格式
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
......@@ -37,4 +44,5 @@ while(True):
capture.release()
break
yolo.close_session()
yolo.close_session()
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``.

    The naive formula evaluates ``exp(-x)``, which overflows for
    large-magnitude negative inputs (NumPy emits a RuntimeWarning, or raises
    under ``np.errstate(over="raise")``). This version only ever
    exponentiates a non-positive number, so it cannot overflow, while
    producing the same values.

    Args:
        x: Real-valued scalar or array-like.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``x``. Integer/bool inputs yield float64; floating inputs
        keep their dtype.
    """
    values = np.asarray(x)
    # Integer/bool inputs are promoted up front so that negation and abs
    # operate on floats (matches the float64 result of the naive formula).
    if values.dtype.kind in "biu":
        values = values.astype(np.float64)

    # exp(-|x|) lies in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(values))
    result = np.where(values >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with an empty tuple turns a 0-d array into a NumPy scalar and
    # leaves n-d arrays unchanged, preserving the original return kind.
    return result[()]
......@@ -78,4 +80,4 @@ def yolo_head(feats, anchors, num_classes):
#
feat = np.random.normal(0,0.5,[4,13,13,75])
anchors = [[142, 110],[192, 243],[459, 401]]
yolo_head(feat,anchors,20)
\ No newline at end of file
yolo_head(feat,anchors,20)
#---------------------------------------------#
# 运行前一定要修改classes
# 如果生成的2007_train.txt里面没有目标信息
# 那么就是因为classes没有设定正确
#---------------------------------------------#
import xml.etree.ElementTree as ET
from os import getcwd
......
import os
import numpy as np
import copy
import colorsys
import copy
import os
from timeit import default_timer as timer
import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
from nets.yolo4 import yolo_body,yolo_eval
from keras.models import load_model
from PIL import Image, ImageDraw, ImageFont
from nets.yolo4 import yolo_body, yolo_eval
from utils.utils import letterbox_image
#--------------------------------------------#
# 使用自己训练好的模型预测需要修改2个参数
# model_path和classes_path都需要修改!
# 如果出现shape不匹配,一定要注意
# 训练时的model_path和classes_path参数的修改
#--------------------------------------------#
class YOLO(object):
_defaults = {
......@@ -64,18 +70,22 @@ class YOLO(object):
return np.array(anchors).reshape(-1, 2)
#---------------------------------------------------#
# 获得所有的分类
# 载入模型
#---------------------------------------------------#
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
# 计算anchor数量
#---------------------------------------------------#
# 计算先验框的数量和种类的数量
#---------------------------------------------------#
num_anchors = len(self.anchors)
num_classes = len(self.class_names)
# 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
# 否则先构建模型再载入
#---------------------------------------------------------#
# 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
# 否则先构建模型再载入
#---------------------------------------------------------#
try:
self.yolo_model = load_model(model_path, compile=False)
except:
......@@ -103,6 +113,10 @@ class YOLO(object):
self.input_image_shape = K.placeholder(shape=(2, ))
#---------------------------------------------------------#
# 在yolo_eval函数中,我们会对预测结果进行后处理
# 后处理的内容包括,解码、非极大抑制、门限筛选等
#---------------------------------------------------------#
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
num_classes, self.input_image_shape, max_boxes = self.max_boxes,
score_threshold = self.score, iou_threshold = self.iou)
......@@ -113,30 +127,37 @@ class YOLO(object):
#---------------------------------------------------#
def detect_image(self, image):
start = timer()
# 调整图片使其符合输入要求
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
new_image_size = (self.model_image_size[1],self.model_image_size[0])
boxed_image = letterbox_image(image, new_image_size)
image_data = np.array(boxed_image, dtype='float32')
image_data /= 255.
image_data = np.expand_dims(image_data, 0) # Add batch dimension.
# 预测结果
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
image_data = np.expand_dims(image_data, 0)
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
})
K.learning_phase(): 0})
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
# 设置字体
#---------------------------------------------------------#
# 设置字体
#---------------------------------------------------------#
font = ImageFont.truetype(font='font/simhei.ttf',
size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
thickness = (image.size[0] + image.size[1]) // 300
small_pic=[]
thickness = max((image.size[0] + image.size[1]) // 300, 1)
for i, c in list(enumerate(out_classes)):
predicted_class = self.class_names[c]
......@@ -159,7 +180,7 @@ class YOLO(object):
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
label = label.encode('utf-8')
print(label)
print(label, top, left, bottom, right)
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册