From 5b6f4c01c81528c2ccfe4bd2aceca93c84220d6c Mon Sep 17 00:00:00 2001 From: Bubbliiiing <47347516+bubbliiiing@users.noreply.github.com> Date: Thu, 14 Jan 2021 15:36:58 +0800 Subject: [PATCH] Add files via upload --- VOCdevkit/VOC2007/voc2yolo4.py | 11 +- get_dr_txt.py | 53 +++++--- kmeans_for_anchors.py | 11 +- nets/CSPdarknet53.py | 43 +++++-- nets/ious.py | 33 +++-- nets/loss.py | 182 ++++++++++++++++++--------- nets/yolo4.py | 168 ++++++++++++++++++------- predict.py | 12 +- test.py | 15 ++- train.py | 217 ++++++++++++++++++++++----------- utils/utils.py | 50 ++++++-- video.py | 28 +++-- vision_for_anchors.py | 6 +- voc_annotation.py | 5 + yolo.py | 63 ++++++---- 15 files changed, 632 insertions(+), 265 deletions(-) diff --git a/VOCdevkit/VOC2007/voc2yolo4.py b/VOCdevkit/VOC2007/voc2yolo4.py index 22e40c7..e3c8214 100644 --- a/VOCdevkit/VOC2007/voc2yolo4.py +++ b/VOCdevkit/VOC2007/voc2yolo4.py @@ -1,9 +1,18 @@ +#----------------------------------------------------------------------# +# 验证集的划分在train.py代码里面进行 +# test.txt和val.txt里面没有内容是正常的。训练不会使用到。 +#----------------------------------------------------------------------# import os import random - +random.seed(0) + xmlfilepath=r'./VOCdevkit/VOC2007/Annotations' saveBasePath=r"./VOCdevkit/VOC2007/ImageSets/Main/" +#----------------------------------------------------------------------# +# 想要增加测试集修改trainval_percent +# train_percent不需要修改 +#----------------------------------------------------------------------# trainval_percent=1 train_percent=1 diff --git a/get_dr_txt.py b/get_dr_txt.py index 7253a74..d7e666a 100644 --- a/get_dr_txt.py +++ b/get_dr_txt.py @@ -3,18 +3,21 @@ # 具体视频教程可查看 # https://www.bilibili.com/video/BV1zE411u7Vw #----------------------------------------------------# -from yolo import YOLO -from PIL import Image -from keras.layers import Input -from keras.applications.imagenet_utils import preprocess_input -from keras import backend as K -from utils.utils import letterbox_image -from nets.yolo4 import yolo_body,yolo_eval -from tqdm import tqdm import colorsys -import numpy as np import os +import numpy as np +from keras import backend as K +from keras.applications.imagenet_utils import preprocess_input +from keras.layers import Input +from PIL import Image +from tqdm import tqdm + +from nets.yolo4 import yolo_body, yolo_eval +from utils.utils import letterbox_image +from yolo import YOLO + + class mAP_YOLO(YOLO): #---------------------------------------------------# # 获得所有的分类 @@ -25,12 +28,16 @@ class mAP_YOLO(YOLO): model_path = os.path.expanduser(self.model_path) assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' - # 计算anchor数量 + #---------------------------------------------------# + # 计算先验框的数量和种类的数量 + #---------------------------------------------------# num_anchors = len(self.anchors) num_classes = len(self.class_names) - # 载入模型,如果原来的模型里已经包括了模型结构则直接载入。 - # 否则先构建模型再载入 + #---------------------------------------------------------# + # 载入模型,如果原来的模型里已经包括了模型结构则直接载入。 + # 否则先构建模型再载入 + #---------------------------------------------------------# try: self.yolo_model = load_model(model_path, compile=False) except: @@ -58,6 +65,10 @@ class mAP_YOLO(YOLO): self.input_image_shape = K.placeholder(shape=(2, )) + #---------------------------------------------------------# + # 在yolo_eval函数中,我们会对预测结果进行后处理 + # 后处理的内容包括,解码、非极大抑制、门限筛选等 + #---------------------------------------------------------# boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, num_classes, self.input_image_shape, max_boxes = self.max_boxes, score_threshold = self.score, iou_threshold = self.iou) @@ -68,21 +79,27 @@ class mAP_YOLO(YOLO): #---------------------------------------------------# def detect_image(self, image_id, image): f = open("./input/detection-results/"+image_id+".txt","w") - # 调整图片使其符合输入要求 + #---------------------------------------------------------# + # 给图像增加灰条,实现不失真的resize + #---------------------------------------------------------# new_image_size = (self.model_image_size[1],self.model_image_size[0]) boxed_image = letterbox_image(image, new_image_size) image_data = np.array(boxed_image, dtype='float32') image_data /= 255. - image_data = np.expand_dims(image_data, 0) # Add batch dimension. - - # 预测结果 + #---------------------------------------------------------# + # 添加上batch_size维度 + #---------------------------------------------------------# + image_data = np.expand_dims(image_data, 0) + + #---------------------------------------------------------# + # 将图像输入网络当中进行预测! + #---------------------------------------------------------# out_boxes, out_scores, out_classes = self.sess.run( [self.boxes, self.scores, self.classes], feed_dict={ self.yolo_model.input: image_data, self.input_image_shape: [image.size[1], image.size[0]], - K.learning_phase(): 0 - }) + K.learning_phase(): 0}) for i, c in enumerate(out_classes): predicted_class = self.class_names[int(c)] diff --git a/kmeans_for_anchors.py b/kmeans_for_anchors.py index 98c3650..2dcbbc0 100644 --- a/kmeans_for_anchors.py +++ b/kmeans_for_anchors.py @@ -1,7 +1,9 @@ -import numpy as np -import xml.etree.ElementTree as ET import glob import random +import xml.etree.ElementTree as ET + +import numpy as np + def cas_iou(box,cluster): x = np.minimum(cluster[:,0],box[0]) @@ -61,6 +63,9 @@ def load_data(path): tree = ET.parse(xml_file) height = int(tree.findtext('./size/height')) width = int(tree.findtext('./size/width')) + if height<=0 or width<=0: + continue + # 对于每一个目标都获得它的宽高 for obj in tree.iter('object'): xmin = int(float(obj.findtext('bndbox/xmin'))) / width @@ -103,4 +108,4 @@ if __name__ == '__main__': else: x_y = ", %d,%d" % (data[i][0], data[i][1]) f.write(x_y) - f.close() \ No newline at end of file + f.close() diff --git a/nets/CSPdarknet53.py b/nets/CSPdarknet53.py index 5bb7c5f..a838946 100644 --- a/nets/CSPdarknet53.py +++ b/nets/CSPdarknet53.py @@ -1,6 +1,8 @@ from functools import wraps + from keras import backend as K -from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D, Layer +from keras.layers import (Add, Concatenate, Conv2D, Layer, MaxPooling2D, + UpSampling2D, ZeroPadding2D) from keras.layers.advanced_activations import LeakyReLU from keras.layers.normalization import BatchNormalization from keras.regularizers import l2 @@ -21,8 +23,11 @@ class Mish(Layer): def compute_output_shape(self, input_shape): return input_shape + #--------------------------------------------------# -# 单次卷积 +# 单次卷积DarknetConv2D +# 正则化系数为5e-4 +# 如果步长为2则自己设定padding方式。 #--------------------------------------------------# @wraps(Conv2D) def DarknetConv2D(*args, **kwargs): @@ -32,7 +37,7 @@ def DarknetConv2D(*args, **kwargs): return Conv2D(*args, **darknet_conv_kwargs) #---------------------------------------------------# -# 卷积块 +# 卷积块 -> 卷积 + 标准化 + 激活函数 # DarknetConv2D + BatchNormalization + Mish #---------------------------------------------------# def DarknetConv2D_BN_Mish(*args, **kwargs): @@ -43,36 +48,48 @@ def DarknetConv2D_BN_Mish(*args, **kwargs): BatchNormalization(), Mish()) -#---------------------------------------------------# +#--------------------------------------------------------------------# # CSPdarknet的结构块 -# 存在一个大残差边 -# 这个大残差边绕过了很多的残差结构 -#---------------------------------------------------# +# 首先利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩 +# 然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构 +# 主干部分会对num_blocks进行循环,循环内部是残差结构。 +# 对于整个CSPdarknet的结构块,就是一个大残差块+内部多个小残差块 +#--------------------------------------------------------------------# def resblock_body(x, num_filters, num_blocks, all_narrow=True): - # 进行长和宽的压缩 + #----------------------------------------------------------------# + # 利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩 + #----------------------------------------------------------------# preconv1 = ZeroPadding2D(((1,0),(1,0)))(x) preconv1 = DarknetConv2D_BN_Mish(num_filters, (3,3), strides=(2,2))(preconv1) - # 生成一个大的残差边 + #--------------------------------------------------------------------# + # 然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构 + #--------------------------------------------------------------------# shortconv = DarknetConv2D_BN_Mish(num_filters//2 if all_narrow else num_filters, (1,1))(preconv1) - # 主干部分的卷积 + #----------------------------------------------------------------# + # 主干部分会对num_blocks进行循环,循环内部是残差结构。 + #----------------------------------------------------------------# mainconv = DarknetConv2D_BN_Mish(num_filters//2 if all_narrow else num_filters, (1,1))(preconv1) - # 1x1卷积对通道数进行整合->3x3卷积提取特征,使用残差结构 for i in range(num_blocks): y = compose( DarknetConv2D_BN_Mish(num_filters//2, (1,1)), DarknetConv2D_BN_Mish(num_filters//2 if all_narrow else num_filters, (3,3)))(mainconv) mainconv = Add()([mainconv,y]) - # 1x1卷积后和残差边堆叠 postconv = DarknetConv2D_BN_Mish(num_filters//2 if all_narrow else num_filters, (1,1))(mainconv) + + #----------------------------------------------------------------# + # 将大残差边再堆叠回来 + #----------------------------------------------------------------# route = Concatenate()([postconv, shortconv]) # 最后对通道数进行整合 return DarknetConv2D_BN_Mish(num_filters, (1,1))(route) #---------------------------------------------------# -# darknet53 的主体部分 +# CSPdarknet53 的主体部分 +# 输入为一张416x416x3的图片 +# 输出为三个有效特征层 #---------------------------------------------------# def darknet_body(x): x = DarknetConv2D_BN_Mish(32, (3,3))(x) diff --git a/nets/ious.py b/nets/ious.py index 1f7fc39..a0c7a3f 100644 --- a/nets/ious.py +++ b/nets/ious.py @@ -12,20 +12,31 @@ def box_ciou(b1, b2): ------- ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1) """ - # 求出预测框左上角右下角 + #-----------------------------------------------------------# + # 求出预测框左上角右下角 + # b1_mins (batch, feat_w, feat_h, anchor_num, 2) + # b1_maxes (batch, feat_w, feat_h, anchor_num, 2) + #-----------------------------------------------------------# b1_xy = b1[..., :2] b1_wh = b1[..., 2:4] b1_wh_half = b1_wh/2. b1_mins = b1_xy - b1_wh_half b1_maxes = b1_xy + b1_wh_half - # 求出真实框左上角右下角 + #-----------------------------------------------------------# + # 求出真实框左上角右下角 + # b2_mins (batch, feat_w, feat_h, anchor_num, 2) + # b2_maxes (batch, feat_w, feat_h, anchor_num, 2) + #-----------------------------------------------------------# b2_xy = b2[..., :2] b2_wh = b2[..., 2:4] b2_wh_half = b2_wh/2. b2_mins = b2_xy - b2_wh_half b2_maxes = b2_xy + b2_wh_half - # 求真实框和预测框所有的iou + #-----------------------------------------------------------# + # 求真实框和预测框所有的iou + # iou (batch, feat_w, feat_h, anchor_num) + #-----------------------------------------------------------# intersect_mins = K.maximum(b1_mins, b2_mins) intersect_maxes = K.minimum(b1_maxes, b2_maxes) intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) @@ -33,21 +44,27 @@ def box_ciou(b1, b2): b1_area = b1_wh[..., 0] * b1_wh[..., 1] b2_area = b2_wh[..., 0] * b2_wh[..., 1] union_area = b1_area + b2_area - intersect_area - iou = intersect_area / K.maximum(union_area,K.epsilon()) + iou = intersect_area / K.maximum(union_area, K.epsilon()) - # 计算中心的差距 + #-----------------------------------------------------------# + # 计算中心的差距 + # center_distance (batch, feat_w, feat_h, anchor_num) + #-----------------------------------------------------------# center_distance = K.sum(K.square(b1_xy - b2_xy), axis=-1) - # 找到包裹两个框的最小框的左上角和右下角 enclose_mins = K.minimum(b1_mins, b2_mins) enclose_maxes = K.maximum(b1_maxes, b2_maxes) enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.0) - # 计算对角线距离 + #-----------------------------------------------------------# + # 计算对角线距离 + # enclose_diagonal (batch, feat_w, feat_h, anchor_num) + #-----------------------------------------------------------# enclose_diagonal = K.sum(K.square(enclose_wh), axis=-1) ciou = iou - 1.0 * (center_distance) / K.maximum(enclose_diagonal ,K.epsilon()) - v = 4*K.square(tf.math.atan2(b1_wh[..., 0], K.maximum(b1_wh[..., 1],K.epsilon())) - tf.math.atan2(b2_wh[..., 0], K.maximum(b2_wh[..., 1],K.epsilon()))) / (math.pi * math.pi) + v = 4 * K.square(tf.math.atan2(b1_wh[..., 0], K.maximum(b1_wh[..., 1], K.epsilon())) - tf.math.atan2(b2_wh[..., 0], K.maximum(b2_wh[..., 1],K.epsilon()))) / (math.pi * math.pi) alpha = v / K.maximum((1.0 - iou + v), K.epsilon()) ciou = ciou - alpha * v ciou = K.expand_dims(ciou, -1) + ciou = tf.where(tf.is_nan(ciou), tf.zeros_like(ciou), ciou) return ciou diff --git a/nets/loss.py b/nets/loss.py index 8de636c..4c839b1 100644 --- a/nets/loss.py +++ b/nets/loss.py @@ -1,6 +1,7 @@ import numpy as np import tensorflow as tf from keras import backend as K + from nets.ious import box_ciou #---------------------------------------------------# @@ -10,17 +11,22 @@ def _smooth_labels(y_true, label_smoothing): num_classes = tf.cast(K.shape(y_true)[-1], dtype=K.floatx()) label_smoothing = K.constant(label_smoothing, dtype=K.floatx()) return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes + #---------------------------------------------------# # 将预测值的每个特征层调成真实值 #---------------------------------------------------# def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): num_anchors = len(anchors) - # [1, 1, 1, num_anchors, 2] + #---------------------------------------------------# + # [1, 1, 1, num_anchors, 2] + #---------------------------------------------------# anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) - # 获得x,y的网格 - # (13, 13, 1, 2) - grid_shape = K.shape(feats)[1:3] # height, width + #---------------------------------------------------# + # 获得x,y的网格 + # (13, 13, 1, 2) + #---------------------------------------------------# + grid_shape = K.shape(feats)[1:3] grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), [1, grid_shape[1], 1, 1]) grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), @@ -28,22 +34,34 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): grid = K.concatenate([grid_x, grid_y]) grid = K.cast(grid, K.dtype(feats)) - # (batch_size,13,13,3,85) + #---------------------------------------------------# + # 将预测结果调整成(batch_size,13,13,3,85) + # 85可拆分成4 + 1 + 80 + # 4代表的是中心宽高的调整参数 + # 1代表的是框的置信度 + # 80代表的是种类的置信度 + #---------------------------------------------------# feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) - # 将预测值调成真实值 - # box_xy对应框的中心点 - # box_wh对应框的宽和高 + #---------------------------------------------------# + # 将预测值调成真实值 + # box_xy对应框的中心点 + # box_wh对应框的宽和高 + #---------------------------------------------------# box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) box_confidence = K.sigmoid(feats[..., 4:5]) box_class_probs = K.sigmoid(feats[..., 5:]) - # 在计算loss的时候返回如下参数 + #---------------------------------------------------------------------# + # 在计算loss的时候返回grid, feats, box_xy, box_wh + # 在预测的时候返回box_xy, box_wh, box_confidence, box_class_probs + #---------------------------------------------------------------------# if calc_loss == True: return grid, feats, box_xy, box_wh return box_xy, box_wh, box_confidence, box_class_probs + #---------------------------------------------------# # 用于计算每个预测框与真实框的iou #---------------------------------------------------# @@ -77,108 +95,162 @@ def box_iou(b1, b2): return iou - #---------------------------------------------------# # loss值计算 #---------------------------------------------------# -def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False): - +def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False, normalize=True): # 一共有三层 num_layers = len(anchors)//3 - # 将预测结果和实际ground truth分开,args是[*model_body.output, *y_true] - # y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 - # yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,255),(m,26,26,255),(m,52,52,255)。 + #---------------------------------------------------------------------------------------------------# + # 将预测结果和实际ground truth分开,args是[*model_body.output, *y_true] + # y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 + # yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 + #---------------------------------------------------------------------------------------------------# y_true = args[num_layers:] yolo_outputs = args[:num_layers] - # 先验框 - # 678为142,110, 192,243, 459,401 - # 345为36,75, 76,55, 72,146 - # 012为12,16, 19,36, 40,28 + #-----------------------------------------------------------# + # 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401] + # 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146] + # 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28] + #-----------------------------------------------------------# anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] - # 得到input_shpae为608,608 + # 得到input_shpae为416,416 input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0])) loss = 0 - - # 取出每一张图片 - # m的值就是batch_size + num_pos = 0 + #-----------------------------------------------------------# + # 取出每一张图片 + # m的值就是batch_size + #-----------------------------------------------------------# m = K.shape(yolo_outputs[0])[0] mf = K.cast(m, K.dtype(yolo_outputs[0])) - # y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 - # yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,255),(m,26,26,255),(m,52,52,255)。 + #---------------------------------------------------------------------------------------------------# + # y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 + # yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 + #---------------------------------------------------------------------------------------------------# for l in range(num_layers): - # 以第一个特征层(m,13,13,3,85)为例子 - # 取出该特征层中存在目标的点的位置。(m,13,13,3,1) + #-----------------------------------------------------------# + # 以第一个特征层(m,13,13,3,85)为例子 + # 取出该特征层中存在目标的点的位置。(m,13,13,3,1) + #-----------------------------------------------------------# object_mask = y_true[l][..., 4:5] - # 取出其对应的种类(m,13,13,3,80) + #-----------------------------------------------------------# + # 取出其对应的种类(m,13,13,3,80) + #-----------------------------------------------------------# true_class_probs = y_true[l][..., 5:] if label_smoothing: true_class_probs = _smooth_labels(true_class_probs, label_smoothing) - # 将yolo_outputs的特征层输出进行处理 - # grid为网格结构(13,13,1,2),raw_pred为尚未处理的预测结果(m,13,13,3,85) - # 还有解码后的xy,wh,(m,13,13,3,2) + #-----------------------------------------------------------# + # 将yolo_outputs的特征层输出进行处理、获得四个返回值 + # 其中: + # grid (13,13,1,2) 网格坐标 + # raw_pred (m,13,13,3,85) 尚未处理的预测结果 + # pred_xy (m,13,13,3,2) 解码后的中心坐标 + # pred_wh (m,13,13,3,2) 解码后的宽高坐标 + #-----------------------------------------------------------# grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True) - # 这个是解码后的预测的box的位置 - # (m,13,13,3,4) + #-----------------------------------------------------------# + # pred_box是解码后的预测的box的位置 + # (m,13,13,3,4) + #-----------------------------------------------------------# pred_box = K.concatenate([pred_xy, pred_wh]) - # 找到负样本群组,第一步是创建一个数组,[] + #-----------------------------------------------------------# + # 找到负样本群组,第一步是创建一个数组,[] + #-----------------------------------------------------------# ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True) object_mask_bool = K.cast(object_mask, 'bool') - # 对每一张图片计算ignore_mask + #-----------------------------------------------------------# + # 对每一张图片计算ignore_mask + #-----------------------------------------------------------# def loop_body(b, ignore_mask): - # 取出第b副图内,真实存在的所有的box的参数 - # n,4 + #-----------------------------------------------------------# + # 取出n个真实框:n,4 + #-----------------------------------------------------------# true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0]) - # 计算预测结果与真实情况的iou - # pred_box为13,13,3,4 - # 计算的结果是每个pred_box和其它所有真实框的iou - # 13,13,3,n + #-----------------------------------------------------------# + # 计算预测框与真实框的iou + # pred_box 13,13,3,4 预测框的坐标 + # true_box n,4 真实框的坐标 + # iou 13,13,3,n 预测框和真实框的iou + #-----------------------------------------------------------# iou = box_iou(pred_box[b], true_box) - # 13,13,3 + #-----------------------------------------------------------# + # best_iou 13,13,3 每个特征点与真实框的最大重合程度 + #-----------------------------------------------------------# best_iou = K.max(iou, axis=-1) - # 如果某些预测框和真实框的重合程度大于0.5,则忽略。 + #-----------------------------------------------------------# + # 判断预测框和真实框的最大iou小于ignore_thresh + # 则认为该预测框没有与之对应的真实框 + # 该操作的目的是: + # 忽略预测结果与真实框非常对应特征点,因为这些框已经比较准了 + # 不适合当作负样本,所以忽略掉。 + #-----------------------------------------------------------# ignore_mask = ignore_mask.write(b, K.cast(best_iou 卷积 + 标准化 + 激活函数 # DarknetConv2D + BatchNormalization + LeakyReLU #---------------------------------------------------# def DarknetConv2D_BN_Leaky(*args, **kwargs): @@ -35,7 +39,7 @@ def DarknetConv2D_BN_Leaky(*args, **kwargs): LeakyReLU(alpha=0.1)) #---------------------------------------------------# -# 特征层->最后的输出 +# 进行五次卷积 #---------------------------------------------------# def make_five_convs(x, num_filters): # 五次卷积 @@ -47,14 +51,19 @@ def make_five_convs(x, num_filters): return x #---------------------------------------------------# -# 特征层->最后的输出 +# Panet网络的构建,并且获得预测结果 #---------------------------------------------------# def yolo_body(inputs, num_anchors, num_classes): - # 生成darknet53的主干模型 + #---------------------------------------------------# + # 生成CSPdarknet53的主干模型 + # 获得三个有效特征层,他们的shape分别是: + # 52,52,256 + # 26,26,512 + # 13,13,1024 + #---------------------------------------------------# feat1,feat2,feat3 = darknet_body(inputs) - # 第一个特征层 - # y1=(batch_size,13,13,3,85) + # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512 P5 = DarknetConv2D_BN_Leaky(512, (1,1))(feat3) P5 = DarknetConv2D_BN_Leaky(1024, (3,3))(P5) P5 = DarknetConv2D_BN_Leaky(512, (1,1))(P5) @@ -67,38 +76,60 @@ def yolo_body(inputs, num_anchors, num_classes): P5 = DarknetConv2D_BN_Leaky(1024, (3,3))(P5) P5 = DarknetConv2D_BN_Leaky(512, (1,1))(P5) + # 13,13,512 -> 13,13,256 -> 26,26,256 P5_upsample = compose(DarknetConv2D_BN_Leaky(256, (1,1)), UpSampling2D(2))(P5) - + # 26,26,512 -> 26,26,256 P4 = DarknetConv2D_BN_Leaky(256, (1,1))(feat2) + # 26,26,256 + 26,26,256 -> 26,26,512 P4 = Concatenate()([P4, P5_upsample]) + + # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 P4 = make_five_convs(P4,256) + # 26,26,256 -> 26,26,128 -> 52,52,128 P4_upsample = compose(DarknetConv2D_BN_Leaky(128, (1,1)), UpSampling2D(2))(P4) - + # 52,52,256 -> 52,52,128 P3 = DarknetConv2D_BN_Leaky(128, (1,1))(feat1) + # 52,52,128 + 52,52,128 -> 52,52,256 P3 = Concatenate()([P3, P4_upsample]) - P3 = make_five_convs(P3,128) + # 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 + P3 = make_five_convs(P3,128) + + #---------------------------------------------------# + # 第三个特征层 + # y3=(batch_size,52,52,3,85) + #---------------------------------------------------# P3_output = DarknetConv2D_BN_Leaky(256, (3,3))(P3) P3_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P3_output) - #26,26 output + # 52,52,128 -> 26,26,256 P3_downsample = ZeroPadding2D(((1,0),(1,0)))(P3) P3_downsample = DarknetConv2D_BN_Leaky(256, (3,3), strides=(2,2))(P3_downsample) + # 26,26,256 + 26,26,256 -> 26,26,512 P4 = Concatenate()([P3_downsample, P4]) + # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 P4 = make_five_convs(P4,256) + #---------------------------------------------------# + # 第二个特征层 + # y2=(batch_size,26,26,3,85) + #---------------------------------------------------# P4_output = DarknetConv2D_BN_Leaky(512, (3,3))(P4) P4_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P4_output) - - #13,13 output + # 26,26,256 -> 13,13,512 P4_downsample = ZeroPadding2D(((1,0),(1,0)))(P4) P4_downsample = DarknetConv2D_BN_Leaky(512, (3,3), strides=(2,2))(P4_downsample) + # 13,13,512 + 13,13,512 -> 13,13,1024 P5 = Concatenate()([P4_downsample, P5]) + # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 P5 = make_five_convs(P5,512) - + #---------------------------------------------------# + # 第一个特征层 + # y1=(batch_size,13,13,3,85) + #---------------------------------------------------# P5_output = DarknetConv2D_BN_Leaky(1024, (3,3))(P5) P5_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P5_output) @@ -109,12 +140,16 @@ def yolo_body(inputs, num_anchors, num_classes): #---------------------------------------------------# def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): num_anchors = len(anchors) - # [1, 1, 1, num_anchors, 2] + #---------------------------------------------------# + # [1, 1, 1, num_anchors, 2] + #---------------------------------------------------# anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) - # 获得x,y的网格 - # (13,13, 1, 2) - grid_shape = K.shape(feats)[1:3] # height, width + #---------------------------------------------------# + # 获得x,y的网格 + # (13, 13, 1, 2) + #---------------------------------------------------# + grid_shape = K.shape(feats)[1:3] grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), [1, grid_shape[1], 1, 1]) grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), @@ -122,18 +157,29 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): grid = K.concatenate([grid_x, grid_y]) grid = K.cast(grid, K.dtype(feats)) - # (batch_size,13,13,3,85) + #---------------------------------------------------# + # 将预测结果调整成(batch_size,13,13,3,85) + # 85可拆分成4 + 1 + 80 + # 4代表的是中心宽高的调整参数 + # 1代表的是框的置信度 + # 80代表的是种类的置信度 + #---------------------------------------------------# feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) - # 将预测值调成真实值 - # box_xy对应框的中心点 - # box_wh对应框的宽和高 + #---------------------------------------------------# + # 将预测值调成真实值 + # box_xy对应框的中心点 + # box_wh对应框的宽和高 + #---------------------------------------------------# box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) box_confidence = K.sigmoid(feats[..., 4:5]) box_class_probs = K.sigmoid(feats[..., 5:]) - # 在计算loss的时候返回如下参数 + #---------------------------------------------------------------------# + # 在计算loss的时候返回grid, feats, box_xy, box_wh + # 在预测的时候返回box_xy, box_wh, box_confidence, box_class_probs + #---------------------------------------------------------------------# if calc_loss == True: return grid, feats, box_xy, box_wh return box_xy, box_wh, box_confidence, box_class_probs @@ -142,6 +188,9 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): # 对box进行调整,使其符合真实图片的样子 #---------------------------------------------------# def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): + #-----------------------------------------------------------------# + # 把y轴放前面是因为方便预测框和图像的宽高进行相乘 + #-----------------------------------------------------------------# box_yx = box_xy[..., ::-1] box_hw = box_wh[..., ::-1] @@ -149,6 +198,10 @@ def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): image_shape = K.cast(image_shape, K.dtype(box_yx)) new_shape = K.round(image_shape * K.min(input_shape/image_shape)) + #-----------------------------------------------------------------# + # 这里求出来的offset是图像有效区域相对于图像左上角的偏移情况 + # new_shape指的是宽高缩放情况 + #-----------------------------------------------------------------# offset = (input_shape-new_shape)/2./input_shape scale = input_shape/new_shape @@ -171,14 +224,24 @@ def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): # 获取每个box和它的得分 #---------------------------------------------------# def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): - # 将预测值调成真实值 - # box_xy对应框的中心点 - # box_wh对应框的宽和高 - # -1,13,13,3,2; -1,13,13,3,2; -1,13,13,3,1; -1,13,13,3,80 + #-----------------------------------------------------------------# + # 将预测值调成真实值 + # box_xy : -1,13,13,3,2; + # box_wh : -1,13,13,3,2; + # box_confidence : -1,13,13,3,1; + # box_class_probs : -1,13,13,3,80; + #-----------------------------------------------------------------# box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape) - # 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax + #-----------------------------------------------------------------# + # 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条 + # 因此生成的box_xy, box_wh是相对于有灰条的图像的 + # 我们需要对齐进行修改,去除灰条的部分。 + # 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax + #-----------------------------------------------------------------# boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) - # 获得得分和box + #-----------------------------------------------------------------# + # 获得最终得分和框的位置 + #-----------------------------------------------------------------# boxes = K.reshape(boxes, [-1, 4]) box_scores = box_confidence * box_class_probs box_scores = K.reshape(box_scores, [-1, num_classes]) @@ -194,42 +257,63 @@ def yolo_eval(yolo_outputs, max_boxes=20, score_threshold=.6, iou_threshold=.5): - # 获得特征层的数量 + #---------------------------------------------------# + # 获得特征层的数量,有效特征层的数量为3 + #---------------------------------------------------# num_layers = len(yolo_outputs) - # 特征层1对应的anchor是678 - # 特征层2对应的anchor是345 - # 特征层3对应的anchor是012 + #-----------------------------------------------------------# + # 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401] + # 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146] + # 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28] + #-----------------------------------------------------------# anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] + #-----------------------------------------------------------# + # 这里获得的是输入图片的大小,一般是416x416 + #-----------------------------------------------------------# input_shape = K.shape(yolo_outputs[0])[1:3] * 32 boxes = [] box_scores = [] - # 对每个特征层进行处理 + #-----------------------------------------------------------# + # 对每个特征层进行处理 + #-----------------------------------------------------------# for l in range(num_layers): _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, image_shape) boxes.append(_boxes) box_scores.append(_box_scores) - # 将每个特征层的结果进行堆叠 + #-----------------------------------------------------------# + # 将每个特征层的结果进行堆叠 + #-----------------------------------------------------------# boxes = K.concatenate(boxes, axis=0) box_scores = K.concatenate(box_scores, axis=0) + #-----------------------------------------------------------# + # 判断得分是否大于score_threshold + #-----------------------------------------------------------# mask = box_scores >= score_threshold max_boxes_tensor = K.constant(max_boxes, dtype='int32') boxes_ = [] scores_ = [] classes_ = [] for c in range(num_classes): - # 取出所有box_scores >= score_threshold的框,和成绩 + #-----------------------------------------------------------# + # 取出所有box_scores >= score_threshold的框,和成绩 + #-----------------------------------------------------------# class_boxes = tf.boolean_mask(boxes, mask[:, c]) class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) - # 非极大抑制,去掉box重合程度高的那一些 + #-----------------------------------------------------------# + # 非极大抑制 + # 保留一定区域内得分最大的框 + #-----------------------------------------------------------# nms_index = tf.image.non_max_suppression( class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) - # 获取非极大抑制后的结果 - # 下列三个分别是 - # 框的位置,得分与种类 + #-----------------------------------------------------------# + # 获取非极大抑制后的结果 + # 下列三个分别是 + # 框的位置,得分与种类 + #-----------------------------------------------------------# class_boxes = K.gather(class_boxes, nms_index) class_box_scores = K.gather(class_box_scores, nms_index) classes = K.ones_like(class_box_scores, 'int32') * c diff --git a/predict.py b/predict.py index 5ff4e03..cbb247c 100644 --- a/predict.py +++ b/predict.py @@ -1,6 +1,16 @@ -from yolo import YOLO +''' +predict.py有几个注意点 +1、无法进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。 +2、如果想要保存,利用r_image.save("img.jpg")即可保存。 +3、如果想要获得框的坐标,可以进入detect_image函数,读取top,left,bottom,right这四个值。 +4、如果想要截取下目标,可以利用获取到的top,left,bottom,right这四个值在原图上利用矩阵的方式进行截取。 +''' +from keras.layers import Input from PIL import Image +from nets.yolo4 import yolo_body +from yolo import YOLO + yolo = YOLO() while True: diff --git a/test.py b/test.py index ef3b2ef..f873741 100644 --- a/test.py +++ b/test.py @@ -3,11 +3,14 @@ # map测试请看get_dr_txt.py、get_gt_txt.py # 和get_map.py #--------------------------------------------# -from nets.yolo4 import yolo_body from keras.layers import Input -inputs = Input([416,416,3]) -model = yolo_body(inputs,3,80) -model.summary() -for i,layer in enumerate(model.layers): - print(i,layer.name) +from nets.yolo4 import yolo_body + +if __name__ == "__main__": + inputs = Input([416, 416, 3]) + model = yolo_body(inputs, 3, 80) + model.summary() + + # for i,layer in enumerate(model.layers): + # print(i,layer.name) diff --git a/train.py b/train.py index 948e1c3..97d7991 100644 --- a/train.py +++ b/train.py @@ -1,14 +1,17 @@ +import keras.backend as K import numpy as np import tensorflow as tf -import keras.backend as K +from keras.backend.tensorflow_backend import set_session +from keras.callbacks import (EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, + TensorBoard) from keras.layers import Input, Lambda from keras.models import Model from keras.optimizers import Adam -from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping -from nets.yolo4 import yolo_body + from nets.loss import yolo_loss -from keras.backend.tensorflow_backend import set_session -from utils.utils import get_random_data,get_random_data_with_Mosaic,rand,WarmUpCosineDecayScheduler +from nets.yolo4 import yolo_body +from utils.utils import (WarmUpCosineDecayScheduler, get_random_data, + get_random_data_with_Mosaic, rand) #---------------------------------------------------# @@ -31,8 +34,7 @@ def get_anchors(anchors_path): #---------------------------------------------------# # 训练数据生成器 #---------------------------------------------------# -def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False): - '''data generator for fit_generator''' +def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, mosaic=False, random=True): n = len(annotation_lines) i = 0 flag = True @@ -47,11 +49,11 @@ def data_generator(annotation_lines, batch_size, input_shape, anchors, num_class image, box = get_random_data_with_Mosaic(annotation_lines[i:i+4], input_shape) i = (i+1) % n else: - image, box = get_random_data(annotation_lines[i], input_shape) + image, box = get_random_data(annotation_lines[i], input_shape, random=random) i = (i+1) % n flag = bool(1-flag) else: - image, box = get_random_data(annotation_lines[i], input_shape) + image, box = get_random_data(annotation_lines[i], input_shape, random=random) i = (i+1) % n image_data.append(image) box_data.append(box) @@ -60,7 +62,6 @@ def data_generator(annotation_lines, batch_size, input_shape, anchors, num_class y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes) yield [image_data, *y_true], np.zeros(batch_size) - #---------------------------------------------------# # 读入xml文件,并输出y_true #---------------------------------------------------# @@ -68,80 +69,130 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): assert (true_boxes[..., 4] [1,9,2] + #-----------------------------------------------------------# anchors = np.expand_dims(anchors, 0) anchor_maxes = anchors / 2. anchor_mins = -anchor_maxes - # 长宽要大于0才有效 + + #-----------------------------------------------------------# + # 长宽要大于0才有效 + #-----------------------------------------------------------# valid_mask = boxes_wh[..., 0]>0 for b in range(m): # 对每一张图进行处理 wh = boxes_wh[b, valid_mask[b]] if len(wh)==0: continue - # [n,1,2] + #-----------------------------------------------------------# + # [n,2] -> [n,1,2] + #-----------------------------------------------------------# wh = np.expand_dims(wh, -2) box_maxes = wh / 2. box_mins = -box_maxes - # 计算真实框和哪个先验框最契合 + #-----------------------------------------------------------# + # 计算所有真实框和先验框的交并比 + # intersect_area [n,9] + # box_area [n,1] + # anchor_area [1,9] + # iou [n,9] + #-----------------------------------------------------------# intersect_mins = np.maximum(box_mins, anchor_mins) intersect_maxes = np.minimum(box_maxes, anchor_maxes) intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.) intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] + box_area = wh[..., 0] * wh[..., 1] anchor_area = anchors[..., 0] * anchors[..., 1] + iou = intersect_area / (box_area + anchor_area - intersect_area) - # 维度是(n) 感谢 消尽不死鸟 的提醒 + #-----------------------------------------------------------# + # 维度是[n,] 感谢 消尽不死鸟 的提醒 + #-----------------------------------------------------------# best_anchor = np.argmax(iou, axis=-1) for t, n in enumerate(best_anchor): + #-----------------------------------------------------------# + # 找到每个真实框所属的特征层 + #-----------------------------------------------------------# for l in range(num_layers): if n in anchor_mask[l]: - # floor用于向下取整 - i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32') - j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32') - # 找到真实框在特征层l中第b副图像对应的位置 + #-----------------------------------------------------------# + # floor用于向下取整,找到真实框所属的特征层对应的x、y轴坐标 + #-----------------------------------------------------------# + i = np.floor(true_boxes[b,t,0] * grid_shapes[l][1]).astype('int32') + j = np.floor(true_boxes[b,t,1] * grid_shapes[l][0]).astype('int32') + #-----------------------------------------------------------# + # k指的的当前这个特征点的第k个先验框 + #-----------------------------------------------------------# k = anchor_mask[l].index(n) - c = true_boxes[b,t, 4].astype('int32') - y_true[l][b, j, i, k, 0:4] = true_boxes[b,t, 0:4] + #-----------------------------------------------------------# + # c指的是当前这个真实框的种类 + #-----------------------------------------------------------# + c = true_boxes[b, t, 4].astype('int32') + #-----------------------------------------------------------# + # y_true的shape为(m,13,13,3,85)(m,26,26,3,85)(m,52,52,3,85) + # 最后的85可以拆分成4+1+80,4代表的是框的中心与宽高、 + # 1代表的是置信度、80代表的是种类 + #-----------------------------------------------------------# + y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4] y_true[l][b, j, i, k, 4] = 1 y_true[l][b, j, i, k, 5+c] = 1 return y_true - #----------------------------------------------------# # 检测精度mAP和pr曲线计算参考视频 # https://www.bilibili.com/video/BV1zE411u7Vw #----------------------------------------------------# if __name__ == "__main__": - # 标签的位置 + #----------------------------------------------------# + # 获得图片路径和标签 + #----------------------------------------------------# annotation_path = '2007_train.txt' - # 获取classes和anchor的位置 + #------------------------------------------------------# + # 训练后的模型保存的位置,保存在logs文件夹里面 + #------------------------------------------------------# + log_dir = 'logs/' + #----------------------------------------------------# + # classes和anchor的路径,非常重要 + # 训练前一定要修改classes_path,使其对应自己的数据集 + #----------------------------------------------------# classes_path = 'model_data/voc_classes.txt' anchors_path = 'model_data/yolo_anchors.txt' #------------------------------------------------------# @@ -150,58 +201,81 @@ if __name__ == "__main__": # 预测的东西都不一样了自然维度不匹配 #------------------------------------------------------# weights_path = 'model_data/yolo4_weight.h5' - # 获得classes和anchor + #------------------------------------------------------# + # 训练用图片大小 + # 一般在416x416和608x608选择 + #------------------------------------------------------# + input_shape = (416,416) + #------------------------------------------------------# + # 是否对损失进行归一化 + #------------------------------------------------------# + normalize = True + + #----------------------------------------------------# + # 获取classes和anchor + #----------------------------------------------------# class_names = get_classes(classes_path) anchors = get_anchors(anchors_path) - # 一共有多少类 + #------------------------------------------------------# + # 一共有多少类和多少先验框 + #------------------------------------------------------# num_classes = len(class_names) num_anchors = len(anchors) - # 训练后的模型保存的位置 - log_dir = 'logs/' - # 输入的shape大小 - # 显存比较小可以使用416x416 - # 现存比较大可以使用608x608 - input_shape = (416,416) + #------------------------------------------------------# + # Yolov4的tricks应用 + # mosaic 马赛克数据增强 True or False + # Cosine_scheduler 余弦退火学习率 True or False + # label_smoothing 标签平滑 0.01以下一般 如0.01、0.005 + #------------------------------------------------------# mosaic = True Cosine_scheduler = False label_smoothing = 0 - # 清除session K.clear_session() - - # 输入的图像为 + #------------------------------------------------------# + # 创建yolo模型 + #------------------------------------------------------# image_input = Input(shape=(None, None, 3)) h, w = input_shape - - # 创建yolo模型 print('Create YOLOv4 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) model_body = yolo_body(image_input, num_anchors//3, num_classes) - # 载入预训练权重 + #------------------------------------------------------# + # 载入预训练权重 + #------------------------------------------------------# print('Load weights {}.'.format(weights_path)) model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) - # y_true为13,13,3,85 - # 26,26,3,85 - # 52,52,3,85 + #------------------------------------------------------# + # 在这个地方设置损失,将网络的输出结果传入loss函数 + # 把整个模型的输出作为loss + #------------------------------------------------------# y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \ num_anchors//3, num_classes+5)) for l in range(3)] - - # 输入为*model_body.input, *y_true - # 输出为model_loss loss_input = [*model_body.output, *y_true] model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', - arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing})(loss_input) + arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, + 'label_smoothing': label_smoothing, 'normalize': normalize})(loss_input) model = Model([model_body.input, *y_true], model_loss) - # 训练参数设置 + #-------------------------------------------------------------------------------# + # 训练参数的设置 + # logging表示tensorboard的保存地址 + # checkpoint用于设置权值保存的细节,period用于修改多少epoch保存一次 + # reduce_lr用于设置学习率下降的方式 + # early_stopping用于设定早停,val_loss多次不下降自动结束训练,表示模型基本收敛 + #-------------------------------------------------------------------------------# logging = TensorBoard(log_dir=log_dir) checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', monitor='val_loss', save_weights_only=True, save_best_only=False, period=1) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1) - # 0.1用于验证,0.9用于训练 + #----------------------------------------------------------------------# + # 验证集的划分在train.py代码里面进行 + # 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。 + # 当前划分方式下,验证集和训练集的比例为1:9 + #----------------------------------------------------------------------# val_split = 0.1 with open(annotation_path) as f: lines = f.readlines() @@ -211,6 +285,10 @@ if __name__ == "__main__": num_val = int(len(lines)*val_split) num_train = len(lines) - num_val + freeze_layers = 249 + for i in range(freeze_layers): model_body.layers[i].trainable = False + print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers))) + #------------------------------------------------------# # 主干特征提取网络特征通用,冻结训练可以加快训练速度 # 也可以在训练初期防止权值被破坏。 @@ -219,18 +297,12 @@ if __name__ == "__main__": # Epoch总训练世代 # 提示OOM或者显存不足请调小Batch_size #------------------------------------------------------# - freeze_layers = 249 - for i in range(freeze_layers): model_body.layers[i].trainable = False - print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers))) - - # 调整非主干模型first if True: Init_epoch = 0 Freeze_epoch = 50 - # batch_size大小,每次喂入多少数据 batch_size = 8 - # 最大学习率 learning_rate_base = 1e-3 + if Cosine_scheduler: # 预热期 warmup_epoch = int((Freeze_epoch-Init_epoch)*0.2) @@ -252,9 +324,9 @@ if __name__ == "__main__": model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) - model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic), + model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True), steps_per_epoch=max(1, num_train//batch_size), - validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False), + validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False), validation_steps=max(1, num_val//batch_size), epochs=Freeze_epoch, initial_epoch=Init_epoch, @@ -263,15 +335,12 @@ if __name__ == "__main__": for i in range(freeze_layers): model_body.layers[i].trainable = True - # 解冻后训练 if True: Freeze_epoch = 50 Epoch = 100 - # batch_size大小,每次喂入多少数据 batch_size = 2 - - # 最大学习率 learning_rate_base = 1e-4 + if Cosine_scheduler: # 预热期 warmup_epoch = int((Epoch-Freeze_epoch)*0.2) @@ -293,9 +362,9 @@ if __name__ == "__main__": model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) - model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic), + model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True), steps_per_epoch=max(1, num_train//batch_size), - validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False), + validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False), validation_steps=max(1, num_val//batch_size), epochs=Epoch, initial_epoch=Freeze_epoch, diff --git a/utils/utils.py b/utils/utils.py index ed334a4..6be23a8 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -1,12 +1,12 @@ -"""Miscellaneous utility functions.""" +from functools import reduce -import numpy as np +import cv2 import keras import keras.backend as K -from functools import reduce +import numpy as np +from matplotlib.colors import hsv_to_rgb, rgb_to_hsv from PIL import Image -from matplotlib.colors import rgb_to_hsv, hsv_to_rgb -import cv2 + def compose(*funcs): if funcs: @@ -101,8 +101,8 @@ def merge_bboxes(bboxes, cutx, cuty): def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue=.1, sat=1.5, val=1.5): '''random preprocessing for real-time data augmentation''' h, w = input_shape - min_offset_x = 0.4 - min_offset_y = 0.4 + min_offset_x = 0.3 + min_offset_y = 0.3 scale_low = 1-min(min_offset_x,min_offset_y) scale_high = scale_low+0.2 @@ -112,6 +112,7 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue place_x = [0,0,int(w*min_offset_x),int(w*min_offset_x)] place_y = [0,int(h*min_offset_y),int(h*min_offset_y),0] + for line in annotation_line: # 每一行进行分割 line_content = line.split() @@ -163,7 +164,6 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue new_image.paste(image, (dx, dy)) image_data = np.array(new_image)/255 - index = index + 1 box_data = [] # 对box进行重新处理 @@ -183,8 +183,6 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue image_datas.append(image_data) box_datas.append(box_data) - - # 将图片分割,放在一起 cutx = np.random.randint(int(w*min_offset_x), int(w*(1 - min_offset_x))) cuty = np.random.randint(int(h*min_offset_y), int(h*(1 - min_offset_y))) @@ -206,7 +204,7 @@ def get_random_data_with_Mosaic(annotation_line, input_shape, max_boxes=100, hue return new_image, box_data -def get_random_data(annotation_line, input_shape, max_boxes=100, jitter=.3, hue=.1, sat=1.5, val=1.5): +def get_random_data(annotation_line, input_shape, max_boxes=100, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True): '''random preprocessing for real-time data augmentation''' line = annotation_line.split() image = Image.open(line[0]) @@ -214,6 +212,36 @@ def get_random_data(annotation_line, input_shape, max_boxes=100, jitter=.3, hue= h, w = input_shape box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) + if not random: + # resize image + scale = min(w/iw, h/ih) + nw = int(iw*scale) + nh = int(ih*scale) + dx = (w-nw)//2 + dy = (h-nh)//2 + + image = image.resize((nw,nh), Image.BICUBIC) + new_image = Image.new('RGB', (w,h), (128,128,128)) + new_image.paste(image, (dx, dy)) + image_data = np.array(new_image, np.float32)/255 + + # correct boxes + box_data = np.zeros((max_boxes,5)) + if len(box)>0: + np.random.shuffle(box) + box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx + box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy + box[:, 0:2][box[:, 0:2]<0] = 0 + box[:, 2][box[:, 2]>w] = w + box[:, 3][box[:, 3]>h] = h + box_w = box[:, 2] - box[:, 0] + box_h = box[:, 3] - box[:, 1] + box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box + if len(box)>max_boxes: box = box[:max_boxes] + box_data[:len(box)] = box + + return image_data, box_data + # 对图像进行缩放并且进行长和宽的扭曲 new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter) scale = rand(.25, 2) diff --git a/video.py b/video.py index cf64691..6488b3d 100644 --- a/video.py +++ b/video.py @@ -1,15 +1,24 @@ #-------------------------------------# -# 调用摄像头检测 +# 调用摄像头或者视频进行检测 +# 调用摄像头直接运行即可 +# 调用视频可以将cv2.VideoCapture()指定路径 +# 视频的保存并不难,可以百度一下看看 #-------------------------------------# +import time + +import cv2 +import numpy as np from keras.layers import Input -from yolo import YOLO from PIL import Image -import numpy as np -import cv2 -import time + +from yolo import YOLO + yolo = YOLO() -# 调用摄像头 -capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4") +#-------------------------------------# +# 调用摄像头 +# capture=cv2.VideoCapture("1.mp4") +#-------------------------------------# +capture=cv2.VideoCapture(0) fps = 0.0 while(True): @@ -20,10 +29,8 @@ while(True): frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB) # 转变成Image frame = Image.fromarray(np.uint8(frame)) - # 进行检测 frame = np.array(yolo.detect_image(frame)) - # RGBtoBGR满足opencv显示格式 frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) @@ -37,4 +44,5 @@ while(True): capture.release() break -yolo.close_session() +yolo.close_session() + diff --git a/vision_for_anchors.py b/vision_for_anchors.py index 894f668..ceb5bab 100644 --- a/vision_for_anchors.py +++ b/vision_for_anchors.py @@ -1,5 +1,7 @@ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + + def sigmoid(x): s = 1 / (1 + np.exp(-x)) return s @@ -78,4 +80,4 @@ def yolo_head(feats, anchors, num_classes): # feat = np.random.normal(0,0.5,[4,13,13,75]) anchors = [[142, 110],[192, 243],[459, 401]] -yolo_head(feat,anchors,20) \ No newline at end of file +yolo_head(feat,anchors,20) diff --git a/voc_annotation.py b/voc_annotation.py index aad0657..ca0f88d 100644 --- a/voc_annotation.py +++ b/voc_annotation.py @@ -1,3 +1,8 @@ +#---------------------------------------------# +# 运行前一定要修改classes +# 如果生成的2007_train.txt里面没有目标信息 +# 那么就是因为classes没有设定正确 +#---------------------------------------------# import xml.etree.ElementTree as ET from os import getcwd diff --git a/yolo.py b/yolo.py index 0ac6e1e..f688547 100644 --- a/yolo.py +++ b/yolo.py @@ -1,17 +1,23 @@ -import os -import numpy as np -import copy import colorsys +import copy +import os from timeit import default_timer as timer + +import numpy as np from keras import backend as K -from keras.models import load_model from keras.layers import Input -from PIL import Image, ImageFont, ImageDraw -from nets.yolo4 import yolo_body,yolo_eval +from keras.models import load_model +from PIL import Image, ImageDraw, ImageFont + +from nets.yolo4 import yolo_body, yolo_eval from utils.utils import letterbox_image + + #--------------------------------------------# # 使用自己训练好的模型预测需要修改2个参数 # model_path和classes_path都需要修改! +# 如果出现shape不匹配,一定要注意 +# 训练时的model_path和classes_path参数的修改 #--------------------------------------------# class YOLO(object): _defaults = { @@ -64,18 +70,22 @@ class YOLO(object): return np.array(anchors).reshape(-1, 2) #---------------------------------------------------# - # 获得所有的分类 + # 载入模型 #---------------------------------------------------# def generate(self): model_path = os.path.expanduser(self.model_path) assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' - # 计算anchor数量 + #---------------------------------------------------# + # 计算先验框的数量和种类的数量 + #---------------------------------------------------# num_anchors = len(self.anchors) num_classes = len(self.class_names) - # 载入模型,如果原来的模型里已经包括了模型结构则直接载入。 - # 否则先构建模型再载入 + #---------------------------------------------------------# + # 载入模型,如果原来的模型里已经包括了模型结构则直接载入。 + # 否则先构建模型再载入 + #---------------------------------------------------------# try: self.yolo_model = load_model(model_path, compile=False) except: @@ -103,6 +113,10 @@ class YOLO(object): self.input_image_shape = K.placeholder(shape=(2, )) + #---------------------------------------------------------# + # 在yolo_eval函数中,我们会对预测结果进行后处理 + # 后处理的内容包括,解码、非极大抑制、门限筛选等 + #---------------------------------------------------------# boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, num_classes, self.input_image_shape, max_boxes = self.max_boxes, score_threshold = self.score, iou_threshold = self.iou) @@ -113,30 +127,37 @@ class YOLO(object): #---------------------------------------------------# def detect_image(self, image): start = timer() - - # 调整图片使其符合输入要求 + #---------------------------------------------------------# + # 给图像增加灰条,实现不失真的resize + #---------------------------------------------------------# new_image_size = (self.model_image_size[1],self.model_image_size[0]) boxed_image = letterbox_image(image, new_image_size) image_data = np.array(boxed_image, dtype='float32') image_data /= 255. - image_data = np.expand_dims(image_data, 0) # Add batch dimension. - - # 预测结果 + #---------------------------------------------------------# + # 添加上batch_size维度 + #---------------------------------------------------------# + image_data = np.expand_dims(image_data, 0) + + #---------------------------------------------------------# + # 将图像输入网络当中进行预测! + #---------------------------------------------------------# out_boxes, out_scores, out_classes = self.sess.run( [self.boxes, self.scores, self.classes], feed_dict={ self.yolo_model.input: image_data, self.input_image_shape: [image.size[1], image.size[0]], - K.learning_phase(): 0 - }) + K.learning_phase(): 0}) print('Found {} boxes for {}'.format(len(out_boxes), 'img')) - # 设置字体 + + #---------------------------------------------------------# + # 设置字体 + #---------------------------------------------------------# font = ImageFont.truetype(font='font/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) - thickness = (image.size[0] + image.size[1]) // 300 - small_pic=[] + thickness = max((image.size[0] + image.size[1]) // 300, 1) for i, c in list(enumerate(out_classes)): predicted_class = self.class_names[c] @@ -159,7 +180,7 @@ class YOLO(object): draw = ImageDraw.Draw(image) label_size = draw.textsize(label, font) label = label.encode('utf-8') - print(label) + print(label, top, left, bottom, right) if top - label_size[1] >= 0: text_origin = np.array([left, top - label_size[1]]) -- GitLab