未验证 提交 a60e3df5 编写于 作者: B Bubbliiiing 提交者: GitHub

Add files via upload

上级 49081f53
......@@ -28,17 +28,19 @@ class FPS_YOLO(YOLO):
# 调整图片使其符合输入要求
image_shape = np.array(np.shape(image)[0:2])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
photo = np.array(crop_img,dtype = np.float32)
photo /= 255.0
photo = np.array(crop_img,dtype = np.float32) / 255.0
photo = np.transpose(photo, (2, 0, 1))
photo = photo.astype(np.float32)
images = []
images.append(photo)
images = np.asarray(images)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images = [photo]
with torch.no_grad():
images = torch.from_numpy(images)
images = torch.from_numpy(np.asarray(images))
if self.cuda:
images = images.cuda()
outputs = self.net(images)
......
#----------------------------------------------------------------------#
#   The actual train/val split is performed inside train.py.
#   It is normal for test.txt and val.txt to be empty; training
#   never reads them.
#----------------------------------------------------------------------#
import os
import random
# Fixed seed so the dataset split is reproducible across runs.
random.seed(0)
# Directory holding the VOC2007 XML annotation files.
xmlfilepath=r'./VOCdevkit/VOC2007/Annotations'
# Output directory for the generated ImageSets/Main txt lists.
saveBasePath=r"./VOCdevkit/VOC2007/ImageSets/Main/"
#----------------------------------------------------------------------#
#   Lower trainval_percent if you want a held-out test set.
#   train_percent does not need to be changed.
#----------------------------------------------------------------------#
trainval_percent=1
train_percent=1
......
import torch
import math
import numpy as np
import torch
def box_ciou(b1, b2):
"""
输入为:
......@@ -53,4 +56,4 @@ def box_ciou(b1, b2):
box1 = torch.from_numpy(np.array([[25,25,40,40]])).type(torch.FloatTensor)
box2 = torch.from_numpy(np.array([[25,25,30,40]])).type(torch.FloatTensor)
print(box_ciou(box1,box2))
\ No newline at end of file
print(box_ciou(box1,box2))
import colorsys
import json
import os
import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable
from tqdm import tqdm
from nets.yolo4 import YoloBody
from utils.utils import (DecodeBox, bbox_iou, diou_non_max_suppression,
letterbox_image, non_max_suppression,
yolo_correct_boxes)
from yolo import YOLO
#--------------------------------------------------------------------------#
#   Class-name -> COCO category id mapping used when writing detection
#   results. Ids follow the 90-id COCO scheme, which is non-contiguous
#   (note the gaps, e.g. 25 -> 27, 28 -> 31).
#   NOTE(review): the empty-string key mapped to 83 looks like a merge
#   artifact -- verify it against the model's class_names file before
#   relying on it; a lookup miss here raises KeyError at eval time.
#--------------------------------------------------------------------------#
coco_classes = {'person': 1, 'bicycle': 2, 'car': 3, 'motorbike': 4, 'aeroplane': 5,
'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 'traffic light': 10, 'fire hydrant': 11,
'': 83, 'stop sign': 13, 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17,
'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 'bear': 23, 'zebra': 24,
'giraffe': 25, 'backpack': 27, 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33,
'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 'kite': 38, 'baseball bat': 39,
'baseball glove': 40, 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 'bottle': 44,
'wine glass': 46, 'cup': 47, 'fork': 48, 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52,
'apple': 53, 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 'hot dog': 58,
'pizza': 59, 'donut': 60, 'cake': 61, 'chair': 62, 'sofa': 63, 'pottedplant': 64, 'bed': 65,
'diningtable': 67, 'toilet': 70, 'tvmonitor': 72, 'laptop': 73, 'mouse': 74, 'remote': 75,
'keyboard': 76, 'cell phone': 77, 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81,
'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 'scissors': 87, 'teddy bear': 88,
'hair drier': 89, 'toothbrush': 90
}
class mAP_YOLO(YOLO):
    #---------------------------------------------------#
    #   Run detection on one image and accumulate
    #   COCO-format result dicts.
    #---------------------------------------------------#
    def detect_image(self, image_id, image, results):
        """Detect objects in `image` and append one COCO result dict per box.

        Args:
            image_id: numeric image identifier (must be convertible to int).
            image:    PIL.Image to run detection on.
            results:  running list of COCO result dicts; appended in place.

        Returns:
            The `results` list -- also when nothing is detected, so the
            caller can keep reassigning the return value.
        """
        # Very low threshold so mAP evaluation also sees low-score boxes.
        self.confidence = 0.001
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Letterbox resize: pad with gray bars so the image is
        #   resized to the model input size without distortion.
        #---------------------------------------------------------#
        crop_img = np.array(letterbox_image(image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   Add the batch_size dimension.
        #---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   Feed the image through the network.
            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))

            #---------------------------------------------------------#
            #   Stack the predictions from all three heads, then NMS.
            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output, len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)

        #---------------------------------------------------------#
        #   No detections for this image: return `results` unchanged.
        #   BUG FIX: the original returned the input image here, which
        #   corrupted the caller's `results` accumulator (the caller
        #   assigns the return value back into `results`).
        #---------------------------------------------------------#
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError):
            return results

        #---------------------------------------------------------#
        #   Filter prediction boxes by combined score.
        #---------------------------------------------------------#
        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:, 0], -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        #-----------------------------------------------------------------#
        #   The boxes are relative to the letterboxed (gray-padded) image,
        #   so remove the padding offset to map them back onto the
        #   original image.
        #-----------------------------------------------------------------#
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([self.model_image_size[0], self.model_image_size[1]]), image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            top, left, bottom, right = boxes[i]

            # Clamp to image bounds and round to integer pixels.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            result = {}
            result["image_id"] = int(image_id)
            result["category_id"] = coco_classes[predicted_class]
            # COCO bbox format: [x, y, width, height].
            result["bbox"] = [float(left), float(top), float(right - left), float(bottom - top)]
            result["score"] = float(top_conf[i])
            results.append(result)
        return results
#--------------------------------------------------------------#
#   Walk the COCO val2017 folder, run detection on every jpg,
#   and dump the accumulated results as a COCO results JSON.
#--------------------------------------------------------------#
yolo = mAP_YOLO()
image_dir = "./coco_dataset/val2017"
with open("./coco_dataset/eval_results.json", "w") as f:
    results = []
    for file_name in tqdm(os.listdir(image_dir)):
        # Skip anything that is not a jpg image.
        if not file_name.endswith("jpg"):
            continue
        image = Image.open(image_dir + "/" + file_name)
        # The filename stem (before the first dot) is the COCO image id.
        results = yolo.detect_image(file_name.split(".")[0], image, results)
    json.dump(results, f)
......@@ -3,19 +3,24 @@
# 具体教程请查看Bilibili
# Bubbliiiing
#-------------------------------------#
import cv2
import numpy as np
import colorsys
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable
from yolo import YOLO
from nets.yolo4 import YoloBody
from PIL import Image,ImageFont, ImageDraw
from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes
from tqdm import tqdm
from nets.yolo4 import YoloBody
from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
non_max_suppression, yolo_correct_boxes)
from yolo import YOLO
class mAP_Yolo(YOLO):
#---------------------------------------------------#
# 检测图片
......@@ -26,42 +31,61 @@ class mAP_Yolo(YOLO):
f = open("./input/detection-results/"+image_id+".txt","w")
image_shape = np.array(np.shape(image)[0:2])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
photo = np.array(crop_img,dtype = np.float32)
photo /= 255.0
photo = np.array(crop_img,dtype = np.float32) / 255.0
photo = np.transpose(photo, (2, 0, 1))
photo = photo.astype(np.float32)
images = []
images.append(photo)
images = np.asarray(images)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images = [photo]
with torch.no_grad():
images = torch.from_numpy(images)
images = torch.from_numpy(np.asarray(images))
if self.cuda:
images = images.cuda()
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
outputs = self.net(images)
output_list = []
for i in range(3):
output_list.append(self.yolo_decodes[i](outputs[i]))
#---------------------------------------------------------#
# 将预测框进行堆叠,然后进行非极大抑制
#---------------------------------------------------------#
output = torch.cat(output_list, 1)
batch_detections = non_max_suppression(output, len(self.class_names),
conf_thres=self.confidence,
nms_thres=self.iou)
#---------------------------------------------------------#
# 如果没有检测出物体,返回原图
#---------------------------------------------------------#
try:
batch_detections = batch_detections[0].cpu().numpy()
except:
return image
output_list = []
for i in range(3):
output_list.append(self.yolo_decodes[i](outputs[i]))
output = torch.cat(output_list, 1)
batch_detections = non_max_suppression(output, len(self.class_names),
conf_thres=self.confidence,
nms_thres=self.iou)
try:
batch_detections = batch_detections[0].cpu().numpy()
except:
return image
top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
top_label = np.array(batch_detections[top_index,-1],np.int32)
top_bboxes = np.array(batch_detections[top_index,:4])
top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
# 去掉灰条
boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
#---------------------------------------------------------#
# 对预测框进行得分筛选
#---------------------------------------------------------#
top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
top_label = np.array(batch_detections[top_index,-1],np.int32)
top_bboxes = np.array(batch_detections[top_index,:4])
top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的top_bboxes是相对于有灰条的图像的
# 我们需要对其进行修改,去除灰条的部分。
#-----------------------------------------------------------------#
boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
for i, c in enumerate(top_label):
predicted_class = self.class_names[c]
......
import argparse
import glob
import json
import math
import operator
import os
import shutil
import operator
import sys
import argparse
import math
import numpy as np
#----------------------------------------------------#
# 用于计算mAP
# 代码克隆自https://github.com/Cartucho/mAP
......
import numpy as np
import xml.etree.ElementTree as ET
import glob
import random
import xml.etree.ElementTree as ET
import numpy as np
def cas_iou(box,cluster):
x = np.minimum(cluster[:,0],box[0])
......@@ -61,6 +63,9 @@ def load_data(path):
tree = ET.parse(xml_file)
height = int(tree.findtext('./size/height'))
width = int(tree.findtext('./size/width'))
if height<=0 or width<=0:
continue
# 对于每一个目标都获得它的宽高
for obj in tree.iter('object'):
xmin = int(float(obj.findtext('bndbox/xmin'))) / width
......@@ -103,4 +108,4 @@ if __name__ == '__main__':
else:
x_y = ", %d,%d" % (data[i][0], data[i][1])
f.write(x_y)
f.close()
\ No newline at end of file
f.close()
import torch
import torch.nn.functional as F
import torch.nn as nn
import math
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
#-------------------------------------------------#
# MISH激活函数
#-------------------------------------------------#
......@@ -14,10 +16,10 @@ class Mish(nn.Module):
def forward(self, x):
    """Apply the Mish activation elementwise: x * tanh(softplus(x))."""
    softplus_x = F.softplus(x)
    return torch.tanh(softplus_x) * x
#-------------------------------------------------#
# 卷积块
# CONV+BATCHNORM+MISH
#-------------------------------------------------#
#---------------------------------------------------#
# 卷积块 -> 卷积 + 标准化 + 激活函数
# Conv2d + BatchNormalization + Mish
#---------------------------------------------------#
class BasicConv(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride=1):
super(BasicConv, self).__init__()
......@@ -37,7 +39,7 @@ class BasicConv(nn.Module):
# 内部堆叠的残差块
#---------------------------------------------------#
class Resblock(nn.Module):
def __init__(self, channels, hidden_channels=None, residual_activation=nn.Identity()):
def __init__(self, channels, hidden_channels=None):
super(Resblock, self).__init__()
if hidden_channels is None:
......@@ -51,33 +53,52 @@ class Resblock(nn.Module):
def forward(self, x):
    """Residual connection: return the input plus the block's output."""
    residual = self.block(x)
    return residual + x
#---------------------------------------------------#
#--------------------------------------------------------------------#
# CSPdarknet的结构块
# 存在一个大残差边
# 这个大残差边绕过了很多的残差结构
#---------------------------------------------------#
# 首先利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩
# 然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构
# 主干部分会对num_blocks进行循环,循环内部是残差结构。
# 对于整个CSPdarknet的结构块,就是一个大残差块+内部多个小残差块
#--------------------------------------------------------------------#
class Resblock_body(nn.Module):
def __init__(self, in_channels, out_channels, num_blocks, first):
    """CSPDarknet structure block: a stride-2 downsample followed by a
    CSP split -- one large residual edge (split_conv0) that bypasses the
    stacked residual blocks, and a trunk (split_conv1 + blocks_conv)
    that runs through them.

    Args:
        in_channels:  input channel count.
        out_channels: output channel count.
        num_blocks:   number of Resblocks in the trunk (ignored when
                      `first` is True, which uses a single Resblock).
        first:        True for the first stage, whose split branches
                      keep full `out_channels` width instead of half.
    """
    super(Resblock_body, self).__init__()
    #----------------------------------------------------------------#
    #   Compress height and width with a stride-2x2 conv block.
    #----------------------------------------------------------------#
    self.downsample_conv = BasicConv(in_channels, out_channels, 3, stride=2)

    if first:
        #----------------------------------------------------------------#
        #   Large residual edge that bypasses the residual structures.
        #----------------------------------------------------------------#
        self.split_conv0 = BasicConv(out_channels, out_channels, 1)
        #----------------------------------------------------------------#
        #   Trunk: residual structure looped over (a single Resblock here).
        #----------------------------------------------------------------#
        self.split_conv1 = BasicConv(out_channels, out_channels, 1)
        self.blocks_conv = nn.Sequential(
            Resblock(channels=out_channels, hidden_channels=out_channels//2),
            BasicConv(out_channels, out_channels, 1)
        )
        self.concat_conv = BasicConv(out_channels*2, out_channels, 1)
    else:
        #----------------------------------------------------------------#
        #   Large residual edge that bypasses the residual structures.
        #----------------------------------------------------------------#
        self.split_conv0 = BasicConv(out_channels, out_channels//2, 1)
        #----------------------------------------------------------------#
        #   Trunk: num_blocks residual structures in a row.
        #   BUG FIX: the original assigned split_conv1 twice (merge
        #   artifact), constructing a throwaway BasicConv module.
        #----------------------------------------------------------------#
        self.split_conv1 = BasicConv(out_channels, out_channels//2, 1)
        self.blocks_conv = nn.Sequential(
            *[Resblock(out_channels//2) for _ in range(num_blocks)],
            BasicConv(out_channels//2, out_channels//2, 1)
        )
        self.concat_conv = BasicConv(out_channels, out_channels, 1)
def forward(self, x):
......@@ -88,28 +109,44 @@ class Resblock_body(nn.Module):
x1 = self.split_conv1(x)
x1 = self.blocks_conv(x1)
#------------------------------------#
# 将大残差边再堆叠回来
#------------------------------------#
x = torch.cat([x1, x0], dim=1)
#------------------------------------#
# 最后对通道数进行整合
#------------------------------------#
x = self.concat_conv(x)
return x
#---------------------------------------------------#
# CSPdarknet53 的主体部分
# 输入为一张416x416x3的图片
# 输出为三个有效特征层
#---------------------------------------------------#
class CSPDarkNet(nn.Module):
def __init__(self, layers):
super(CSPDarkNet, self).__init__()
self.inplanes = 32
# 416,416,3 -> 416,416,32
self.conv1 = BasicConv(3, self.inplanes, kernel_size=3, stride=1)
self.feature_channels = [64, 128, 256, 512, 1024]
self.stages = nn.ModuleList([
# 416,416,32 -> 208,208,64
Resblock_body(self.inplanes, self.feature_channels[0], layers[0], first=True),
# 208,208,64 -> 104,104,128
Resblock_body(self.feature_channels[0], self.feature_channels[1], layers[1], first=False),
# 104,104,128 -> 52,52,256
Resblock_body(self.feature_channels[1], self.feature_channels[2], layers[2], first=False),
# 52,52,256 -> 26,26,512
Resblock_body(self.feature_channels[2], self.feature_channels[3], layers[3], first=False),
# 26,26,512 -> 13,13,1024
Resblock_body(self.feature_channels[3], self.feature_channels[4], layers[4], first=False)
])
self.num_features = 1
# 进行权值初始化
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
......
from collections import OrderedDict
import torch
import torch.nn as nn
from collections import OrderedDict
from nets.CSPdarknet import darknet53
def conv2d(filter_in, filter_out, kernel_size, stride=1):
pad = (kernel_size - 1) // 2 if kernel_size else 0
return nn.Sequential(OrderedDict([
......@@ -83,7 +86,13 @@ def yolo_head(filters_list, in_filters):
class YoloBody(nn.Module):
def __init__(self, num_anchors, num_classes):
super(YoloBody, self).__init__()
# backbone
#---------------------------------------------------#
# 生成CSPdarknet53的主干模型
# 获得三个有效特征层,他们的shape分别是:
# 52,52,256
# 26,26,512
# 13,13,1024
#---------------------------------------------------#
self.backbone = darknet53(None)
self.conv1 = make_three_conv([512,1024],1024)
......@@ -97,20 +106,21 @@ class YoloBody(nn.Module):
self.upsample2 = Upsample(256,128)
self.conv_for_P3 = conv2d(256,128,1)
self.make_five_conv2 = make_five_conv([128, 256],256)
# 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
# 4+1+num_classes
# 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75
final_out_filter2 = num_anchors * (5 + num_classes)
self.yolo_head3 = yolo_head([256, final_out_filter2],128)
self.down_sample1 = conv2d(128,256,3,stride=2)
self.make_five_conv3 = make_five_conv([256, 512],512)
# 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
# 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75
final_out_filter1 = num_anchors * (5 + num_classes)
self.yolo_head2 = yolo_head([512, final_out_filter1],256)
self.down_sample2 = conv2d(256,512,3,stride=2)
self.make_five_conv4 = make_five_conv([512, 1024],1024)
# 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
final_out_filter0 = num_anchors * (5 + num_classes)
self.yolo_head1 = yolo_head([1024, final_out_filter0],512)
......@@ -120,30 +130,58 @@ class YoloBody(nn.Module):
# backbone
x2, x1, x0 = self.backbone(x)
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048
P5 = self.conv1(x0)
P5 = self.SPP(P5)
# 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5 = self.conv2(P5)
# 13,13,512 -> 13,13,256 -> 26,26,256
P5_upsample = self.upsample1(P5)
# 26,26,512 -> 26,26,256
P4 = self.conv_for_P4(x1)
# 26,26,256 + 26,26,256 -> 26,26,512
P4 = torch.cat([P4,P5_upsample],axis=1)
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4 = self.make_five_conv1(P4)
# 26,26,256 -> 26,26,128 -> 52,52,128
P4_upsample = self.upsample2(P4)
# 52,52,256 -> 52,52,128
P3 = self.conv_for_P3(x2)
# 52,52,128 + 52,52,128 -> 52,52,256
P3 = torch.cat([P3,P4_upsample],axis=1)
# 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
P3 = self.make_five_conv2(P3)
# 52,52,128 -> 26,26,256
P3_downsample = self.down_sample1(P3)
# 26,26,256 + 26,26,256 -> 26,26,512
P4 = torch.cat([P3_downsample,P4],axis=1)
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4 = self.make_five_conv3(P4)
# 26,26,256 -> 13,13,512
P4_downsample = self.down_sample2(P4)
# 13,13,512 + 13,13,512 -> 13,13,1024
P5 = torch.cat([P4_downsample,P5],axis=1)
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5 = self.make_five_conv4(P5)
#---------------------------------------------------#
# 第三个特征层
# y3=(batch_size,75,52,52)
#---------------------------------------------------#
out2 = self.yolo_head3(P3)
#---------------------------------------------------#
# 第二个特征层
# y2=(batch_size,75,26,26)
#---------------------------------------------------#
out1 = self.yolo_head2(P4)
#---------------------------------------------------#
# 第一个特征层
# y1=(batch_size,75,13,13)
#---------------------------------------------------#
out0 = self.yolo_head1(P5)
return out0, out1, out2
......
此差异已折叠。
#-------------------------------------#
# 对单张图片进行预测
#-------------------------------------#
from yolo import YOLO
'''
predict.py有几个注意点
1、无法进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。
2、如果想要保存,利用r_image.save("img.jpg")即可保存。
3、如果想要获得框的坐标,可以进入detect_image函数,读取top,left,bottom,right这四个值。
4、如果想要截取下目标,可以利用获取到的top,left,bottom,right这四个值在原图上利用矩阵的方式进行截取。
'''
from PIL import Image
from yolo import YOLO
yolo = YOLO()
while True:
......
......@@ -5,6 +5,7 @@
#--------------------------------------------#
import torch
from torchsummary import summary
from nets.CSPdarknet import darknet53
from nets.yolo4 import YoloBody
......
......@@ -2,20 +2,23 @@
# 对数据集进行训练
#-------------------------------------#
import os
import numpy as np
import time
import numpy as np
import torch
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from utils.dataloader import yolo_dataset_collate, YoloDataset
from nets.yolo_training import YOLOLoss,Generator
from nets.yolo4 import YoloBody
from tqdm import tqdm
from nets.yolo4 import YoloBody
from nets.yolo_training import Generator, YOLOLoss
from utils.dataloader import YoloDataset, yolo_dataset_collate
#---------------------------------------------------#
# 获得类和先验框
#---------------------------------------------------#
......@@ -37,10 +40,12 @@ def get_lr(optimizer):
for param_group in optimizer.param_groups:
return param_group['lr']
def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda):
total_loss = 0
val_loss = 0
start_time = time.time()
net.train()
with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
for iteration, batch in enumerate(gen):
if iteration >= epoch_size:
......@@ -53,25 +58,38 @@ def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
else:
images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
#----------------------#
# 清零梯度
#----------------------#
optimizer.zero_grad()
#----------------------#
# 前向传播
#----------------------#
outputs = net(images)
losses = []
num_pos_all = 0
#----------------------#
# 计算损失
#----------------------#
for i in range(3):
loss_item = yolo_losses[i](outputs[i], targets)
losses.append(loss_item[0])
loss = sum(losses)
loss_item, num_pos = yolo_losses[i](outputs[i], targets)
losses.append(loss_item)
num_pos_all += num_pos
loss = sum(losses) / num_pos_all
#----------------------#
# 反向传播
#----------------------#
loss.backward()
optimizer.step()
total_loss += loss
waste_time = time.time() - start_time
total_loss += loss.item()
pbar.set_postfix(**{'total_loss': total_loss.item() / (iteration + 1),
'lr' : get_lr(optimizer),
'step/s' : waste_time})
pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
'lr' : get_lr(optimizer)})
pbar.update(1)
start_time = time.time()
net.eval()
print('Start Validation')
with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
......@@ -90,14 +108,15 @@ def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
optimizer.zero_grad()
outputs = net(images_val)
losses = []
num_pos_all = 0
for i in range(3):
loss_item = yolo_losses[i](outputs[i], targets_val)
losses.append(loss_item[0])
loss = sum(losses)
val_loss += loss
pbar.set_postfix(**{'total_loss': val_loss.item() / (iteration + 1)})
loss_item, num_pos = yolo_losses[i](outputs[i], targets_val)
losses.append(loss_item)
num_pos_all += num_pos
loss = sum(losses) / num_pos_all
val_loss += loss.item()
pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
pbar.update(1)
net.train()
print('Finish Validation')
print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_loss/(epoch_size_val+1)))
......@@ -111,41 +130,58 @@ def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
#----------------------------------------------------#
if __name__ == "__main__":
#-------------------------------#
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape = (416,416)
#-------------------------------#
# tricks的使用设置
# 是否使用Cuda
# 没有GPU可以设置成False
#-------------------------------#
Cosine_lr = False
mosaic = True
# 用于设定是否使用cuda
Cuda = True
smoooth_label = 0
#-------------------------------#
# Dataloder的使用
#-------------------------------#
Use_Data_Loader = True
annotation_path = '2007_train.txt'
#------------------------------------------------------#
# 是否对损失进行归一化
#------------------------------------------------------#
normalize = True
#-------------------------------#
# 获得先验框和类
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape = (416,416)
#----------------------------------------------------#
# classes和anchor的路径,非常重要
# 训练前一定要修改classes_path,使其对应自己的数据集
#----------------------------------------------------#
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/voc_classes.txt'
#----------------------------------------------------#
# 获取classes和anchor
#----------------------------------------------------#
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
num_classes = len(class_names)
# 创建模型
model = YoloBody(len(anchors[0]),num_classes)
#-------------------------------------------#
# 权值文件的下载请看README
#-------------------------------------------#
#------------------------------------------------------#
# Yolov4的tricks应用
# mosaic 马赛克数据增强 True or False
# Cosine_scheduler 余弦退火学习率 True or False
# label_smoothing 标签平滑 0.01以下一般 如0.01、0.005
#------------------------------------------------------#
mosaic = True
Cosine_lr = False
smoooth_label = 0
#------------------------------------------------------#
# 创建yolo模型
# 训练前一定要修改classes_path和对应的txt文件
#------------------------------------------------------#
model = YoloBody(len(anchors[0]), num_classes)
#------------------------------------------------------#
# 权值文件请看README,百度网盘下载
#------------------------------------------------------#
model_path = "model_data/yolo4_weights.pth"
# 加快模型训练的效率
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
......@@ -166,9 +202,17 @@ if __name__ == "__main__":
yolo_losses = []
for i in range(3):
yolo_losses.append(YOLOLoss(np.reshape(anchors,[-1,2]),num_classes, \
(input_shape[1], input_shape[0]), smoooth_label, Cuda))
(input_shape[1], input_shape[0]), smoooth_label, Cuda, normalize))
# 0.1用于验证,0.9用于训练
#----------------------------------------------------#
# 获得图片路径和标签
#----------------------------------------------------#
annotation_path = '2007_train.txt'
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
# 当前划分方式下,验证集和训练集的比例为1:9
#----------------------------------------------------------------------#
val_split = 0.1
with open(annotation_path) as f:
lines = f.readlines()
......@@ -199,17 +243,17 @@ if __name__ == "__main__":
lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
if Use_Data_Loader:
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False)
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False)
gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
else:
gen = Generator(Batch_size, lines[:num_train],
(input_shape[0], input_shape[1])).generate(mosaic = mosaic)
(input_shape[0], input_shape[1])).generate(train=True, mosaic = mosaic)
gen_val = Generator(Batch_size, lines[num_train:],
(input_shape[0], input_shape[1])).generate(mosaic = False)
(input_shape[0], input_shape[1])).generate(train=False, mosaic = mosaic)
epoch_size = max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
......@@ -236,17 +280,17 @@ if __name__ == "__main__":
lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
if Use_Data_Loader:
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False)
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False)
gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
else:
gen = Generator(Batch_size, lines[:num_train],
(input_shape[0], input_shape[1])).generate(mosaic = mosaic)
(input_shape[0], input_shape[1])).generate(train=True, mosaic = mosaic)
gen_val = Generator(Batch_size, lines[num_train:],
(input_shape[0], input_shape[1])).generate(mosaic = False)
(input_shape[0], input_shape[1])).generate(train=False, mosaic = mosaic)
epoch_size = max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
......
......@@ -39,9 +39,11 @@ def get_lr(optimizer):
return param_group['lr']
def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda,writer):
global train_tensorboard_step, val_tensorboard_step
total_loss = 0
val_loss = 0
start_time = time.time()
net.train()
with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
for iteration, batch in enumerate(gen):
if iteration >= epoch_size:
......@@ -54,28 +56,41 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
else:
images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
#----------------------#
# 清零梯度
#----------------------#
optimizer.zero_grad()
#----------------------#
# 前向传播
#----------------------#
outputs = net(images)
losses = []
num_pos_all = 0
#----------------------#
# 计算损失
#----------------------#
for i in range(3):
loss_item = yolo_losses[i](outputs[i], targets)
losses.append(loss_item[0])
loss = sum(losses)
loss_item, num_pos = yolo_losses[i](outputs[i], targets)
losses.append(loss_item)
num_pos_all += num_pos
loss = sum(losses) / num_pos_all
total_loss += loss.item()
#----------------------#
# 反向传播
#----------------------#
loss.backward()
optimizer.step()
# 将loss写入tensorboard,每一步都写
writer.add_scalar('Train_loss', loss, (epoch*epoch_size + iteration))
total_loss += loss
waste_time = time.time() - start_time
pbar.set_postfix(**{'total_loss': total_loss.item() / (iteration + 1),
'lr' : get_lr(optimizer),
'step/s' : waste_time})
pbar.update(1)
# 将loss写入tensorboard,每一步都写
writer.add_scalar('Train_loss', loss, train_tensorboard_step)
train_tensorboard_step += 1
start_time = time.time()
pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
'lr' : get_lr(optimizer)})
pbar.update(1)
# 将loss写入tensorboard,下面注释的是每个世代保存一次
# writer.add_scalar('Train_loss', total_loss/(iteration+1), epoch)
......@@ -97,20 +112,24 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
optimizer.zero_grad()
outputs = net(images_val)
losses = []
num_pos_all = 0
for i in range(3):
loss_item = yolo_losses[i](outputs[i], targets_val)
losses.append(loss_item[0])
loss = sum(losses)
val_loss += loss
loss_item, num_pos = yolo_losses[i](outputs[i], targets_val)
losses.append(loss_item)
num_pos_all += num_pos
loss = sum(losses) / num_pos_all
val_loss += loss.item()
# 将loss写入tensorboard, 下面注释的是每一步都写
# writer.add_scalar('Val_loss',val_loss/(epoch_size_val+1), (epoch*epoch_size_val + iteration))
# writer.add_scalar('Val_loss', loss, val_tensorboard_step)
# val_tensorboard_step += 1
pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
pbar.update(1)
pbar.set_postfix(**{'total_loss': val_loss.item() / (iteration + 1)})
pbar.update(1)
net.train()
# 将loss写入tensorboard,每个世代保存一次
writer.add_scalar('Val_loss',val_loss/(epoch_size_val+1), epoch)
writer.add_scalar('Val_loss',val_loss / (epoch_size_val+1), epoch)
print('Finish Validation')
print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_loss/(epoch_size_val+1)))
......@@ -121,38 +140,58 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
if __name__ == "__main__":
#-------------------------------#
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape = (416,416)
# 是否使用Cuda
# 没有GPU可以设置成False
#-------------------------------#
# tricks的使用设置
#-------------------------------#
Cosine_lr = False
mosaic = True
# 用于设定是否使用cuda
Cuda = True
smoooth_label = 0
#-------------------------------#
# Dataloder的使用
#-------------------------------#
Use_Data_Loader = True
annotation_path = '2007_train.txt'
#------------------------------------------------------#
# 是否对损失进行归一化
#------------------------------------------------------#
normalize = True
#-------------------------------#
# 获得先验框和类
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape = (416,416)
#----------------------------------------------------#
# classes和anchor的路径,非常重要
# 训练前一定要修改classes_path,使其对应自己的数据集
#----------------------------------------------------#
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/voc_classes.txt'
#----------------------------------------------------#
# 获取classes和anchor
#----------------------------------------------------#
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
num_classes = len(class_names)
# 创建模型
model = YoloBody(len(anchors[0]),num_classes)
#------------------------------------------------------#
# Yolov4的tricks应用
# mosaic 马赛克数据增强 True or False
# Cosine_scheduler 余弦退火学习率 True or False
# label_smoothing 标签平滑 0.01以下一般 如0.01、0.005
#------------------------------------------------------#
mosaic = True
Cosine_lr = False
smoooth_label = 0
#------------------------------------------------------#
# 创建yolo模型
# 训练前一定要修改classes_path和对应的txt文件
#------------------------------------------------------#
model = YoloBody(len(anchors[0]), num_classes)
#------------------------------------------------------#
# 权值文件请看README,百度网盘下载
#------------------------------------------------------#
model_path = "model_data/yolo4_weights.pth"
# 加快模型训练的效率
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
......@@ -173,9 +212,17 @@ if __name__ == "__main__":
yolo_losses = []
for i in range(3):
yolo_losses.append(YOLOLoss(np.reshape(anchors,[-1,2]),num_classes, \
(input_shape[1], input_shape[0]), smoooth_label, Cuda))
(input_shape[1], input_shape[0]), smoooth_label, Cuda, normalize))
# 0.1用于验证,0.9用于训练
#----------------------------------------------------#
# 获得图片路径和标签
#----------------------------------------------------#
annotation_path = '2007_train.txt'
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
# 当前划分方式下,验证集和训练集的比例为1:9
#----------------------------------------------------------------------#
val_split = 0.1
with open(annotation_path) as f:
lines = f.readlines()
......@@ -184,7 +231,7 @@ if __name__ == "__main__":
np.random.seed(None)
num_val = int(len(lines)*val_split)
num_train = len(lines) - num_val
writer = SummaryWriter(log_dir='logs',flush_secs=60)
if Cuda:
graph_inputs = torch.from_numpy(np.random.rand(1,3,input_shape[0],input_shape[1])).type(torch.FloatTensor).cuda()
......@@ -192,6 +239,16 @@ if __name__ == "__main__":
graph_inputs = torch.from_numpy(np.random.rand(1,3,input_shape[0],input_shape[1])).type(torch.FloatTensor)
writer.add_graph(model, (graph_inputs,))
#------------------------------------------------------#
# 主干特征提取网络特征通用,冻结训练可以加快训练速度
# 也可以在训练初期防止权值被破坏。
# Init_Epoch为起始世代
# Freeze_Epoch为冻结训练的世代
# Epoch总训练世代
# 提示OOM或者显存不足请调小Batch_size
#------------------------------------------------------#
train_tensorboard_step = 1
val_tensorboard_step = 1
if True:
lr = 1e-3
Batch_size = 4
......@@ -205,17 +262,17 @@ if __name__ == "__main__":
lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
if Use_Data_Loader:
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False)
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False)
gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
else:
gen = Generator(Batch_size, lines[:num_train],
(input_shape[0], input_shape[1])).generate(mosaic = mosaic)
(input_shape[0], input_shape[1])).generate(train=True, mosaic = mosaic)
gen_val = Generator(Batch_size, lines[num_train:],
(input_shape[0], input_shape[1])).generate(mosaic = False)
(input_shape[0], input_shape[1])).generate(train=False, mosaic = mosaic)
epoch_size = max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
......@@ -242,17 +299,17 @@ if __name__ == "__main__":
lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
if Use_Data_Loader:
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False)
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False)
gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
else:
gen = Generator(Batch_size, lines[:num_train],
(input_shape[0], input_shape[1])).generate(mosaic = mosaic)
(input_shape[0], input_shape[1])).generate(train=True, mosaic = mosaic)
gen_val = Generator(Batch_size, lines[num_train:],
(input_shape[0], input_shape[1])).generate(mosaic = False)
(input_shape[0], input_shape[1])).generate(train=False, mosaic = mosaic)
epoch_size = max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
......
......@@ -14,7 +14,7 @@ from nets.yolo_training import Generator
import cv2
class YoloDataset(Dataset):
def __init__(self, train_lines, image_size, mosaic=True):
def __init__(self, train_lines, image_size, mosaic=True, is_train=True):
super(YoloDataset, self).__init__()
self.train_lines = train_lines
......@@ -22,6 +22,7 @@ class YoloDataset(Dataset):
self.image_size = image_size
self.mosaic = mosaic
self.flag = True
self.is_train = is_train
def __len__(self):
return self.train_batches
......@@ -29,7 +30,7 @@ class YoloDataset(Dataset):
def rand(self, a=0, b=1):
return np.random.rand() * (b - a) + a
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
"""实时数据增强的随机预处理"""
line = annotation_line.split()
image = Image.open(line[0])
......@@ -37,6 +38,35 @@ class YoloDataset(Dataset):
h, w = input_shape
box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
if not random:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
dx = (w-nw)//2
dy = (h-nh)//2
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image, np.float32)
# 调整目标框坐标
box_data = np.zeros((len(box), 5))
if len(box) > 0:
np.random.shuffle(box)
box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
box[:, 0:2][box[:, 0:2] < 0] = 0
box[:, 2][box[:, 2] > w] = w
box[:, 3][box[:, 3] > h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w > 1, box_h > 1)] # 保留有效框
box_data = np.zeros((len(box), 5))
box_data[:len(box)] = box
return image_data, box_data
# 调整图片大小
new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
scale = self.rand(.25, 2)
......@@ -92,13 +122,8 @@ class YoloDataset(Dataset):
box = box[np.logical_and(box_w > 1, box_h > 1)] # 保留有效框
box_data = np.zeros((len(box), 5))
box_data[:len(box)] = box
if len(box) == 0:
return image_data, []
if (box_data[:, :4] > 0).any():
return image_data, box_data
else:
return image_data, []
return image_data, box_data
def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5):
h, w = input_shape
......@@ -197,12 +222,7 @@ class YoloDataset(Dataset):
# 对框进行进一步的处理
new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))
if len(new_boxes) == 0:
return new_image, []
if (new_boxes[:, :4] > 0).any():
return new_image, new_boxes
else:
return new_image, []
return new_image, new_boxes
def __getitem__(self, index):
lines = self.train_lines
......@@ -212,10 +232,10 @@ class YoloDataset(Dataset):
if self.flag and (index + 4) < n:
img, y = self.get_random_data_with_Mosaic(lines[index:index + 4], self.image_size[0:2])
else:
img, y = self.get_random_data(lines[index], self.image_size[0:2])
img, y = self.get_random_data(lines[index], self.image_size[0:2], random=self.is_train)
self.flag = bool(1-self.flag)
else:
img, y = self.get_random_data(lines[index], self.image_size[0:2])
img, y = self.get_random_data(lines[index], self.image_size[0:2], random=self.is_train)
if len(y) != 0:
# 从坐标转换成0~1的百分比
......
from __future__ import division
import os
import math
import os
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable
from torchvision.ops import nms
class DecodeBox(nn.Module):
def __init__(self, anchors, num_classes, img_size):
super(DecodeBox, self).__init__()
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
#-----------------------------------------------------------#
self.anchors = anchors
self.num_anchors = len(anchors)
self.num_classes = num_classes
......@@ -21,26 +28,33 @@ class DecodeBox(nn.Module):
self.img_size = img_size
def forward(self, input):
# input为bs,3*(1+4+num_classes),13,13
# 一共多少张图片
#-----------------------------------------------#
# 输入的input一共有三个,他们的shape分别是
# batch_size, 255, 13, 13
# batch_size, 255, 26, 26
# batch_size, 255, 52, 52
#-----------------------------------------------#
batch_size = input.size(0)
# 13,13
input_height = input.size(2)
input_width = input.size(3)
# 计算步长
# 每一个特征点对应原来的图片上多少个像素点
# 如果特征层为13x13的话,一个特征点就对应原来的图片上的32个像素点
# 416/13 = 32
#-----------------------------------------------#
# 输入为416x416时
# stride_h = stride_w = 32、16、8
#-----------------------------------------------#
stride_h = self.img_size[1] / input_height
stride_w = self.img_size[0] / input_width
# 把先验框的尺寸调整成特征层大小的形式
# 计算出先验框在特征层上对应的宽高
#-------------------------------------------------#
# 此时获得的scaled_anchors大小是相对于特征层的
#-------------------------------------------------#
scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors]
# bs,3*(5+num_classes),13,13 -> bs,3,13,13,(5+num_classes)
#-----------------------------------------------#
# 输入的input一共有三个,他们的shape分别是
# batch_size, 3, 13, 13, 85
# batch_size, 3, 26, 26, 85
# batch_size, 3, 52, 52, 85
#-----------------------------------------------#
prediction = input.view(batch_size, self.num_anchors,
self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous()
......@@ -48,30 +62,39 @@ class DecodeBox(nn.Module):
x = torch.sigmoid(prediction[..., 0])
y = torch.sigmoid(prediction[..., 1])
# 先验框的宽高调整参数
w = prediction[..., 2] # Width
h = prediction[..., 3] # Height
w = prediction[..., 2]
h = prediction[..., 3]
# 获得置信度,是否有物体
conf = torch.sigmoid(prediction[..., 4])
# 种类置信度
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
pred_cls = torch.sigmoid(prediction[..., 5:])
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
# 生成网格,先验框中心,网格左上角 batch_size,3,13,13
#----------------------------------------------------------#
# 生成网格,先验框中心,网格左上角
# batch_size,3,13,13
#----------------------------------------------------------#
grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat(
batch_size * self.num_anchors, 1, 1).view(x.shape).type(FloatTensor)
grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat(
batch_size * self.num_anchors, 1, 1).view(y.shape).type(FloatTensor)
# 生成先验框的宽高
#----------------------------------------------------------#
# 按照网格格式生成先验框的宽高
# batch_size,3,13,13
#----------------------------------------------------------#
anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape)
anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape)
# 计算调整后的先验框中心与宽高
#----------------------------------------------------------#
# 利用预测结果对先验框进行调整
# 首先调整先验框的中心,从先验框中心向右下角偏移
# 再调整先验框的宽高。
#----------------------------------------------------------#
pred_boxes = FloatTensor(prediction[..., :4].shape)
pred_boxes[..., 0] = x.data + grid_x
pred_boxes[..., 1] = y.data + grid_y
......@@ -127,7 +150,10 @@ class DecodeBox(nn.Module):
# ax.add_patch(rect3)
# plt.show()
# 用于将输出调整为相对于416x416的大小
#----------------------------------------------------------#
# 将输出结果调整成相对于输入图像大小
#----------------------------------------------------------#
_scale = torch.Tensor([stride_w, stride_h] * 2).type(FloatTensor)
output = torch.cat((pred_boxes.view(batch_size, -1, 4) * _scale,
conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1)
......@@ -198,7 +224,10 @@ def bbox_iou(box1, box2, x1y1x2y2=True):
def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
# 求左上角和右下角
#----------------------------------------------------------#
# 将预测结果的格式转换成左上角右下角的格式。
# prediction [batch_size, num_anchors, 85]
#----------------------------------------------------------#
box_corner = prediction.new(prediction.shape)
box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
......@@ -208,21 +237,35 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
output = [None for _ in range(len(prediction))]
for image_i, image_pred in enumerate(prediction):
# 获得种类及其置信度
#----------------------------------------------------------#
# 对种类预测部分取max。
# class_conf [batch_size, num_anchors, 1] 种类置信度
# class_pred [batch_size, num_anchors, 1] 种类
#----------------------------------------------------------#
class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)
# 利用置信度进行第一轮筛选
conf_mask = (image_pred[:, 4]*class_conf[:, 0] >= conf_thres).squeeze()
#----------------------------------------------------------#
# 利用置信度进行第一轮筛选
#----------------------------------------------------------#
conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze()
#----------------------------------------------------------#
# 根据置信度进行预测结果的筛选
#----------------------------------------------------------#
image_pred = image_pred[conf_mask]
class_conf = class_conf[conf_mask]
class_pred = class_pred[conf_mask]
if not image_pred.size(0):
continue
# 获得的内容为(x1, y1, x2, y2, obj_conf, class_conf, class_pred)
#-------------------------------------------------------------------------#
# detections [batch_size, num_anchors, 7]
# 7的内容为:x1, y1, x2, y2, obj_conf, class_conf, class_pred
#-------------------------------------------------------------------------#
detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
# 获得种类
#------------------------------------------#
# 获得预测结果中包含的所有种类
#------------------------------------------#
unique_labels = detections[:, -1].cpu().unique()
if prediction.is_cuda:
......@@ -230,7 +273,9 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
detections = detections.cuda()
for c in unique_labels:
# 获得某一类初步筛选后全部的预测结果
#------------------------------------------#
# 获得某一类得分筛选后全部的预测结果
#------------------------------------------#
detections_class = detections[detections[:, -1] == c]
#------------------------------------------#
......@@ -238,7 +283,7 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
#------------------------------------------#
keep = nms(
detections_class[:, :4],
detections_class[:, 4]*detections_class[:, 5],
detections_class[:, 4] * detections_class[:, 5],
nms_thres
)
max_detections = detections_class[keep]
......@@ -264,6 +309,7 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
return output
def merge_bboxes(bboxes, cutx, cuty):
merge_bbox = []
for i in range(len(bboxes)):
......
#-------------------------------------#
# 调用摄像头检测
# 调用摄像头或者视频进行检测
# 调用摄像头直接运行即可
# 调用视频可以将cv2.VideoCapture()指定路径
# 视频的保存并不难,可以百度一下看看
#-------------------------------------#
from yolo import YOLO
from PIL import Image
import numpy as np
import cv2
import time
yolo = YOLO()
# 调用摄像头
capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4")
import cv2
import numpy as np
from PIL import Image
from yolo import YOLO
yolo = YOLO()
#-------------------------------------#
# 调用摄像头
# capture=cv2.VideoCapture("1.mp4")
#-------------------------------------#
capture=cv2.VideoCapture(0)
fps = 0.0
while(True):
t1 = time.time()
......@@ -19,10 +27,8 @@ while(True):
frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
# 转变成Image
frame = Image.fromarray(np.uint8(frame))
# 进行检测
frame = np.array(yolo.detect_image(frame))
# RGBtoBGR满足opencv显示格式
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
......@@ -32,7 +38,6 @@ while(True):
cv2.imshow("video",frame)
c= cv2.waitKey(1) & 0xff
if c==27:
capture.release()
......
#---------------------------------------------#
# 运行前一定要修改classes
# 如果生成的2007_train.txt里面没有目标信息
# 那么就是因为classes没有设定正确
#---------------------------------------------#
import xml.etree.ElementTree as ET
from os import getcwd
......
#-------------------------------------#
# 创建YOLO类
#-------------------------------------#
import cv2
import numpy as np
import colorsys
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
from nets.yolo4 import YoloBody
import torch.backends.cudnn as cudnn
from PIL import Image,ImageFont, ImageDraw
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable
from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes
from nets.yolo4 import YoloBody
from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
non_max_suppression, yolo_correct_boxes)
#--------------------------------------------#
# 使用自己训练好的模型预测需要修改2个参数
# model_path和classes_path都需要修改!
# 如果出现shape不匹配,一定要注意
# 训练时的model_path和classes_path参数的修改
#--------------------------------------------#
class YOLO(object):
_defaults = {
"model_path": 'model_data/yolo4_weights.pth',
"anchors_path": 'model_data/yolo_anchors.txt',
"classes_path": 'model_data/coco_classes.txt',
"model_image_size" : (416, 416, 3),
"confidence": 0.5,
"iou" : 0.3,
"cuda": True
"model_path" : 'model_data/yolo4_weights.pth',
"anchors_path" : 'model_data/yolo_anchors.txt',
"classes_path" : 'model_data/coco_classes.txt',
"model_image_size" : (416, 416, 3),
"confidence" : 0.5,
"iou" : 0.3,
"cuda" : True
}
@classmethod
......@@ -43,6 +49,7 @@ class YOLO(object):
self.class_names = self._get_class()
self.anchors = self._get_anchors()
self.generate()
#---------------------------------------------------#
# 获得所有的分类
#---------------------------------------------------#
......@@ -64,25 +71,31 @@ class YOLO(object):
return np.array(anchors).reshape([-1, 3, 2])[::-1,:,:]
#---------------------------------------------------#
# 获得所有的分类
# 生成模型
#---------------------------------------------------#
def generate(self):
self.net = YoloBody(len(self.anchors[0]),len(self.class_names)).eval()
# 加快模型训练的效率
#---------------------------------------------------#
# 建立yolov4模型
#---------------------------------------------------#
self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()
#---------------------------------------------------#
# 载入yolov4模型的权重
#---------------------------------------------------#
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
state_dict = torch.load(self.model_path, map_location=device)
self.net.load_state_dict(state_dict)
print('Finished!')
if self.cuda:
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
self.net = nn.DataParallel(self.net)
self.net = self.net.cuda()
print('Finished!')
#---------------------------------------------------#
# 建立三个特征层解码用的工具
#---------------------------------------------------#
self.yolo_decodes = []
for i in range(3):
self.yolo_decodes.append(DecodeBox(self.anchors[i], len(self.class_names), (self.model_image_size[1], self.model_image_size[0])))
......@@ -103,45 +116,65 @@ class YOLO(object):
def detect_image(self, image):
image_shape = np.array(np.shape(image)[0:2])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
photo = np.array(crop_img,dtype = np.float32)
photo /= 255.0
photo = np.array(crop_img,dtype = np.float32) / 255.0
photo = np.transpose(photo, (2, 0, 1))
photo = photo.astype(np.float32)
images = []
images.append(photo)
images = np.asarray(images)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images = [photo]
with torch.no_grad():
images = torch.from_numpy(images)
images = torch.from_numpy(np.asarray(images))
if self.cuda:
images = images.cuda()
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
outputs = self.net(images)
output_list = []
for i in range(3):
output_list.append(self.yolo_decodes[i](outputs[i]))
#---------------------------------------------------------#
# 将预测框进行堆叠,然后进行非极大抑制
#---------------------------------------------------------#
output = torch.cat(output_list, 1)
batch_detections = non_max_suppression(output, len(self.class_names),
conf_thres=self.confidence,
nms_thres=self.iou)
#---------------------------------------------------------#
# 如果没有检测出物体,返回原图
#---------------------------------------------------------#
try:
batch_detections = batch_detections[0].cpu().numpy()
except:
return image
output_list = []
for i in range(3):
output_list.append(self.yolo_decodes[i](outputs[i]))
output = torch.cat(output_list, 1)
batch_detections = non_max_suppression(output, len(self.class_names),
conf_thres=self.confidence,
nms_thres=self.iou)
try:
batch_detections = batch_detections[0].cpu().numpy()
except:
return image
top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
top_label = np.array(batch_detections[top_index,-1],np.int32)
top_bboxes = np.array(batch_detections[top_index,:4])
top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
# 去掉灰条
boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
#---------------------------------------------------------#
# 对预测框进行得分筛选
#---------------------------------------------------------#
top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
top_label = np.array(batch_detections[top_index,-1],np.int32)
top_bboxes = np.array(batch_detections[top_index,:4])
top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的top_bboxes是相对于有灰条的图像的
# 我们需要对其进行修改,去除灰条的部分。
#-----------------------------------------------------------------#
boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0]
thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0], 1)
for i, c in enumerate(top_label):
predicted_class = self.class_names[c]
......@@ -163,7 +196,7 @@ class YOLO(object):
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
label = label.encode('utf-8')
print(label)
print(label, top, left, bottom, right)
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册