未验证 提交 a60e3df5 编写于 作者: B Bubbliiiing 提交者: GitHub

Add files via upload

上级 49081f53
......@@ -28,17 +28,19 @@ class FPS_YOLO(YOLO):
# 调整图片使其符合输入要求
image_shape = np.array(np.shape(image)[0:2])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
photo = np.array(crop_img,dtype = np.float32)
photo /= 255.0
photo = np.array(crop_img,dtype = np.float32) / 255.0
photo = np.transpose(photo, (2, 0, 1))
photo = photo.astype(np.float32)
images = []
images.append(photo)
images = np.asarray(images)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images = [photo]
with torch.no_grad():
images = torch.from_numpy(images)
images = torch.from_numpy(np.asarray(images))
if self.cuda:
images = images.cuda()
outputs = self.net(images)
......
#----------------------------------------------------------------------#
#   The actual train/val split is performed inside train.py.
#   It is normal for test.txt and val.txt to be empty; training
#   never reads them.
#----------------------------------------------------------------------#
import os
import random
# Fixed seed so the dataset split is reproducible across runs.
random.seed(0)
# Directory holding the VOC2007 XML annotation files.
xmlfilepath=r'./VOCdevkit/VOC2007/Annotations'
# Output directory for the generated ImageSets/Main txt lists.
saveBasePath=r"./VOCdevkit/VOC2007/ImageSets/Main/"
#----------------------------------------------------------------------#
#   Lower trainval_percent if you want a held-out test set.
#   train_percent does not need to be changed.
#----------------------------------------------------------------------#
trainval_percent=1
train_percent=1
......
import torch
import math
import numpy as np
import torch
def box_ciou(b1, b2):
"""
输入为:
......@@ -53,4 +56,4 @@ def box_ciou(b1, b2):
box1 = torch.from_numpy(np.array([[25,25,40,40]])).type(torch.FloatTensor)
box2 = torch.from_numpy(np.array([[25,25,30,40]])).type(torch.FloatTensor)
print(box_ciou(box1,box2))
\ No newline at end of file
print(box_ciou(box1,box2))
import colorsys
import json
import os
import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable
from tqdm import tqdm
from nets.yolo4 import YoloBody
from utils.utils import (DecodeBox, bbox_iou, diou_non_max_suppression,
letterbox_image, non_max_suppression,
yolo_correct_boxes)
from yolo import YOLO
#--------------------------------------------------------------------------#
#   Class-name -> COCO category id mapping used when writing detection
#   results. Ids follow the 90-id COCO scheme, which is non-contiguous
#   (note the gaps, e.g. 25 -> 27, 28 -> 31).
#   NOTE(review): the empty-string key mapped to 83 looks like a merge
#   artifact -- verify it against the model's class_names file before
#   relying on it; a lookup miss here raises KeyError at eval time.
#--------------------------------------------------------------------------#
coco_classes = {'person': 1, 'bicycle': 2, 'car': 3, 'motorbike': 4, 'aeroplane': 5,
'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 'traffic light': 10, 'fire hydrant': 11,
'': 83, 'stop sign': 13, 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17,
'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 'bear': 23, 'zebra': 24,
'giraffe': 25, 'backpack': 27, 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33,
'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 'kite': 38, 'baseball bat': 39,
'baseball glove': 40, 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 'bottle': 44,
'wine glass': 46, 'cup': 47, 'fork': 48, 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52,
'apple': 53, 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 'hot dog': 58,
'pizza': 59, 'donut': 60, 'cake': 61, 'chair': 62, 'sofa': 63, 'pottedplant': 64, 'bed': 65,
'diningtable': 67, 'toilet': 70, 'tvmonitor': 72, 'laptop': 73, 'mouse': 74, 'remote': 75,
'keyboard': 76, 'cell phone': 77, 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81,
'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 'scissors': 87, 'teddy bear': 88,
'hair drier': 89, 'toothbrush': 90
}
class mAP_YOLO(YOLO):
    #---------------------------------------------------#
    #   Run detection on one image and accumulate
    #   COCO-format result dicts.
    #---------------------------------------------------#
    def detect_image(self, image_id, image, results):
        """Detect objects in `image` and append one COCO result dict per box.

        Args:
            image_id: numeric image identifier (must be convertible to int).
            image:    PIL.Image to run detection on.
            results:  running list of COCO result dicts; appended in place.

        Returns:
            The `results` list -- also when nothing is detected, so the
            caller can keep reassigning the return value.
        """
        # Very low threshold so mAP evaluation also sees low-score boxes.
        self.confidence = 0.001
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Letterbox resize: pad with gray bars so the image is
        #   resized to the model input size without distortion.
        #---------------------------------------------------------#
        crop_img = np.array(letterbox_image(image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   Add the batch_size dimension.
        #---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   Feed the image through the network.
            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))

            #---------------------------------------------------------#
            #   Stack the predictions from all three heads, then NMS.
            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output, len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)

        #---------------------------------------------------------#
        #   No detections for this image: return `results` unchanged.
        #   BUG FIX: the original returned the input image here, which
        #   corrupted the caller's `results` accumulator (the caller
        #   assigns the return value back into `results`).
        #---------------------------------------------------------#
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError):
            return results

        #---------------------------------------------------------#
        #   Filter prediction boxes by combined score.
        #---------------------------------------------------------#
        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:, 0], -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        #-----------------------------------------------------------------#
        #   The boxes are relative to the letterboxed (gray-padded) image,
        #   so remove the padding offset to map them back onto the
        #   original image.
        #-----------------------------------------------------------------#
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([self.model_image_size[0], self.model_image_size[1]]), image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            top, left, bottom, right = boxes[i]

            # Clamp to image bounds and round to integer pixels.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            result = {}
            result["image_id"] = int(image_id)
            result["category_id"] = coco_classes[predicted_class]
            # COCO bbox format: [x, y, width, height].
            result["bbox"] = [float(left), float(top), float(right - left), float(bottom - top)]
            result["score"] = float(top_conf[i])
            results.append(result)
        return results
#--------------------------------------------------------------#
#   Walk the COCO val2017 folder, run detection on every jpg,
#   and dump the accumulated results as a COCO results JSON.
#--------------------------------------------------------------#
yolo = mAP_YOLO()
image_dir = "./coco_dataset/val2017"
with open("./coco_dataset/eval_results.json", "w") as f:
    results = []
    for file_name in tqdm(os.listdir(image_dir)):
        # Skip anything that is not a jpg image.
        if not file_name.endswith("jpg"):
            continue
        image = Image.open(image_dir + "/" + file_name)
        # The filename stem (before the first dot) is the COCO image id.
        results = yolo.detect_image(file_name.split(".")[0], image, results)
    json.dump(results, f)
......@@ -3,19 +3,24 @@
# 具体教程请查看Bilibili
# Bubbliiiing
#-------------------------------------#
import cv2
import numpy as np
import colorsys
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable
from yolo import YOLO
from nets.yolo4 import YoloBody
from PIL import Image,ImageFont, ImageDraw
from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes
from tqdm import tqdm
from nets.yolo4 import YoloBody
from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
non_max_suppression, yolo_correct_boxes)
from yolo import YOLO
class mAP_Yolo(YOLO):
#---------------------------------------------------#
# 检测图片
......@@ -26,42 +31,61 @@ class mAP_Yolo(YOLO):
f = open("./input/detection-results/"+image_id+".txt","w")
image_shape = np.array(np.shape(image)[0:2])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
photo = np.array(crop_img,dtype = np.float32)
photo /= 255.0
photo = np.array(crop_img,dtype = np.float32) / 255.0
photo = np.transpose(photo, (2, 0, 1))
photo = photo.astype(np.float32)
images = []
images.append(photo)
images = np.asarray(images)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images = [photo]
with torch.no_grad():
images = torch.from_numpy(images)
images = torch.from_numpy(np.asarray(images))
if self.cuda:
images = images.cuda()
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
outputs = self.net(images)
output_list = []
for i in range(3):
output_list.append(self.yolo_decodes[i](outputs[i]))
#---------------------------------------------------------#
# 将预测框进行堆叠,然后进行非极大抑制
#---------------------------------------------------------#
output = torch.cat(output_list, 1)
batch_detections = non_max_suppression(output, len(self.class_names),
conf_thres=self.confidence,
nms_thres=self.iou)
#---------------------------------------------------------#
# 如果没有检测出物体,返回原图
#---------------------------------------------------------#
try:
batch_detections = batch_detections[0].cpu().numpy()
except:
return image
output_list = []
for i in range(3):
output_list.append(self.yolo_decodes[i](outputs[i]))
output = torch.cat(output_list, 1)
batch_detections = non_max_suppression(output, len(self.class_names),
conf_thres=self.confidence,
nms_thres=self.iou)
try:
batch_detections = batch_detections[0].cpu().numpy()
except:
return image
top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
top_label = np.array(batch_detections[top_index,-1],np.int32)
top_bboxes = np.array(batch_detections[top_index,:4])
top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
# 去掉灰条
boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
#---------------------------------------------------------#
# 对预测框进行得分筛选
#---------------------------------------------------------#
top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
top_label = np.array(batch_detections[top_index,-1],np.int32)
top_bboxes = np.array(batch_detections[top_index,:4])
top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的top_bboxes是相对于有灰条的图像的
# 我们需要对其进行修改,去除灰条的部分。
#-----------------------------------------------------------------#
boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
for i, c in enumerate(top_label):
predicted_class = self.class_names[c]
......
import argparse
import glob
import json
import math
import operator
import os
import shutil
import operator
import sys
import argparse
import math
import numpy as np
#----------------------------------------------------#
# 用于计算mAP
# 代码克隆自https://github.com/Cartucho/mAP
......
import numpy as np
import xml.etree.ElementTree as ET
import glob
import random
import xml.etree.ElementTree as ET
import numpy as np
def cas_iou(box,cluster):
x = np.minimum(cluster[:,0],box[0])
......@@ -61,6 +63,9 @@ def load_data(path):
tree = ET.parse(xml_file)
height = int(tree.findtext('./size/height'))
width = int(tree.findtext('./size/width'))
if height<=0 or width<=0:
continue
# 对于每一个目标都获得它的宽高
for obj in tree.iter('object'):
xmin = int(float(obj.findtext('bndbox/xmin'))) / width
......@@ -103,4 +108,4 @@ if __name__ == '__main__':
else:
x_y = ", %d,%d" % (data[i][0], data[i][1])
f.write(x_y)
f.close()
\ No newline at end of file
f.close()
import torch
import torch.nn.functional as F
import torch.nn as nn
import math
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
#-------------------------------------------------#
# MISH激活函数
#-------------------------------------------------#
......@@ -14,10 +16,10 @@ class Mish(nn.Module):
def forward(self, x):
    """Apply the Mish activation elementwise: x * tanh(softplus(x))."""
    softplus_x = F.softplus(x)
    return torch.tanh(softplus_x) * x
#-------------------------------------------------#
# 卷积块
# CONV+BATCHNORM+MISH
#-------------------------------------------------#
#---------------------------------------------------#
# 卷积块 -> 卷积 + 标准化 + 激活函数
# Conv2d + BatchNormalization + Mish
#---------------------------------------------------#
class BasicConv(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride=1):
super(BasicConv, self).__init__()
......@@ -37,7 +39,7 @@ class BasicConv(nn.Module):
# 内部堆叠的残差块
#---------------------------------------------------#
class Resblock(nn.Module):
def __init__(self, channels, hidden_channels=None, residual_activation=nn.Identity()):
def __init__(self, channels, hidden_channels=None):
super(Resblock, self).__init__()
if hidden_channels is None:
......@@ -51,33 +53,52 @@ class Resblock(nn.Module):
def forward(self, x):
    """Residual connection: return the input plus the block's output."""
    residual = self.block(x)
    return residual + x
#---------------------------------------------------#
#--------------------------------------------------------------------#
# CSPdarknet的结构块
# 存在一个大残差边
# 这个大残差边绕过了很多的残差结构
#---------------------------------------------------#
# 首先利用ZeroPadding2D和一个步长为2x2的卷积块进行高和宽的压缩
# 然后建立一个大的残差边shortconv、这个大残差边绕过了很多的残差结构
# 主干部分会对num_blocks进行循环,循环内部是残差结构。
# 对于整个CSPdarknet的结构块,就是一个大残差块+内部多个小残差块
#--------------------------------------------------------------------#
class Resblock_body(nn.Module):
def __init__(self, in_channels, out_channels, num_blocks, first):
    """CSPDarknet structure block: a stride-2 downsample followed by a
    CSP split -- one large residual edge (split_conv0) that bypasses the
    stacked residual blocks, and a trunk (split_conv1 + blocks_conv)
    that runs through them.

    Args:
        in_channels:  input channel count.
        out_channels: output channel count.
        num_blocks:   number of Resblocks in the trunk (ignored when
                      `first` is True, which uses a single Resblock).
        first:        True for the first stage, whose split branches
                      keep full `out_channels` width instead of half.
    """
    super(Resblock_body, self).__init__()
    #----------------------------------------------------------------#
    #   Compress height and width with a stride-2x2 conv block.
    #----------------------------------------------------------------#
    self.downsample_conv = BasicConv(in_channels, out_channels, 3, stride=2)

    if first:
        #----------------------------------------------------------------#
        #   Large residual edge that bypasses the residual structures.
        #----------------------------------------------------------------#
        self.split_conv0 = BasicConv(out_channels, out_channels, 1)
        #----------------------------------------------------------------#
        #   Trunk: residual structure looped over (a single Resblock here).
        #----------------------------------------------------------------#
        self.split_conv1 = BasicConv(out_channels, out_channels, 1)
        self.blocks_conv = nn.Sequential(
            Resblock(channels=out_channels, hidden_channels=out_channels//2),
            BasicConv(out_channels, out_channels, 1)
        )
        self.concat_conv = BasicConv(out_channels*2, out_channels, 1)
    else:
        #----------------------------------------------------------------#
        #   Large residual edge that bypasses the residual structures.
        #----------------------------------------------------------------#
        self.split_conv0 = BasicConv(out_channels, out_channels//2, 1)
        #----------------------------------------------------------------#
        #   Trunk: num_blocks residual structures in a row.
        #   BUG FIX: the original assigned split_conv1 twice (merge
        #   artifact), constructing a throwaway BasicConv module.
        #----------------------------------------------------------------#
        self.split_conv1 = BasicConv(out_channels, out_channels//2, 1)
        self.blocks_conv = nn.Sequential(
            *[Resblock(out_channels//2) for _ in range(num_blocks)],
            BasicConv(out_channels//2, out_channels//2, 1)
        )
        self.concat_conv = BasicConv(out_channels, out_channels, 1)
def forward(self, x):
......@@ -88,28 +109,44 @@ class Resblock_body(nn.Module):
x1 = self.split_conv1(x)
x1 = self.blocks_conv(x1)
#------------------------------------#
# 将大残差边再堆叠回来
#------------------------------------#
x = torch.cat([x1, x0], dim=1)
#------------------------------------#
# 最后对通道数进行整合
#------------------------------------#
x = self.concat_conv(x)
return x
#---------------------------------------------------#
# CSPdarknet53 的主体部分
# 输入为一张416x416x3的图片
# 输出为三个有效特征层
#---------------------------------------------------#
class CSPDarkNet(nn.Module):
def __init__(self, layers):
super(CSPDarkNet, self).__init__()
self.inplanes = 32
# 416,416,3 -> 416,416,32
self.conv1 = BasicConv(3, self.inplanes, kernel_size=3, stride=1)
self.feature_channels = [64, 128, 256, 512, 1024]
self.stages = nn.ModuleList([
# 416,416,32 -> 208,208,64
Resblock_body(self.inplanes, self.feature_channels[0], layers[0], first=True),
# 208,208,64 -> 104,104,128
Resblock_body(self.feature_channels[0], self.feature_channels[1], layers[1], first=False),
# 104,104,128 -> 52,52,256
Resblock_body(self.feature_channels[1], self.feature_channels[2], layers[2], first=False),
# 52,52,256 -> 26,26,512
Resblock_body(self.feature_channels[2], self.feature_channels[3], layers[3], first=False),
# 26,26,512 -> 13,13,1024
Resblock_body(self.feature_channels[3], self.feature_channels[4], layers[4], first=False)
])
self.num_features = 1
# 进行权值初始化
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
......
from collections import OrderedDict
import torch
import torch.nn as nn
from collections import OrderedDict
from nets.CSPdarknet import darknet53
def conv2d(filter_in, filter_out, kernel_size, stride=1):
pad = (kernel_size - 1) // 2 if kernel_size else 0
return nn.Sequential(OrderedDict([
......@@ -83,7 +86,13 @@ def yolo_head(filters_list, in_filters):
class YoloBody(nn.Module):
def __init__(self, num_anchors, num_classes):
super(YoloBody, self).__init__()
# backbone
#---------------------------------------------------#
# 生成CSPdarknet53的主干模型
# 获得三个有效特征层,他们的shape分别是:
# 52,52,256
# 26,26,512
# 13,13,1024
#---------------------------------------------------#
self.backbone = darknet53(None)
self.conv1 = make_three_conv([512,1024],1024)
......@@ -97,20 +106,21 @@ class YoloBody(nn.Module):
self.upsample2 = Upsample(256,128)
self.conv_for_P3 = conv2d(256,128,1)
self.make_five_conv2 = make_five_conv([128, 256],256)
# 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
# 4+1+num_classes
# 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75
final_out_filter2 = num_anchors * (5 + num_classes)
self.yolo_head3 = yolo_head([256, final_out_filter2],128)
self.down_sample1 = conv2d(128,256,3,stride=2)
self.make_five_conv3 = make_five_conv([256, 512],512)
# 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
# 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20)=75
final_out_filter1 = num_anchors * (5 + num_classes)
self.yolo_head2 = yolo_head([512, final_out_filter1],256)
self.down_sample2 = conv2d(256,512,3,stride=2)
self.make_five_conv4 = make_five_conv([512, 1024],1024)
# 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75
final_out_filter0 = num_anchors * (5 + num_classes)
self.yolo_head1 = yolo_head([1024, final_out_filter0],512)
......@@ -120,30 +130,58 @@ class YoloBody(nn.Module):
# backbone
x2, x1, x0 = self.backbone(x)
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048
P5 = self.conv1(x0)
P5 = self.SPP(P5)
# 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5 = self.conv2(P5)
# 13,13,512 -> 13,13,256 -> 26,26,256
P5_upsample = self.upsample1(P5)
# 26,26,512 -> 26,26,256
P4 = self.conv_for_P4(x1)
# 26,26,256 + 26,26,256 -> 26,26,512
P4 = torch.cat([P4,P5_upsample],axis=1)
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4 = self.make_five_conv1(P4)
# 26,26,256 -> 26,26,128 -> 52,52,128
P4_upsample = self.upsample2(P4)
# 52,52,256 -> 52,52,128
P3 = self.conv_for_P3(x2)
# 52,52,128 + 52,52,128 -> 52,52,256
P3 = torch.cat([P3,P4_upsample],axis=1)
# 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
P3 = self.make_five_conv2(P3)
# 52,52,128 -> 26,26,256
P3_downsample = self.down_sample1(P3)
# 26,26,256 + 26,26,256 -> 26,26,512
P4 = torch.cat([P3_downsample,P4],axis=1)
# 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
P4 = self.make_five_conv3(P4)
# 26,26,256 -> 13,13,512
P4_downsample = self.down_sample2(P4)
# 13,13,512 + 13,13,512 -> 13,13,1024
P5 = torch.cat([P4_downsample,P5],axis=1)
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
P5 = self.make_five_conv4(P5)
#---------------------------------------------------#
# 第三个特征层
# y3=(batch_size,75,52,52)
#---------------------------------------------------#
out2 = self.yolo_head3(P3)
#---------------------------------------------------#
# 第二个特征层
# y2=(batch_size,75,26,26)
#---------------------------------------------------#
out1 = self.yolo_head2(P4)
#---------------------------------------------------#
# 第一个特征层
# y1=(batch_size,75,13,13)
#---------------------------------------------------#
out0 = self.yolo_head1(P5)
return out0, out1, out2
......
此差异已折叠。
#-------------------------------------#
# 对单张图片进行预测
#-------------------------------------#
from yolo import YOLO
'''
predict.py有几个注意点
1、无法进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。
2、如果想要保存,利用r_image.save("img.jpg")即可保存。
3、如果想要获得框的坐标,可以进入detect_image函数,读取top,left,bottom,right这四个值。
4、如果想要截取下目标,可以利用获取到的top,left,bottom,right这四个值在原图上利用矩阵的方式进行截取。
'''
from PIL import Image
from yolo import YOLO
yolo = YOLO()
while True:
......
......@@ -5,6 +5,7 @@
#--------------------------------------------#
import torch
from torchsummary import summary
from nets.CSPdarknet import darknet53
from nets.yolo4 import YoloBody
......
......@@ -2,20 +2,23 @@
# 对数据集进行训练
#-------------------------------------#
import os
import numpy as np
import time
import numpy as np
import torch
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from utils.dataloader import yolo_dataset_collate, YoloDataset
from nets.yolo_training import YOLOLoss,Generator
from nets.yolo4 import YoloBody
from tqdm import tqdm
from nets.yolo4 import YoloBody
from nets.yolo_training import Generator, YOLOLoss
from utils.dataloader import YoloDataset, yolo_dataset_collate
#---------------------------------------------------#
# 获得类和先验框
#---------------------------------------------------#
......@@ -37,10 +40,12 @@ def get_lr(optimizer):
for param_group in optimizer.param_groups:
return param_group['lr']
def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda):
total_loss = 0
val_loss = 0
start_time = time.time()
net.train()
with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
for iteration, batch in enumerate(gen):
if iteration >= epoch_size:
......@@ -53,25 +58,38 @@ def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
else:
images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
#----------------------#
# 清零梯度
#----------------------#
optimizer.zero_grad()
#----------------------#
# 前向传播
#----------------------#
outputs = net(images)
losses = []
num_pos_all = 0
#----------------------#
# 计算损失
#----------------------#
for i in range(3):
loss_item = yolo_losses[i](outputs[i], targets)
losses.append(loss_item[0])
loss = sum(losses)
loss_item, num_pos = yolo_losses[i](outputs[i], targets)
losses.append(loss_item)
num_pos_all += num_pos
loss = sum(losses) / num_pos_all
#----------------------#
# 反向传播
#----------------------#
loss.backward()
optimizer.step()
total_loss += loss
waste_time = time.time() - start_time
total_loss += loss.item()
pbar.set_postfix(**{'total_loss': total_loss.item() / (iteration + 1),
'lr' : get_lr(optimizer),
'step/s' : waste_time})
pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
'lr' : get_lr(optimizer)})
pbar.update(1)
start_time = time.time()
net.eval()
print('Start Validation')
with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
......@@ -90,14 +108,15 @@ def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
optimizer.zero_grad()
outputs = net(images_val)
losses = []
num_pos_all = 0
for i in range(3):
loss_item = yolo_losses[i](outputs[i], targets_val)
losses.append(loss_item[0])
loss = sum(losses)
val_loss += loss
pbar.set_postfix(**{'total_loss': val_loss.item() / (iteration + 1)})
loss_item, num_pos = yolo_losses[i](outputs[i], targets_val)
losses.append(loss_item)
num_pos_all += num_pos
loss = sum(losses) / num_pos_all
val_loss += loss.item()
pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
pbar.update(1)
net.train()
print('Finish Validation')
print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_loss/(epoch_size_val+1)))
......@@ -111,41 +130,58 @@ def fit_one_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
#----------------------------------------------------#
if __name__ == "__main__":
#-------------------------------#
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape = (416,416)
#-------------------------------#
# tricks的使用设置
# 是否使用Cuda
# 没有GPU可以设置成False
#-------------------------------#
Cosine_lr = False
mosaic = True
# 用于设定是否使用cuda
Cuda = True
smoooth_label = 0
#-------------------------------#
# Dataloder的使用
#-------------------------------#
Use_Data_Loader = True
annotation_path = '2007_train.txt'
#------------------------------------------------------#
# 是否对损失进行归一化
#------------------------------------------------------#
normalize = True
#-------------------------------#
# 获得先验框和类
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape = (416,416)
#----------------------------------------------------#
# classes和anchor的路径,非常重要
# 训练前一定要修改classes_path,使其对应自己的数据集
#----------------------------------------------------#
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/voc_classes.txt'
#----------------------------------------------------#
# 获取classes和anchor
#----------------------------------------------------#
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
num_classes = len(class_names)
# 创建模型
model = YoloBody(len(anchors[0]),num_classes)
#-------------------------------------------#
# 权值文件的下载请看README
#-------------------------------------------#
#------------------------------------------------------#
# Yolov4的tricks应用
# mosaic 马赛克数据增强 True or False
# Cosine_scheduler 余弦退火学习率 True or False
# label_smoothing 标签平滑 0.01以下一般 如0.01、0.005
#------------------------------------------------------#
mosaic = True
Cosine_lr = False
smoooth_label = 0
#------------------------------------------------------#
# 创建yolo模型
# 训练前一定要修改classes_path和对应的txt文件
#------------------------------------------------------#
model = YoloBody(len(anchors[0]), num_classes)
#------------------------------------------------------#
# 权值文件请看README,百度网盘下载
#------------------------------------------------------#
model_path = "model_data/yolo4_weights.pth"
# 加快模型训练的效率
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
......@@ -166,9 +202,17 @@ if __name__ == "__main__":
yolo_losses = []
for i in range(3):
yolo_losses.append(YOLOLoss(np.reshape(anchors,[-1,2]),num_classes, \
(input_shape[1], input_shape[0]), smoooth_label, Cuda))
(input_shape[1], input_shape[0]), smoooth_label, Cuda, normalize))
# 0.1用于验证,0.9用于训练
#----------------------------------------------------#
# 获得图片路径和标签
#----------------------------------------------------#
annotation_path = '2007_train.txt'
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
# 当前划分方式下,验证集和训练集的比例为1:9
#----------------------------------------------------------------------#
val_split = 0.1
with open(annotation_path) as f:
lines = f.readlines()
......@@ -199,17 +243,17 @@ if __name__ == "__main__":
lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
if Use_Data_Loader:
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False)
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False)
gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
else:
gen = Generator(Batch_size, lines[:num_train],
(input_shape[0], input_shape[1])).generate(mosaic = mosaic)
(input_shape[0], input_shape[1])).generate(train=True, mosaic = mosaic)
gen_val = Generator(Batch_size, lines[num_train:],
(input_shape[0], input_shape[1])).generate(mosaic = False)
(input_shape[0], input_shape[1])).generate(train=False, mosaic = mosaic)
epoch_size = max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
......@@ -236,17 +280,17 @@ if __name__ == "__main__":
lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
if Use_Data_Loader:
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False)
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False)
gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
else:
gen = Generator(Batch_size, lines[:num_train],
(input_shape[0], input_shape[1])).generate(mosaic = mosaic)
(input_shape[0], input_shape[1])).generate(train=True, mosaic = mosaic)
gen_val = Generator(Batch_size, lines[num_train:],
(input_shape[0], input_shape[1])).generate(mosaic = False)
(input_shape[0], input_shape[1])).generate(train=False, mosaic = mosaic)
epoch_size = max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
......
......@@ -39,9 +39,11 @@ def get_lr(optimizer):
return param_group['lr']
def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda,writer):
global train_tensorboard_step, val_tensorboard_step
total_loss = 0
val_loss = 0
start_time = time.time()
net.train()
with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
for iteration, batch in enumerate(gen):
if iteration >= epoch_size:
......@@ -54,28 +56,41 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
else:
images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
#----------------------#
# 清零梯度
#----------------------#
optimizer.zero_grad()
#----------------------#
# 前向传播
#----------------------#
outputs = net(images)
losses = []
num_pos_all = 0
#----------------------#
# 计算损失
#----------------------#
for i in range(3):
loss_item = yolo_losses[i](outputs[i], targets)
losses.append(loss_item[0])
loss = sum(losses)
loss_item, num_pos = yolo_losses[i](outputs[i], targets)
losses.append(loss_item)
num_pos_all += num_pos
loss = sum(losses) / num_pos_all
total_loss += loss.item()
#----------------------#
# 反向传播
#----------------------#
loss.backward()
optimizer.step()
# 将loss写入tensorboard,每一步都写
writer.add_scalar('Train_loss', loss, (epoch*epoch_size + iteration))
total_loss += loss
waste_time = time.time() - start_time
pbar.set_postfix(**{'total_loss': total_loss.item() / (iteration + 1),
'lr' : get_lr(optimizer),
'step/s' : waste_time})
pbar.update(1)
# 将loss写入tensorboard,每一步都写
writer.add_scalar('Train_loss', loss, train_tensorboard_step)
train_tensorboard_step += 1
start_time = time.time()
pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
'lr' : get_lr(optimizer)})
pbar.update(1)
# 将loss写入tensorboard,下面注释的是每个世代保存一次
# writer.add_scalar('Train_loss', total_loss/(iteration+1), epoch)
......@@ -97,20 +112,24 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
optimizer.zero_grad()
outputs = net(images_val)
losses = []
num_pos_all = 0
for i in range(3):
loss_item = yolo_losses[i](outputs[i], targets_val)
losses.append(loss_item[0])
loss = sum(losses)
val_loss += loss
loss_item, num_pos = yolo_losses[i](outputs[i], targets_val)
losses.append(loss_item)
num_pos_all += num_pos
loss = sum(losses) / num_pos_all
val_loss += loss.item()
# 将loss写入tensorboard, 下面注释的是每一步都写
# writer.add_scalar('Val_loss',val_loss/(epoch_size_val+1), (epoch*epoch_size_val + iteration))
# writer.add_scalar('Val_loss', loss, val_tensorboard_step)
# val_tensorboard_step += 1
pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
pbar.update(1)
pbar.set_postfix(**{'total_loss': val_loss.item() / (iteration + 1)})
pbar.update(1)
net.train()
# 将loss写入tensorboard,每个世代保存一次
writer.add_scalar('Val_loss',val_loss/(epoch_size_val+1), epoch)
writer.add_scalar('Val_loss',val_loss / (epoch_size_val+1), epoch)
print('Finish Validation')
print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_loss/(epoch_size_val+1)))
......@@ -121,38 +140,58 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
if __name__ == "__main__":
#-------------------------------#
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape = (416,416)
# 是否使用Cuda
# 没有GPU可以设置成False
#-------------------------------#
# tricks的使用设置
#-------------------------------#
Cosine_lr = False
mosaic = True
# 用于设定是否使用cuda
Cuda = True
smoooth_label = 0
#-------------------------------#
# Dataloder的使用
#-------------------------------#
Use_Data_Loader = True
annotation_path = '2007_train.txt'
#------------------------------------------------------#
# 是否对损失进行归一化
#------------------------------------------------------#
normalize = True
#-------------------------------#
# 获得先验框和类
# 输入的shape大小
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
#-------------------------------#
input_shape = (416,416)
#----------------------------------------------------#
# classes和anchor的路径,非常重要
# 训练前一定要修改classes_path,使其对应自己的数据集
#----------------------------------------------------#
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/voc_classes.txt'
#----------------------------------------------------#
# 获取classes和anchor
#----------------------------------------------------#
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
num_classes = len(class_names)
# 创建模型
model = YoloBody(len(anchors[0]),num_classes)
#------------------------------------------------------#
# Yolov4的tricks应用
# mosaic 马赛克数据增强 True or False
# Cosine_scheduler 余弦退火学习率 True or False
# label_smoothing 标签平滑 0.01以下一般 如0.01、0.005
#------------------------------------------------------#
mosaic = True
Cosine_lr = False
smoooth_label = 0
#------------------------------------------------------#
# 创建yolo模型
# 训练前一定要修改classes_path和对应的txt文件
#------------------------------------------------------#
model = YoloBody(len(anchors[0]), num_classes)
#------------------------------------------------------#
# 权值文件请看README,百度网盘下载
#------------------------------------------------------#
model_path = "model_data/yolo4_weights.pth"
# 加快模型训练的效率
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
......@@ -173,9 +212,17 @@ if __name__ == "__main__":
yolo_losses = []
for i in range(3):
yolo_losses.append(YOLOLoss(np.reshape(anchors,[-1,2]),num_classes, \
(input_shape[1], input_shape[0]), smoooth_label, Cuda))
(input_shape[1], input_shape[0]), smoooth_label, Cuda, normalize))
# 0.1用于验证,0.9用于训练
#----------------------------------------------------#
# 获得图片路径和标签
#----------------------------------------------------#
annotation_path = '2007_train.txt'
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
# 当前划分方式下,验证集和训练集的比例为1:9
#----------------------------------------------------------------------#
val_split = 0.1
with open(annotation_path) as f:
lines = f.readlines()
......@@ -184,7 +231,7 @@ if __name__ == "__main__":
np.random.seed(None)
num_val = int(len(lines)*val_split)
num_train = len(lines) - num_val
writer = SummaryWriter(log_dir='logs',flush_secs=60)
if Cuda:
graph_inputs = torch.from_numpy(np.random.rand(1,3,input_shape[0],input_shape[1])).type(torch.FloatTensor).cuda()
......@@ -192,6 +239,16 @@ if __name__ == "__main__":
graph_inputs = torch.from_numpy(np.random.rand(1,3,input_shape[0],input_shape[1])).type(torch.FloatTensor)
writer.add_graph(model, (graph_inputs,))
#------------------------------------------------------#
# 主干特征提取网络特征通用,冻结训练可以加快训练速度
# 也可以在训练初期防止权值被破坏。
# Init_Epoch为起始世代
# Freeze_Epoch为冻结训练的世代
# Epoch总训练世代
# 提示OOM或者显存不足请调小Batch_size
#------------------------------------------------------#
train_tensorboard_step = 1
val_tensorboard_step = 1
if True:
lr = 1e-3
Batch_size = 4
......@@ -205,17 +262,17 @@ if __name__ == "__main__":
lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
if Use_Data_Loader:
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False)
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False)
gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
else:
gen = Generator(Batch_size, lines[:num_train],
(input_shape[0], input_shape[1])).generate(mosaic = mosaic)
(input_shape[0], input_shape[1])).generate(train=True, mosaic = mosaic)
gen_val = Generator(Batch_size, lines[num_train:],
(input_shape[0], input_shape[1])).generate(mosaic = False)
(input_shape[0], input_shape[1])).generate(train=False, mosaic = mosaic)
epoch_size = max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
......@@ -242,17 +299,17 @@ if __name__ == "__main__":
lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
if Use_Data_Loader:
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False)
train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True)
val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False)
gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
drop_last=True, collate_fn=yolo_dataset_collate)
else:
gen = Generator(Batch_size, lines[:num_train],
(input_shape[0], input_shape[1])).generate(mosaic = mosaic)
(input_shape[0], input_shape[1])).generate(train=True, mosaic = mosaic)
gen_val = Generator(Batch_size, lines[num_train:],
(input_shape[0], input_shape[1])).generate(mosaic = False)
(input_shape[0], input_shape[1])).generate(train=False, mosaic = mosaic)
epoch_size = max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
......
......@@ -14,7 +14,7 @@ from nets.yolo_training import Generator
import cv2
class YoloDataset(Dataset):
def __init__(self, train_lines, image_size, mosaic=True):
def __init__(self, train_lines, image_size, mosaic=True, is_train=True):
super(YoloDataset, self).__init__()
self.train_lines = train_lines
......@@ -22,6 +22,7 @@ class YoloDataset(Dataset):
self.image_size = image_size
self.mosaic = mosaic
self.flag = True
self.is_train = is_train
def __len__(self):
return self.train_batches
......@@ -29,7 +30,7 @@ class YoloDataset(Dataset):
def rand(self, a=0, b=1):
return np.random.rand() * (b - a) + a
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
"""实时数据增强的随机预处理"""
line = annotation_line.split()
image = Image.open(line[0])
......@@ -37,6 +38,35 @@ class YoloDataset(Dataset):
h, w = input_shape
box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
if not random:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
dx = (w-nw)//2
dy = (h-nh)//2
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image, np.float32)
# 调整目标框坐标
box_data = np.zeros((len(box), 5))
if len(box) > 0:
np.random.shuffle(box)
box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
box[:, 0:2][box[:, 0:2] < 0] = 0
box[:, 2][box[:, 2] > w] = w
box[:, 3][box[:, 3] > h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w > 1, box_h > 1)] # 保留有效框
box_data = np.zeros((len(box), 5))
box_data[:len(box)] = box
return image_data, box_data
# 调整图片大小
new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
scale = self.rand(.25, 2)
......@@ -92,13 +122,8 @@ class YoloDataset(Dataset):
box = box[np.logical_and(box_w > 1, box_h > 1)] # 保留有效框
box_data = np.zeros((len(box), 5))
box_data[:len(box)] = box
if len(box) == 0:
return image_data, []
if (box_data[:, :4] > 0).any():
return image_data, box_data
else:
return image_data, []
return image_data, box_data
def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5):
h, w = input_shape
......@@ -197,12 +222,7 @@ class YoloDataset(Dataset):
# 对框进行进一步的处理
new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))
if len(new_boxes) == 0:
return new_image, []
if (new_boxes[:, :4] > 0).any():
return new_image, new_boxes
else:
return new_image, []
return new_image, new_boxes
def __getitem__(self, index):
lines = self.train_lines
......@@ -212,10 +232,10 @@ class YoloDataset(Dataset):
if self.flag and (index + 4) < n:
img, y = self.get_random_data_with_Mosaic(lines[index:index + 4], self.image_size[0:2])
else:
img, y = self.get_random_data(lines[index], self.image_size[0:2])
img, y = self.get_random_data(lines[index], self.image_size[0:2], random=self.is_train)
self.flag = bool(1-self.flag)
else:
img, y = self.get_random_data(lines[index], self.image_size[0:2])
img, y = self.get_random_data(lines[index], self.image_size[0:2], random=self.is_train)
if len(y) != 0:
# 从坐标转换成0~1的百分比
......
from __future__ import division
import os
import math
import os
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable
from torchvision.ops import nms
class DecodeBox(nn.Module):
def __init__(self, anchors, num_classes, img_size):
super(DecodeBox, self).__init__()
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
#-----------------------------------------------------------#
self.anchors = anchors
self.num_anchors = len(anchors)
self.num_classes = num_classes
......@@ -21,26 +28,33 @@ class DecodeBox(nn.Module):
self.img_size = img_size
def forward(self, input):
# input为bs,3*(1+4+num_classes),13,13
# 一共多少张图片
#-----------------------------------------------#
# 输入的input一共有三个,他们的shape分别是
# batch_size, 255, 13, 13
# batch_size, 255, 26, 26
# batch_size, 255, 52, 52
#-----------------------------------------------#
batch_size = input.size(0)
# 13,13
input_height = input.size(2)
input_width = input.size(3)
# 计算步长
# 每一个特征点对应原来的图片上多少个像素点
# 如果特征层为13x13的话,一个特征点就对应原来的图片上的32个像素点
# 416/13 = 32
#-----------------------------------------------#
# 输入为416x416时
# stride_h = stride_w = 32、16、8
#-----------------------------------------------#
stride_h = self.img_size[1] / input_height
stride_w = self.img_size[0] / input_width
# 把先验框的尺寸调整成特征层大小的形式
# 计算出先验框在特征层上对应的宽高
#-------------------------------------------------#
# 此时获得的scaled_anchors大小是相对于特征层的
#-------------------------------------------------#
scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors]
# bs,3*(5+num_classes),13,13 -> bs,3,13,13,(5+num_classes)
#-----------------------------------------------#
# 输入的input一共有三个,他们的shape分别是
# batch_size, 3, 13, 13, 85
# batch_size, 3, 26, 26, 85
# batch_size, 3, 52, 52, 85
#-----------------------------------------------#
prediction = input.view(batch_size, self.num_anchors,
self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous()
......@@ -48,30 +62,39 @@ class DecodeBox(nn.Module):
x = torch.sigmoid(prediction[..., 0])
y = torch.sigmoid(prediction[..., 1])
# 先验框的宽高调整参数
w = prediction[..., 2] # Width
h = prediction[..., 3] # Height
w = prediction[..., 2]
h = prediction[..., 3]
# 获得置信度,是否有物体
conf = torch.sigmoid(prediction[..., 4])
# 种类置信度
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
pred_cls = torch.sigmoid(prediction[..., 5:])
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
# 生成网格,先验框中心,网格左上角 batch_size,3,13,13
#----------------------------------------------------------#
# 生成网格,先验框中心,网格左上角
# batch_size,3,13,13
#----------------------------------------------------------#
grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat(
batch_size * self.num_anchors, 1, 1).view(x.shape).type(FloatTensor)
grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat(
batch_size * self.num_anchors, 1, 1).view(y.shape).type(FloatTensor)
# 生成先验框的宽高
#----------------------------------------------------------#
# 按照网格格式生成先验框的宽高
# batch_size,3,13,13
#----------------------------------------------------------#
anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape)
anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape)
# 计算调整后的先验框中心与宽高
#----------------------------------------------------------#
# 利用预测结果对先验框进行调整
# 首先调整先验框的中心,从先验框中心向右下角偏移
# 再调整先验框的宽高。
#----------------------------------------------------------#
pred_boxes = FloatTensor(prediction[..., :4].shape)
pred_boxes[..., 0] = x.data + grid_x
pred_boxes[..., 1] = y.data + grid_y
......@@ -127,7 +150,10 @@ class DecodeBox(nn.Module):
# ax.add_patch(rect3)
# plt.show()
# 用于将输出调整为相对于416x416的大小
#----------------------------------------------------------#
# 将输出结果调整成相对于输入图像大小
#----------------------------------------------------------#
_scale = torch.Tensor([stride_w, stride_h] * 2).type(FloatTensor)
output = torch.cat((pred_boxes.view(batch_size, -1, 4) * _scale,
conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1)
......@@ -198,7 +224,10 @@ def bbox_iou(box1, box2, x1y1x2y2=True):
def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
# 求左上角和右下角
#----------------------------------------------------------#
# 将预测结果的格式转换成左上角右下角的格式。
# prediction [batch_size, num_anchors, 85]
#----------------------------------------------------------#
box_corner = prediction.new(prediction.shape)
box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
......@@ -208,21 +237,35 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
output = [None for _ in range(len(prediction))]
for image_i, image_pred in enumerate(prediction):
# 获得种类及其置信度
#----------------------------------------------------------#
# 对种类预测部分取max。
# class_conf [batch_size, num_anchors, 1] 种类置信度
# class_pred [batch_size, num_anchors, 1] 种类
#----------------------------------------------------------#
class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)
# 利用置信度进行第一轮筛选
conf_mask = (image_pred[:, 4]*class_conf[:, 0] >= conf_thres).squeeze()
#----------------------------------------------------------#
# 利用置信度进行第一轮筛选
#----------------------------------------------------------#
conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze()
#----------------------------------------------------------#
# 根据置信度进行预测结果的筛选
#----------------------------------------------------------#
image_pred = image_pred[conf_mask]
class_conf = class_conf[conf_mask]
class_pred = class_pred[conf_mask]
if not image_pred.size(0):
continue
# 获得的内容为(x1, y1, x2, y2, obj_conf, class_conf, class_pred)
#-------------------------------------------------------------------------#
# detections [batch_size, num_anchors, 7]
# 7的内容为:x1, y1, x2, y2, obj_conf, class_conf, class_pred
#-------------------------------------------------------------------------#
detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
# 获得种类
#------------------------------------------#
# 获得预测结果中包含的所有种类
#------------------------------------------#
unique_labels = detections[:, -1].cpu().unique()
if prediction.is_cuda:
......@@ -230,7 +273,9 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
detections = detections.cuda()
for c in unique_labels:
# 获得某一类初步筛选后全部的预测结果
#------------------------------------------#
# 获得某一类得分筛选后全部的预测结果
#------------------------------------------#
detections_class = detections[detections[:, -1] == c]
#------------------------------------------#
......@@ -238,7 +283,7 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
#------------------------------------------#
keep = nms(
detections_class[:, :4],
detections_class[:, 4]*detections_class[:, 5],
detections_class[:, 4] * detections_class[:, 5],
nms_thres
)
max_detections = detections_class[keep]
......@@ -264,6 +309,7 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
return output
def merge_bboxes(bboxes, cutx, cuty):
merge_bbox = []
for i in range(len(bboxes)):
......
#-------------------------------------#
# 调用摄像头检测
# 调用摄像头或者视频进行检测
# 调用摄像头直接运行即可
# 调用视频可以将cv2.VideoCapture()指定路径
# 视频的保存并不难,可以百度一下看看
#-------------------------------------#
from yolo import YOLO
from PIL import Image
import numpy as np
import cv2
import time
yolo = YOLO()
# 调用摄像头
capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4")
import cv2
import numpy as np
from PIL import Image
from yolo import YOLO
yolo = YOLO()
#-------------------------------------#
# 调用摄像头
# capture=cv2.VideoCapture("1.mp4")
#-------------------------------------#
capture=cv2.VideoCapture(0)
fps = 0.0
while(True):
t1 = time.time()
......@@ -19,10 +27,8 @@ while(True):
frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
# 转变成Image
frame = Image.fromarray(np.uint8(frame))
# 进行检测
frame = np.array(yolo.detect_image(frame))
# RGBtoBGR满足opencv显示格式
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
......@@ -32,7 +38,6 @@ while(True):
cv2.imshow("video",frame)
c= cv2.waitKey(1) & 0xff
if c==27:
capture.release()
......
#---------------------------------------------#
# 运行前一定要修改classes
# 如果生成的2007_train.txt里面没有目标信息
# 那么就是因为classes没有设定正确
#---------------------------------------------#
import xml.etree.ElementTree as ET
from os import getcwd
......
#-------------------------------------#
# 创建YOLO类
#-------------------------------------#
import cv2
import numpy as np
import colorsys
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
from nets.yolo4 import YoloBody
import torch.backends.cudnn as cudnn
from PIL import Image,ImageFont, ImageDraw
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont
from torch.autograd import Variable
from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes
from nets.yolo4 import YoloBody
from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
non_max_suppression, yolo_correct_boxes)
#--------------------------------------------#
# 使用自己训练好的模型预测需要修改2个参数
# model_path和classes_path都需要修改!
# 如果出现shape不匹配,一定要注意
# 训练时的model_path和classes_path参数的修改
#--------------------------------------------#
class YOLO(object):
_defaults = {
"model_path": 'model_data/yolo4_weights.pth',
"anchors_path": 'model_data/yolo_anchors.txt',
"classes_path": 'model_data/coco_classes.txt',
"model_image_size" : (416, 416, 3),
"confidence": 0.5,
"iou" : 0.3,
"cuda": True
"model_path" : 'model_data/yolo4_weights.pth',
"anchors_path" : 'model_data/yolo_anchors.txt',
"classes_path" : 'model_data/coco_classes.txt',
"model_image_size" : (416, 416, 3),
"confidence" : 0.5,
"iou" : 0.3,
"cuda" : True
}
@classmethod
......@@ -43,6 +49,7 @@ class YOLO(object):
self.class_names = self._get_class()
self.anchors = self._get_anchors()
self.generate()
#---------------------------------------------------#
# 获得所有的分类
#---------------------------------------------------#
......@@ -64,25 +71,31 @@ class YOLO(object):
return np.array(anchors).reshape([-1, 3, 2])[::-1,:,:]
#---------------------------------------------------#
# 获得所有的分类
# 生成模型
#---------------------------------------------------#
def generate(self):
self.net = YoloBody(len(self.anchors[0]),len(self.class_names)).eval()
# 加快模型训练的效率
#---------------------------------------------------#
# 建立yolov4模型
#---------------------------------------------------#
self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()
#---------------------------------------------------#
# 载入yolov4模型的权重
#---------------------------------------------------#
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
state_dict = torch.load(self.model_path, map_location=device)
self.net.load_state_dict(state_dict)
print('Finished!')
if self.cuda:
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
self.net = nn.DataParallel(self.net)
self.net = self.net.cuda()
print('Finished!')
#---------------------------------------------------#
# 建立三个特征层解码用的工具
#---------------------------------------------------#
self.yolo_decodes = []
for i in range(3):
self.yolo_decodes.append(DecodeBox(self.anchors[i], len(self.class_names), (self.model_image_size[1], self.model_image_size[0])))
......@@ -103,45 +116,65 @@ class YOLO(object):
def detect_image(self, image):
image_shape = np.array(np.shape(image)[0:2])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
photo = np.array(crop_img,dtype = np.float32)
photo /= 255.0
photo = np.array(crop_img,dtype = np.float32) / 255.0
photo = np.transpose(photo, (2, 0, 1))
photo = photo.astype(np.float32)
images = []
images.append(photo)
images = np.asarray(images)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images = [photo]
with torch.no_grad():
images = torch.from_numpy(images)
images = torch.from_numpy(np.asarray(images))
if self.cuda:
images = images.cuda()
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
outputs = self.net(images)
output_list = []
for i in range(3):
output_list.append(self.yolo_decodes[i](outputs[i]))
#---------------------------------------------------------#
# 将预测框进行堆叠,然后进行非极大抑制
#---------------------------------------------------------#
output = torch.cat(output_list, 1)
batch_detections = non_max_suppression(output, len(self.class_names),
conf_thres=self.confidence,
nms_thres=self.iou)
#---------------------------------------------------------#
# 如果没有检测出物体,返回原图
#---------------------------------------------------------#
try:
batch_detections = batch_detections[0].cpu().numpy()
except:
return image
output_list = []
for i in range(3):
output_list.append(self.yolo_decodes[i](outputs[i]))
output = torch.cat(output_list, 1)
batch_detections = non_max_suppression(output, len(self.class_names),
conf_thres=self.confidence,
nms_thres=self.iou)
try:
batch_detections = batch_detections[0].cpu().numpy()
except:
return image
top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
top_label = np.array(batch_detections[top_index,-1],np.int32)
top_bboxes = np.array(batch_detections[top_index,:4])
top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
# 去掉灰条
boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
#---------------------------------------------------------#
# 对预测框进行得分筛选
#---------------------------------------------------------#
top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
top_label = np.array(batch_detections[top_index,-1],np.int32)
top_bboxes = np.array(batch_detections[top_index,:4])
top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的top_bboxes是相对于有灰条的图像的
# 我们需要对其进行修改,去除灰条的部分。
#-----------------------------------------------------------------#
boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0]
thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0], 1)
for i, c in enumerate(top_label):
predicted_class = self.class_names[c]
......@@ -163,7 +196,7 @@ class YOLO(object):
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
label = label.encode('utf-8')
print(label)
print(label, top, left, bottom, right)
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册