diff --git a/FPS_test.py b/FPS_test.py
index 7f024e6cf6b12516ff39a20401b5076e86488657..105b43a95ba31c1fc7a46a6c45bd70817797363c 100644
--- a/FPS_test.py
+++ b/FPS_test.py
@@ -25,20 +25,20 @@ The FPS measured in video.py will be lower than this FPS, because the camera read rate is limited
 '''
 class FPS_YOLO(YOLO):
     def get_FPS(self, image, test_interval):
-        # Resize the image to match the input requirements
         image_shape = np.array(np.shape(image)[0:2])
-
+        #---------------------------------------------------------#
+        #   Add gray bars to the image for a distortion-free resize
+        #---------------------------------------------------------#
        crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
-        photo = np.array(crop_img,dtype = np.float32)
-        photo /= 255.0
+        photo = np.array(crop_img,dtype = np.float32) / 255.0
         photo = np.transpose(photo, (2, 0, 1))
-        photo = photo.astype(np.float32)
-        images = []
-        images.append(photo)
-        images = np.asarray(images)
+        #---------------------------------------------------------#
+        #   Add the batch_size dimension
+        #---------------------------------------------------------#
+        images = [photo]
 
         with torch.no_grad():
-            images = torch.from_numpy(images)
+            images = torch.from_numpy(np.asarray(images))
             if self.cuda:
                 images = images.cuda()
             outputs = self.net(images)
diff --git a/VOCdevkit/VOC2007/voc2yolo3.py b/VOCdevkit/VOC2007/voc2yolo3.py
index 672eb48f75b95a74ed53433e3022597f94f0c6a4..c1eadf8a0d8c13e8f5f7a4712223e876b491787b 100644
--- a/VOCdevkit/VOC2007/voc2yolo3.py
+++ b/VOCdevkit/VOC2007/voc2yolo3.py
@@ -1,10 +1,18 @@
+
+#----------------------------------------------------------------------#
+#   The validation split is performed inside train.py.
+#   It is normal for test.txt and val.txt to be empty; training
+#   does not use them.
+#----------------------------------------------------------------------#
 import os
 import random
-random.seed(0)
-
+ 
 xmlfilepath=r'./VOCdevkit/VOC2007/Annotations'
 saveBasePath=r"./VOCdevkit/VOC2007/ImageSets/Main/"
+#----------------------------------------------------------------------#
+#   To add a test set, modify trainval_percent.
+#   train_percent does not need to be changed.
+#----------------------------------------------------------------------#
 trainval_percent=1
 train_percent=1
diff --git a/get_dr_txt.py b/get_dr_txt.py
index f92bcd0faea88b565f0d76f8cec54ee5a8b02e44..177d53d9d642bf75f1d00a812d1421f2a10c22d8 100644
--- a/get_dr_txt.py
+++ b/get_dr_txt.py
@@ -1,22 +1,26 @@
-#-------------------------------------#
-#       Code for generating the files needed for mAP
-#       See Bilibili for the full tutorial
-#       Bubbliiiing
-#-------------------------------------#
-import cv2
-import numpy as np
+#----------------------------------------------------#
+#   Generate detection-results and images-optional for the test set
+#   Video tutorial:
+#   https://www.bilibili.com/video/BV1zE411u7Vw
+#----------------------------------------------------#
 import colorsys
 import os
+
+import cv2
+import numpy as np
 import torch
-import torch.nn as nn
 import torch.backends.cudnn as cudnn
+import torch.nn as nn
+from PIL import Image, ImageDraw, ImageFont
 from torch.autograd import Variable
-from yolo import YOLO
+from tqdm import tqdm
+
 from nets.yolo3 import YoloBody
-from PIL import Image,ImageFont, ImageDraw
 from utils.config import Config
-from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes
-from tqdm import tqdm
+from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
+                         non_max_suppression, yolo_correct_boxes)
+from yolo import YOLO
+
 
 class mAP_Yolo(YOLO):
     #---------------------------------------------------#
@@ -28,40 +32,61 @@ class mAP_Yolo(YOLO):
         f = open("./input/detection-results/"+image_id+".txt","w")
         image_shape = np.array(np.shape(image)[0:2])
+
+        #---------------------------------------------------------#
+        #   Add gray bars to the image for a distortion-free resize
+        #---------------------------------------------------------#
         crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
-        photo = np.array(crop_img,dtype = np.float32)
-        photo /= 255.0
+        photo = np.array(crop_img,dtype = np.float32) / 255.0
         photo = np.transpose(photo, (2, 0, 1))
-        photo = photo.astype(np.float32)
-        images = []
-        images.append(photo)
-
-        images = np.asarray(images)
-        images = torch.from_numpy(images)
-        if self.cuda:
-            images = images.cuda()
-
+        #---------------------------------------------------------#
+        #   Add the batch_size dimension
+        #---------------------------------------------------------#
+        images = [photo]
+
         with torch.no_grad():
+            images = torch.from_numpy(np.asarray(images))
+            if self.cuda:
+                images = images.cuda()
+
+            #---------------------------------------------------------#
+            #   Feed the image into the network for prediction!
+            #---------------------------------------------------------#
             outputs = self.net(images)
             output_list = []
             for i in range(3):
                 output_list.append(self.yolo_decodes[i](outputs[i]))
+
+            #---------------------------------------------------------#
+            #   Stack the prediction boxes, then run non-maximum suppression
+            #---------------------------------------------------------#
             output = torch.cat(output_list, 1)
             batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
                                                    conf_thres=self.confidence,
                                                    nms_thres=self.iou)
-        try :
-            batch_detections = batch_detections[0].cpu().numpy()
-        except:
-            return image
-        top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
-        top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
-        top_label = np.array(batch_detections[top_index,-1],np.int32)
-        top_bboxes = np.array(batch_detections[top_index,:4])
-        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
-
-        # Remove the gray bars
-        boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
+
+            #---------------------------------------------------------#
+            #   If no object is detected, return the original image
+            #---------------------------------------------------------#
+            try :
+                batch_detections = batch_detections[0].cpu().numpy()
+            except:
+                return image
+
+            #---------------------------------------------------------#
+            #   Filter the prediction boxes by score
+            #---------------------------------------------------------#
+            top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
+            top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
+            top_label = np.array(batch_detections[top_index,-1],np.int32)
+            top_bboxes = np.array(batch_detections[top_index,:4])
+            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
+
+            #-----------------------------------------------------------------#
+            #   letterbox_image added gray bars around the image before it
+            #   was fed into the network, so the resulting top_bboxes are
+            #   relative to the padded image. Correct them to remove the
+            #   gray-bar offset.
+            #-----------------------------------------------------------------#
+            boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
 
         for i, c in enumerate(top_label):
             predicted_class = self.class_names[c]
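The "gray bars" comments above refer to utils.letterbox_image: shrink the image without changing its aspect ratio, then pad the remainder with gray. A minimal sketch of the idea, assuming a PIL input and a (width, height) target such as (416, 416); the repo's own implementation may differ in detail:

    import numpy as np
    from PIL import Image

    def letterbox_sketch(image, size):
        iw, ih = image.size
        w, h = size
        scale = min(w / iw, h / ih)              # shrink so the whole image fits
        nw, nh = int(iw * scale), int(ih * scale)
        resized = image.resize((nw, nh), Image.BICUBIC)
        # paste the resized image centered on a gray canvas
        canvas = Image.new('RGB', size, (128, 128, 128))
        canvas.paste(resized, ((w - nw) // 2, (h - nh) // 2))
        return canvas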
diff --git a/nets/darknet.py b/nets/darknet.py
index 7d02903c3f33eab3fa2a0493202071ddc6f1287b..71cb9b7575905b1707baaa8a2245d77b10af938a 100644
--- a/nets/darknet.py
+++ b/nets/darknet.py
@@ -1,9 +1,15 @@
-import torch
-import torch.nn as nn
 import math
 from collections import OrderedDict
 
-# Basic darknet block
+import torch
+import torch.nn as nn
+
+
+#---------------------------------------------------------------------#
+#   Residual block
+#   A 1x1 convolution reduces the channel count, then a 3x3 convolution
+#   extracts features and raises the channel count back.
+#   A residual connection is added at the end.
+#---------------------------------------------------------------------#
 class BasicBlock(nn.Module):
     def __init__(self, inplanes, planes):
         super(BasicBlock, self).__init__()
@@ -36,14 +42,20 @@ class DarkNet(nn.Module):
     def __init__(self, layers):
         super(DarkNet, self).__init__()
         self.inplanes = 32
+        # 416,416,3 -> 416,416,32
         self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
         self.bn1 = nn.BatchNorm2d(self.inplanes)
         self.relu1 = nn.LeakyReLU(0.1)
+
+        # 416,416,32 -> 208,208,64
         self.layer1 = self._make_layer([32, 64], layers[0])
+        # 208,208,64 -> 104,104,128
         self.layer2 = self._make_layer([64, 128], layers[1])
+        # 104,104,128 -> 52,52,256
         self.layer3 = self._make_layer([128, 256], layers[2])
+        # 52,52,256 -> 26,26,512
         self.layer4 = self._make_layer([256, 512], layers[3])
+        # 26,26,512 -> 13,13,1024
         self.layer5 = self._make_layer([512, 1024], layers[4])
 
         self.layers_out_filters = [64, 128, 256, 512, 1024]
@@ -57,6 +69,10 @@ class DarkNet(nn.Module):
                 m.weight.data.fill_(1)
                 m.bias.data.zero_()
 
+    #---------------------------------------------------------------------#
+    #   Inside each layer, a 3x3 convolution with stride 2 first
+    #   downsamples, then the residual blocks are stacked.
+    #---------------------------------------------------------------------#
     def _make_layer(self, planes, blocks):
         layers = []
         # Downsampling: stride 2, kernel size 3
         layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3,
                        stride=2, padding=1, bias=False)))
         layers.append(("ds_bn", nn.BatchNorm2d(planes[1])))
         layers.append(("ds_relu", nn.LeakyReLU(0.1)))
-        # Add the darknet blocks
+        # Stack the residual blocks
         self.inplanes = planes[1]
         for i in range(0, blocks):
             layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes)))
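The shape comments in DarkNet.__init__ can be checked directly. A small sanity check, assuming the repo root is on PYTHONPATH so nets.darknet imports exactly as in the diff:

    import torch
    from nets.darknet import darknet53

    model = darknet53(None)
    x = torch.randn(1, 3, 416, 416)
    out3, out4, out5 = model(x)     # the three effective feature layers
    print(out3.shape)               # torch.Size([1, 256, 52, 52])
    print(out4.shape)               # torch.Size([1, 512, 26, 26])
    print(out5.shape)               # torch.Size([1, 1024, 13, 13])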
diff --git a/nets/yolo3.py b/nets/yolo3.py
index a7aa43bc21c07fb2a7e278f4de12a2362760380a..6916db5e97dcf73dc4a2cc7063df1c1b59bd5c2d 100644
--- a/nets/yolo3.py
+++ b/nets/yolo3.py
@@ -1,8 +1,11 @@
+from collections import OrderedDict
+
 import torch
 import torch.nn as nn
-from collections import OrderedDict
+
 from nets.darknet import darknet53
 
+
 def conv2d(filter_in, filter_out, kernel_size):
     pad = (kernel_size - 1) // 2 if kernel_size else 0
     return nn.Sequential(OrderedDict([
@@ -11,6 +14,10 @@ def conv2d(filter_in, filter_out, kernel_size):
         ("relu", nn.LeakyReLU(0.1)),
     ]))
 
+#------------------------------------------------------------------------#
+#   make_last_layers contains seven convolutions in total: the first
+#   five extract features, the last two produce the yolo network's
+#   prediction results.
+#------------------------------------------------------------------------#
 def make_last_layers(filters_list, in_filters, out_filter):
     m = nn.ModuleList([
         conv2d(in_filters, filters_list[0], 1),
@@ -28,21 +35,30 @@ class YoloBody(nn.Module):
     def __init__(self, config):
         super(YoloBody, self).__init__()
         self.config = config
-        #  backbone
+        #---------------------------------------------------#
+        #   Build the darknet53 backbone model.
+        #   It returns three effective feature layers with shapes:
+        #   52,52,256
+        #   26,26,512
+        #   13,13,1024
+        #---------------------------------------------------#
         self.backbone = darknet53(None)
+
+        # out_filters : [64, 128, 256, 512, 1024]
         out_filters = self.backbone.layers_out_filters
-        #  last_layer0
+
+        #------------------------------------------------------------------------#
+        #   Compute the output channel count of the yolo head.
+        #   For the VOC dataset:
+        #   final_out_filter0 = final_out_filter1 = final_out_filter2 = 75
+        #------------------------------------------------------------------------#
         final_out_filter0 = len(config["yolo"]["anchors"][0]) * (5 + config["yolo"]["classes"])
         self.last_layer0 = make_last_layers([512, 1024], out_filters[-1], final_out_filter0)
-        #  embedding1
+
         final_out_filter1 = len(config["yolo"]["anchors"][1]) * (5 + config["yolo"]["classes"])
         self.last_layer1_conv = conv2d(512, 256, 1)
         self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
         self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256, final_out_filter1)
-        #  embedding2
+
         final_out_filter2 = len(config["yolo"]["anchors"][2]) * (5 + config["yolo"]["classes"])
         self.last_layer2_conv = conv2d(256, 128, 1)
         self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
@@ -56,21 +72,43 @@ class YoloBody(nn.Module):
             if i == 4:
                 out_branch = layer_in
             return layer_in, out_branch
-        #  backbone
+        #---------------------------------------------------#
+        #   Get the three effective feature layers; their shapes are:
+        #   52,52,256; 26,26,512; 13,13,1024
+        #---------------------------------------------------#
         x2, x1, x0 = self.backbone(x)
-        #  yolo branch 0
+
+        #---------------------------------------------------#
+        #   First feature layer
+        #   out0 = (batch_size,255,13,13)
+        #---------------------------------------------------#
+        # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
         out0, out0_branch = _branch(self.last_layer0, x0)
-        #  yolo branch 1
+
+        # 13,13,512 -> 13,13,256 -> 26,26,256
         x1_in = self.last_layer1_conv(out0_branch)
         x1_in = self.last_layer1_upsample(x1_in)
+
+        # 26,26,256 + 26,26,512 -> 26,26,768
         x1_in = torch.cat([x1_in, x1], 1)
+        #---------------------------------------------------#
+        #   Second feature layer
+        #   out1 = (batch_size,255,26,26)
+        #---------------------------------------------------#
+        # 26,26,768 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
         out1, out1_branch = _branch(self.last_layer1, x1_in)
-        #  yolo branch 2
+
+        # 26,26,256 -> 26,26,128 -> 52,52,128
         x2_in = self.last_layer2_conv(out1_branch)
         x2_in = self.last_layer2_upsample(x2_in)
+
+        # 52,52,128 + 52,52,256 -> 52,52,384
         x2_in = torch.cat([x2_in, x2], 1)
+        #---------------------------------------------------#
+        #   Third feature layer
+        #   out2 = (batch_size,255,52,52)
+        #---------------------------------------------------#
+        # 52,52,384 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
         out2, _ = _branch(self.last_layer2, x2_in)
         return out0, out1, out2
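The final_out_filter arithmetic in the comment is worth spelling out: per anchor the head predicts 4 box parameters, 1 objectness score and num_classes class scores, and each head owns 3 anchors:

    num_anchors_per_head = 3
    voc_classes = 20
    coco_classes = 80

    print(num_anchors_per_head * (5 + voc_classes))   # 75  -> VOC, as in the comment
    print(num_anchors_per_head * (5 + coco_classes))  # 255 -> COCO, the 255 in the shape notes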
diff --git a/nets/yolo_training.py b/nets/yolo_training.py
index f9e97a5f1b35141dce62dd20f8d05b5da3d1433e..6bc9610d6854726b03be9c655f83b8bf53c45061 100644
--- a/nets/yolo_training.py
+++ b/nets/yolo_training.py
@@ -1,17 +1,21 @@
-import cv2
+import math
 from random import shuffle
+
+import cv2
 import numpy as np
 import torch
 import torch.nn as nn
-import math
 import torch.nn.functional as F
-from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
+from matplotlib.colors import hsv_to_rgb, rgb_to_hsv
 from PIL import Image
 
 from utils.utils import bbox_iou
 
+
 def jaccard(_box_a, _box_b):
+    # Compute the top-left and bottom-right corners of the ground-truth boxes
     b1_x1, b1_x2 = _box_a[:, 0] - _box_a[:, 2] / 2, _box_a[:, 0] + _box_a[:, 2] / 2
     b1_y1, b1_y2 = _box_a[:, 1] - _box_a[:, 3] / 2, _box_a[:, 1] + _box_a[:, 3] / 2
+    # Compute the top-left and bottom-right corners of the anchor boxes
     b2_x1, b2_x2 = _box_b[:, 0] - _box_b[:, 2] / 2, _box_b[:, 0] + _box_b[:, 2] / 2
     b2_y1, b2_y2 = _box_b[:, 1] - _box_b[:, 3] / 2, _box_b[:, 1] + _box_b[:, 3] / 2
     box_a = torch.zeros_like(_box_a)
@@ -53,12 +57,21 @@ def BCELoss(pred,target):
     return output
 
 class YOLOLoss(nn.Module):
-    def __init__(self, anchors, num_classes, img_size, cuda):
+    def __init__(self, anchors, num_classes, img_size, cuda, normalize):
         super(YOLOLoss, self).__init__()
+        #-----------------------------------------------------------#
+        #   The 13x13 feature layer uses anchors [116,90],[156,198],[373,326]
+        #   The 26x26 feature layer uses anchors [30,61],[62,45],[59,119]
+        #   The 52x52 feature layer uses anchors [10,13],[16,30],[33,23]
+        #-----------------------------------------------------------#
         self.anchors = anchors
         self.num_anchors = len(anchors)
         self.num_classes = num_classes
         self.bbox_attrs = 5 + num_classes
+        #-------------------------------------#
+        #   Widths/heights of the feature layers:
+        #   13, 26, 52
+        #-------------------------------------#
         self.feature_length = [img_size[0]//32,img_size[0]//16,img_size[0]//8]
         self.img_size = img_size
@@ -68,60 +81,103 @@ class YOLOLoss(nn.Module):
         self.lambda_conf = 1.0
         self.lambda_cls = 1.0
         self.cuda = cuda
+        self.normalize = normalize
 
     def forward(self, input, targets=None):
-        # input is bs,3*(5+num_classes),13,13
+        #----------------------------------------------------#
+        #   input has shape  bs, 3*(5+num_classes), 13, 13
+        #                    bs, 3*(5+num_classes), 26, 26
+        #                    bs, 3*(5+num_classes), 52, 52
+        #----------------------------------------------------#
 
-        # Number of images in the batch
+        #-----------------------#
+        #   Number of images in the batch
+        #-----------------------#
         bs = input.size(0)
-        # Height of the feature layer
+        #-----------------------#
+        #   Height of the feature layer
+        #-----------------------#
         in_h = input.size(2)
-        # Width of the feature layer
+        #-----------------------#
+        #   Width of the feature layer
+        #-----------------------#
         in_w = input.size(3)
 
-        # Compute the stride:
-        # how many pixels on the original image each feature point covers.
-        # For a 13x13 feature layer, one feature point covers 32 pixels of the original image.
+        #-----------------------------------------------------------------------#
+        #   Compute the stride:
+        #   how many pixels on the original image each feature point covers.
+        #   For a 13x13 feature layer, one feature point covers 32 pixels;
+        #   for a 26x26 feature layer, one feature point covers 16 pixels;
+        #   for a 52x52 feature layer, one feature point covers 8 pixels.
+        #   stride_h = stride_w = 32, 16, 8
+        #-----------------------------------------------------------------------#
         stride_h = self.img_size[1] / in_h
         stride_w = self.img_size[0] / in_w
 
-        # Convert the anchor sizes to feature-layer scale,
-        # i.e. compute the anchor widths/heights on the feature layer
+        #-------------------------------------------------#
+        #   The scaled_anchors obtained here are sized
+        #   relative to the feature layer
+        #-------------------------------------------------#
         scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]
 
-        # bs,3*(5+num_classes),13,13 -> bs,3,13,13,(5+num_classes)
+        #-----------------------------------------------#
+        #   There are three inputs; after reshaping their shapes are
+        #   batch_size, 3, 13, 13, 5 + num_classes
+        #   batch_size, 3, 26, 26, 5 + num_classes
+        #   batch_size, 3, 52, 52, 5 + num_classes
+        #-----------------------------------------------#
         prediction = input.view(bs, int(self.num_anchors/3),
                                 self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous()
 
-        # Adjust the prediction
-        x = torch.sigmoid(prediction[..., 0])  # Center x
-        y = torch.sigmoid(prediction[..., 1])  # Center y
-        w = prediction[..., 2]  # Width
-        h = prediction[..., 3]  # Height
-        conf = torch.sigmoid(prediction[..., 4])  # Conf
-        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.
-
-        # Find which anchors contain objects
+        # Adjustment parameters for the anchor center positions
+        x = torch.sigmoid(prediction[..., 0])
+        y = torch.sigmoid(prediction[..., 1])
+        # Adjustment parameters for the anchor width and height
+        w = prediction[..., 2]
+        h = prediction[..., 3]
+        # Objectness confidence: is there an object
+        conf = torch.sigmoid(prediction[..., 4])
+        # Class confidence
+        pred_cls = torch.sigmoid(prediction[..., 5:])
+
+        #---------------------------------------------------------------#
+        #   Find which anchors contain objects,
+        #   using the IoU between ground-truth boxes and anchors.
+        #   mask        batch_size, 3, in_h, in_w   feature points with objects
+        #   noobj_mask  batch_size, 3, in_h, in_w   feature points without objects
+        #   tx          batch_size, 3, in_h, in_w   true x offset of the center
+        #   ty          batch_size, 3, in_h, in_w   true y offset of the center
+        #   tw          batch_size, 3, in_h, in_w   true width adjustment
+        #   th          batch_size, 3, in_h, in_w   true height adjustment
+        #   tconf       batch_size, 3, in_h, in_w   true confidence
+        #   tcls        batch_size, 3, in_h, in_w, num_classes   true class
+        #----------------------------------------------------------------#
         mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y =\
                                                                             self.get_target(targets, scaled_anchors,
                                                                                             in_w, in_h,
                                                                                             self.ignore_threshold)
+        #---------------------------------------------------------------#
+        #   Decode the predictions and measure their overlap with the
+        #   ground truth. If the overlap is too large, ignore the point:
+        #   such feature points already predict fairly accurately and
+        #   are unsuitable as negative samples.
+        #----------------------------------------------------------------#
         noobj_mask = self.get_ignore(prediction, targets, scaled_anchors, in_w, in_h, noobj_mask)
+
         if self.cuda:
             box_loss_scale_x = (box_loss_scale_x).cuda()
             box_loss_scale_y = (box_loss_scale_y).cuda()
             mask, noobj_mask = mask.cuda(), noobj_mask.cuda()
             tx, ty, tw, th = tx.cuda(), ty.cuda(), tw.cuda(), th.cuda()
             tconf, tcls = tconf.cuda(), tcls.cuda()
-        box_loss_scale = 2 - box_loss_scale_x*box_loss_scale_y
+        box_loss_scale = 2 - box_loss_scale_x * box_loss_scale_y
 
-        #  losses.
+        # Loss for the center offsets; BCELoss works a bit better here
         loss_x = torch.sum(BCELoss(x, tx) / bs * box_loss_scale * mask)
         loss_y = torch.sum(BCELoss(y, ty) / bs * box_loss_scale * mask)
+        # Loss for the width/height adjustments
         loss_w = torch.sum(MSELoss(w, tw) / bs * 0.5 * box_loss_scale * mask)
         loss_h = torch.sum(MSELoss(h, th) / bs * 0.5 * box_loss_scale * mask)
-
+        # Confidence loss
         loss_conf = torch.sum(BCELoss(conf, mask) * mask / bs) + \
                     torch.sum(BCELoss(conf, mask) * noobj_mask / bs)
@@ -130,19 +186,30 @@ class YOLOLoss(nn.Module):
         loss = loss_x * self.lambda_xy + loss_y * self.lambda_xy + \
                loss_w * self.lambda_wh + loss_h * self.lambda_wh + \
               loss_conf * self.lambda_conf + loss_cls * self.lambda_cls
+
         # print(loss, loss_x.item() + loss_y.item(), loss_w.item() + loss_h.item(),
         #         loss_conf.item(), loss_cls.item(), \
         #         torch.sum(mask),torch.sum(noobj_mask))
-        return loss, loss_x.item(), loss_y.item(), loss_w.item(), \
-                loss_h.item(), loss_conf.item(), loss_cls.item()
+        if self.normalize:
+            num_pos = torch.sum(mask)
+            num_pos = torch.max(num_pos, torch.ones_like(num_pos))
+        else:
+            num_pos = bs
+        return loss, num_pos
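The new normalize flag changes the denominator used in train.py when the three head losses are summed: the clamped count of positive samples instead of the batch size. A toy illustration with made-up numbers:

    import torch

    loss_sum = torch.tensor(120.0)   # hypothetical summed loss of one head
    bs = 8                           # batch size
    mask = torch.zeros(8, 3, 13, 13)
    mask[0, 1, 6, 6] = 1             # pretend only two positive anchors were assigned
    mask[3, 2, 4, 9] = 1

    num_pos = torch.max(torch.sum(mask), torch.ones(()))  # clamp to >= 1, as in forward()
    print(loss_sum / num_pos)  # normalize=True: divide by positives -> 60.0
    print(loss_sum / bs)       # normalize=False: divide by batch size -> 15.0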
 
     def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
-        # Compute how many images there are in total
+        #-----------------------------------------------------#
+        #   Compute how many images there are in total
+        #-----------------------------------------------------#
         bs = len(target)
-        # Get the anchors
+        #-------------------------------------------------------#
+        #   Get the indices of the anchors belonging to this
+        #   feature layer, for filtering the anchors later
+        #-------------------------------------------------------#
         anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)]
         subtract_index = [0,3,6][self.feature_length.index(in_w)]
-        # Create arrays of all zeros or all ones
+        #-------------------------------------------------------#
+        #   Create arrays of all zeros or all ones
+        #-------------------------------------------------------#
         mask = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
         noobj_mask = torch.ones(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
@@ -158,55 +225,96 @@ class YOLOLoss(nn.Module):
         for b in range(bs):
             if len(target[b])==0:
                 continue
-            # Compute the positions on the feature layer
+            #-------------------------------------------------------#
+            #   Compute the center of each positive sample on the
+            #   feature layer
+            #-------------------------------------------------------#
             gxs = target[b][:, 0:1] * in_w
             gys = target[b][:, 1:2] * in_h
+            #-------------------------------------------------------#
+            #   Compute the width/height of each positive sample
+            #   relative to the feature layer
+            #-------------------------------------------------------#
             gws = target[b][:, 2:3] * in_w
             ghs = target[b][:, 3:4] * in_h
-            # Determine which grid cell each box belongs to
+            #-------------------------------------------------------#
+            #   Determine which feature point each positive sample
+            #   belongs to
+            #-------------------------------------------------------#
             gis = torch.floor(gxs)
             gjs = torch.floor(gys)
-            # Compute the positions of the ground-truth boxes
+            #-------------------------------------------------------#
+            #   Convert the ground-truth boxes to the form
+            #   num_true_box, 4
+            #-------------------------------------------------------#
             gt_box = torch.FloatTensor(torch.cat([torch.zeros_like(gws), torch.zeros_like(ghs), gws, ghs], 1))
-            # Compute the positions of all anchors
+            #-------------------------------------------------------#
+            #   Convert the anchors to the form
+            #   9, 4
+            #-------------------------------------------------------#
             anchor_shapes = torch.FloatTensor(torch.cat((torch.zeros((self.num_anchors, 2)), torch.FloatTensor(anchors)), 1))
-            # Compute the overlap
+            #-------------------------------------------------------#
+            #   Compute the IoU
+            #   num_true_box, 9
+            #-------------------------------------------------------#
             anch_ious = jaccard(gt_box, anchor_shapes)
-            # Find the best matching anchor box
+            #-------------------------------------------------------#
+            #   Find the anchor with the highest overlap
+            #   num_true_box,
+            #-------------------------------------------------------#
             best_ns = torch.argmax(anch_ious,dim=-1)
             for i, best_n in enumerate(best_ns):
                 if best_n not in anchor_index:
                     continue
-                # Masks
+                #-------------------------------------------------------------#
+                #   Extract the coordinates:
+                #   gi, gj are the x/y indices of the feature point matching
+                #   the ground-truth box;
+                #   gx, gy are the x/y coordinates of the ground-truth box;
+                #   gw, gh are its width and height
+                #-------------------------------------------------------------#
                 gi = gis[i].long()
                 gj = gjs[i].long()
                 gx = gxs[i]
                 gy = gys[i]
                 gw = gws[i]
                 gh = ghs[i]
-                # Masks
+
                 if (gj < in_h) and (gi < in_w):
                     best_n = best_n - subtract_index
-                    # Determine which anchors really contain objects
+
+                    #----------------------------------------#
+                    #   noobj_mask marks feature points
+                    #   without objects
+                    #----------------------------------------#
                     noobj_mask[b, best_n, gj, gi] = 0
+                    #----------------------------------------#
+                    #   mask marks feature points with objects
+                    #----------------------------------------#
                     mask[b, best_n, gj, gi] = 1
-                    # Compute the anchor center adjustment parameters
+                    #----------------------------------------#
+                    #   tx, ty are the ground-truth center
+                    #   adjustment parameters
+                    #----------------------------------------#
                     tx[b, best_n, gj, gi] = gx - gi.float()
                     ty[b, best_n, gj, gi] = gy - gj.float()
-                    # Compute the anchor width/height adjustment parameters
+                    #----------------------------------------#
+                    #   tw, th are the ground-truth width/height
+                    #   adjustment parameters
+                    #----------------------------------------#
                     tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n+subtract_index][0])
                     th[b, best_n, gj, gi] = math.log(gh / anchors[best_n+subtract_index][1])
-                    # Used to obtain the xywh scale
+                    #----------------------------------------#
+                    #   Used to scale the xywh loss:
+                    #   large objects get a smaller loss weight,
+                    #   small objects a larger one
+                    #----------------------------------------#
                     box_loss_scale_x[b, best_n, gj, gi] = target[b][i, 2]
                     box_loss_scale_y[b, best_n, gj, gi] = target[b][i, 3]
-                    # Objectness
+                    #----------------------------------------#
+                    #   tconf is the ground-truth objectness
+                    #----------------------------------------#
                     tconf[b, best_n, gj, gi] = 1
-                    # Class
+                    #----------------------------------------#
+                    #   tcls is the ground-truth class
+                    #----------------------------------------#
                     tcls[b, best_n, gj, gi, int(target[b][i, 4])] = 1
                 else:
                     print('Step {0} out of bound'.format(b))
@@ -216,10 +324,16 @@
         return mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y
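The tx/ty/tw/th targets written here are the exact inverse of the box decoding in utils.DecodeBox: tx, ty are fractional offsets inside the matched cell (inverted by sigmoid(x) + grid_x), and tw, th are log-ratios to the matched anchor (inverted by exp(w) * anchor_w). A worked example with a hypothetical ground-truth box on the 13x13 layer:

    import math

    # hypothetical ground truth on a 13x13 layer: center (6.4, 6.7), size (4.2, 5.6)
    gx, gy, gw, gh = 6.4, 6.7, 4.2, 5.6
    anchor_w, anchor_h = 3.625, 2.8125      # e.g. anchor [116, 90] / stride 32

    gi, gj = int(gx), int(gy)               # matched feature point (6, 6)
    tx, ty = gx - gi, gy - gj               # fractional offsets inside the cell
    tw = math.log(gw / anchor_w)            # log-ratio to the anchor width
    th = math.log(gh / anchor_h)            # log-ratio to the anchor height
    print(round(tx, 3), round(ty, 3), round(tw, 3), round(th, 3))  # 0.4 0.7 0.147 0.689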
 
     def get_ignore(self,prediction,target,scaled_anchors,in_w, in_h,noobj_mask):
+        #-----------------------------------------------------#
+        #   Compute how many images there are in total
+        #-----------------------------------------------------#
         bs = len(target)
+        #-------------------------------------------------------#
+        #   Get the indices of the anchors belonging to this
+        #   feature layer, for filtering the anchors later
+        #-------------------------------------------------------#
         anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)]
         scaled_anchors = np.array(scaled_anchors)[anchor_index]
-        # print(scaled_anchors)
+
         # Adjustment parameters for the anchor center positions
         x = torch.sigmoid(prediction[..., 0])
         y = torch.sigmoid(prediction[..., 1])
@@ -243,7 +357,9 @@ class YOLOLoss(nn.Module):
         anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape)
         anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape)
 
-        # Compute the adjusted anchor centers and widths/heights
+        #-------------------------------------------------------#
+        #   Compute the adjusted anchor centers and widths/heights
+        #-------------------------------------------------------#
         pred_boxes = FloatTensor(prediction[..., :4].shape)
         pred_boxes[..., 0] = x.data + grid_x
         pred_boxes[..., 1] = y.data + grid_y
@@ -252,7 +368,15 @@ class YOLOLoss(nn.Module):
         for i in range(bs):
             pred_boxes_for_ignore = pred_boxes[i]
+            #-------------------------------------------------------#
+            #   Reshape the predictions
+            #   pred_boxes_for_ignore      num_anchors, 4
+            #-------------------------------------------------------#
             pred_boxes_for_ignore = pred_boxes_for_ignore.view(-1, 4)
+            #-------------------------------------------------------#
+            #   Compute the ground-truth boxes and convert them to
+            #   feature-layer scale
+            #   gt_box      num_true_box, 4
+            #-------------------------------------------------------#
             if len(target[i]) > 0:
                 gx = target[i][:, 0:1] * in_w
                 gy = target[i][:, 1:2] * in_h
@@ -260,11 +384,18 @@ class YOLOLoss(nn.Module):
                 gh = target[i][:, 3:4] * in_h
                 gt_box = torch.FloatTensor(torch.cat([gx, gy, gw, gh],-1)).type(FloatTensor)
 
+                #-------------------------------------------------------#
+                #   Compute the IoU
+                #   anch_ious       num_true_box, num_anchors
+                #-------------------------------------------------------#
                 anch_ious = jaccard(gt_box, pred_boxes_for_ignore)
+                #-------------------------------------------------------#
+                #   Maximum overlap of each prediction with any
+                #   ground-truth box
+                #   anch_ious_max   num_anchors
+                #-------------------------------------------------------#
                 anch_ious_max, _ = torch.max(anch_ious,dim=0)
                 anch_ious_max = anch_ious_max.view(pred_boxes[i].size()[:3])
                 noobj_mask[i][anch_ious_max>self.ignore_threshold] = 0
-                # print(torch.max(anch_ious))
         return noobj_mask
@@ -282,7 +413,7 @@ class Generator(object):
         self.train_batches = len(train_lines)
         self.image_size = image_size
 
-    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
+    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
         '''Random preprocessing for real-time data augmentation'''
         line = annotation_line.split()
         image = Image.open(line[0])
@@ -290,6 +421,35 @@ class Generator(object):
         h, w = input_shape
         box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
 
+        if not random:
+            scale = min(w/iw, h/ih)
+            nw = int(iw*scale)
+            nh = int(ih*scale)
+            dx = (w-nw)//2
+            dy = (h-nh)//2
+
+            image = image.resize((nw,nh), Image.BICUBIC)
+            new_image = Image.new('RGB', (w,h), (128,128,128))
+            new_image.paste(image, (dx, dy))
+            image_data = np.array(new_image, np.float32)
+
+            # Adjust the target box coordinates
+            box_data = np.zeros((len(box), 5))
+            if len(box) > 0:
+                np.random.shuffle(box)
+                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
+                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
+                box[:, 0:2][box[:, 0:2] < 0] = 0
+                box[:, 2][box[:, 2] > w] = w
+                box[:, 3][box[:, 3] > h] = h
+                box_w = box[:, 2] - box[:, 0]
+                box_h = box[:, 3] - box[:, 1]
+                box = box[np.logical_and(box_w > 1, box_h > 1)]  # keep valid boxes
+                box_data = np.zeros((len(box), 5))
+                box_data[:len(box)] = box
+
+            return image_data, box_data
+
         # resize image
         new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
         scale = rand(.25, 2)
@@ -342,13 +502,8 @@ class Generator(object):
         box = box[np.logical_and(box_w>1, box_h>1)]  # discard invalid box
         box_data = np.zeros((len(box),5))
         box_data[:len(box)] = box
-        if len(box) == 0:
-            return image_data, []
-
-        if (box_data[:,:4]>0).any():
-            return image_data, box_data
-        else:
-            return image_data, []
+
+        return image_data, box_data
 
     def generate(self, train=True):
         while True:
@@ -357,7 +512,10 @@ class Generator(object):
             inputs = []
             targets = []
             for annotation_line in lines:
-                img,y=self.get_random_data(annotation_line,self.image_size[0:2])
+                if train:
+                    img,y=self.get_random_data(annotation_line, self.image_size[0:2])
+                else:
+                    img,y=self.get_random_data(annotation_line, self.image_size[0:2], False)
 
                 if len(y)!=0:
                     boxes = np.array(y[:,:4],dtype=np.float32)
@@ -373,6 +531,7 @@ class Generator(object):
                     boxes[:,0] = boxes[:,0] + boxes[:,2]/2
                     boxes[:,1] = boxes[:,1] + boxes[:,3]/2
                     y = np.concatenate([boxes,y[:,-1:]],axis=-1)
+
                 img = np.array(img,dtype = np.float32)
 
                 inputs.append(np.transpose(img/255.0,(2,0,1)))
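The new random=False branch above maps annotation boxes through the same scale-and-pad transform as the letterboxed image. A worked numeric example, assuming a hypothetical 500x375 photo letterboxed to 416x416:

    import numpy as np

    iw, ih, w, h = 500, 375, 416, 416
    scale = min(w / iw, h / ih)                 # 0.832
    nw, nh = int(iw * scale), int(ih * scale)   # 416, 312
    dx, dy = (w - nw) // 2, (h - nh) // 2       # 0, 52

    box = np.array([[100, 150, 300, 350, 0]], dtype=np.float32)  # x1,y1,x2,y2,class
    box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
    box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
    print(box)  # approximately [[ 83.2 176.8 249.6 343.2   0. ]]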
diff --git a/predict.py b/predict.py
index 07c7406a3cab29426d9621d755bca0d0c7c78b50..9dceed406946f7a60aee9b9a45f7e4ba58540c9c 100644
--- a/predict.py
+++ b/predict.py
@@ -1,9 +1,14 @@
-#-------------------------------------#
-#       Predict on a single image
-#-------------------------------------#
-from yolo import YOLO
+'''
+A few notes on predict.py:
+1. It cannot do batch prediction. For batch prediction, walk a folder with
+   os.listdir() and open each image with Image.open for prediction
+   (a sketch follows the test.py diff below).
+2. To save the result, use r_image.save("img.jpg").
+3. To get the box coordinates, go into the detect_image function and read
+   the four values top, left, bottom, right.
+4. To crop out a target, use the obtained top, left, bottom, right values
+   to slice the original image as an array.
+'''
 from PIL import Image
 
+from yolo import YOLO
+
 yolo = YOLO()
 
 while True:
diff --git a/test.py b/test.py
index cc50ad7f4191073ea83064e2d14f3c6418846dff..999fc487982e1eef2b6aac75b6a2e175baf99ac2 100644
--- a/test.py
+++ b/test.py
@@ -5,6 +5,7 @@
 #--------------------------------------------#
 import torch
 from torchsummary import summary
+
 from nets.yolo3 import YoloBody
 from utils.config import Config
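Note 1 of the predict.py docstring describes batch prediction; a minimal sketch of that loop (img/ and img_out/ are hypothetical folder names):

    import os
    from PIL import Image
    from yolo import YOLO

    yolo = YOLO()
    img_dir = "img"                       # hypothetical input folder
    out_dir = "img_out"                   # hypothetical output folder
    os.makedirs(out_dir, exist_ok=True)

    for name in os.listdir(img_dir):
        if not name.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        image = Image.open(os.path.join(img_dir, name))
        r_image = yolo.detect_image(image)        # note 2: save with r_image.save(...)
        r_image.save(os.path.join(out_dir, name))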
diff --git a/train.py b/train.py
index 44e6521547531124527be2ba38ff03a9c230d9d2..36d50bff21dcde29bf377fc51f921cc81b8a734b 100644
--- a/train.py
+++ b/train.py
@@ -2,21 +2,24 @@
 #   Train on the dataset
 #-------------------------------------#
 import os
-import numpy as np
 import time
+
+import numpy as np
 import torch
-from torch.autograd import Variable
+import torch.backends.cudnn as cudnn
 import torch.nn as nn
-import torch.optim as optim
 import torch.nn.functional as F
-import torch.backends.cudnn as cudnn
-from utils.config import Config
+import torch.optim as optim
+from torch.autograd import Variable
 from torch.utils.data import DataLoader
-from utils.dataloader import yolo_dataset_collate, YoloDataset
-from nets.yolo_training import YOLOLoss,Generator
-from nets.yolo3 import YoloBody
 from tqdm import tqdm
 
+from nets.yolo3 import YoloBody
+from nets.yolo_training import Generator, YOLOLoss
+from utils.config import Config
+from utils.dataloader import YoloDataset, yolo_dataset_collate
+
+
 def get_lr(optimizer):
     for param_group in optimizer.param_groups:
         return param_group['lr']
@@ -24,7 +27,8 @@ def get_lr(optimizer):
 def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda):
     total_loss = 0
     val_loss = 0
-    start_time = time.time()
+
+    net.train()
     with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
         for iteration, batch in enumerate(gen):
             if iteration >= epoch_size:
@@ -37,25 +41,38 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
             else:
                 images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
                 targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
+
+            #----------------------#
+            #   Zero the gradients
+            #----------------------#
             optimizer.zero_grad()
+            #----------------------#
+            #   Forward pass
+            #----------------------#
             outputs = net(images)
             losses = []
+            num_pos_all = 0
+            #----------------------#
+            #   Compute the loss
+            #----------------------#
             for i in range(3):
-                loss_item = yolo_losses[i](outputs[i], targets)
-                losses.append(loss_item[0])
-            loss = sum(losses)
+                loss_item, num_pos = yolo_losses[i](outputs[i], targets)
+                losses.append(loss_item)
+                num_pos_all += num_pos
+
+            loss = sum(losses) / num_pos_all
+            #----------------------#
+            #   Backward pass
+            #----------------------#
             loss.backward()
             optimizer.step()
 
-            total_loss += loss
-            waste_time = time.time() - start_time
+            total_loss += loss.item()
 
-            pbar.set_postfix(**{'total_loss': total_loss.item() / (iteration + 1), 
-                                'lr'        : get_lr(optimizer),
-                                'step/s'    : waste_time})
+            pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1), 
+                                'lr'        : get_lr(optimizer)})
             pbar.update(1)
-            start_time = time.time()
 
     net.eval()
     print('Start Validation')
     with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
@@ -74,14 +91,15 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
                 optimizer.zero_grad()
                 outputs = net(images_val)
                 losses = []
+                num_pos_all = 0
                 for i in range(3):
-                    loss_item = yolo_losses[i](outputs[i], targets_val)
-                    losses.append(loss_item[0])
-                loss = sum(losses)
-                val_loss += loss
-                pbar.set_postfix(**{'total_loss': val_loss.item() / (iteration + 1)})
+                    loss_item, num_pos = yolo_losses[i](outputs[i], targets_val)
+                    losses.append(loss_item)
+                    num_pos_all += num_pos
+                loss = sum(losses) / num_pos_all
+                val_loss += loss.item()
+                pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
             pbar.update(1)
-    net.train()
     print('Finish Validation')
     print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
     print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_loss/(epoch_size_val+1)))
@@ -94,22 +112,33 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
 #   https://www.bilibili.com/video/BV1zE411u7Vw
 #----------------------------------------------------#
 if __name__ == "__main__":
-    # Parameter initialization
-    annotation_path = '2007_train.txt'
-    model = YoloBody(Config)
+    #-------------------------------#
+    #   Whether to use CUDA;
+    #   set to False if there is no GPU
+    #-------------------------------#
     Cuda = True
     #-------------------------------#
    #   Whether to use the DataLoader
     #-------------------------------#
     Use_Data_Loader = True
+    #------------------------------------------------------#
+    #   Whether to normalize the loss
+    #------------------------------------------------------#
+    normalize = True
+    #------------------------------------------------------#
+    #   Create the yolo model.
+    #   Be sure to modify the classes parameter in Config
+    #   before training.
+    #------------------------------------------------------#
+    model = YoloBody(Config)
 
-    #-------------------------------------------#
-    #   See the README for downloading the weights file
-    #-------------------------------------------#
+    #------------------------------------------------------#
+    #   For the weights file see the README
+    #   (Baidu Netdisk download)
+    #------------------------------------------------------#
+    model_path = "model_data/yolo_weights.pth"
     print('Loading weights into state dict...')
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model_dict = model.state_dict()
-    pretrained_dict = torch.load("model_data/yolo_weights.pth", map_location=device)
+    pretrained_dict = torch.load(model_path, map_location=device)
     pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
     model_dict.update(pretrained_dict)
     model.load_state_dict(model_dict)
@@ -126,9 +155,17 @@ if __name__ == "__main__":
     yolo_losses = []
     for i in range(3):
         yolo_losses.append(YOLOLoss(np.reshape(Config["yolo"]["anchors"],[-1,2]),
-                                    Config["yolo"]["classes"], (Config["img_w"], Config["img_h"]), Cuda))
+                                    Config["yolo"]["classes"], (Config["img_w"], Config["img_h"]), Cuda, normalize))
 
-    # 0.1 for validation, 0.9 for training
+    #----------------------------------------------------#
+    #   Get the image paths and labels
+    #----------------------------------------------------#
+    annotation_path = '2007_train.txt'
+    #----------------------------------------------------------------------#
+    #   The validation split is performed here in train.py.
+    #   It is normal for 2007_test.txt and 2007_val.txt to be empty;
+    #   training does not use them.
+    #   With the current split, the validation:training ratio is 1:9.
+    #----------------------------------------------------------------------#
     val_split = 0.1
     with open(annotation_path) as f:
         lines = f.readlines()
@@ -138,17 +175,15 @@ if __name__ == "__main__":
     num_val = int(len(lines)*val_split)
     num_train = len(lines) - num_val
 
-    #------------------------------------------------------#
     #   The backbone features are generic; freeze training speeds things up
     #   and keeps the weights from being destroyed early in training.
     #   Init_Epoch is the starting epoch;
     #   Freeze_Epoch is the number of epochs trained with a frozen backbone;
-    #   Epoch is the total number of training epochs.
+    #   Unfreeze_Epoch is the total number of training epochs.
     #   If you hit OOM or run out of VRAM, reduce Batch_size.
     #------------------------------------------------------#
     if True:
-        # Starting with a learning rate of 1e-3 converges faster
         lr = 1e-3
         Batch_size = 8
         Init_Epoch = 0
@@ -158,17 +193,17 @@ if __name__ == "__main__":
         lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
 
         if Use_Data_Loader:
-            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]))
-            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]))
+            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]), True)
+            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]), False)
             gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
                              drop_last=True, collate_fn=yolo_dataset_collate)
             gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True, 
                                  drop_last=True, collate_fn=yolo_dataset_collate)
         else:
             gen = Generator(Batch_size, lines[:num_train],
-                            (Config["img_h"], Config["img_w"])).generate()
+                            (Config["img_h"], Config["img_w"])).generate(True)
             gen_val = Generator(Batch_size, lines[num_train:],
-                            (Config["img_h"], Config["img_w"])).generate()
+                            (Config["img_h"], Config["img_w"])).generate(False)
 
         epoch_size = num_train//Batch_size
         epoch_size_val = num_val//Batch_size
@@ -190,18 +225,19 @@ if __name__ == "__main__":
         optimizer = optim.Adam(net.parameters(),lr)
         lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
+
         if Use_Data_Loader:
-            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]))
-            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]))
+            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]), True)
+            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]), False)
             gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
                              drop_last=True, collate_fn=yolo_dataset_collate)
             gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True, 
                                  drop_last=True, collate_fn=yolo_dataset_collate)
         else:
             gen = Generator(Batch_size, lines[:num_train],
-                            (Config["img_h"], Config["img_w"])).generate()
+                            (Config["img_h"], Config["img_w"])).generate(True)
             gen_val = Generator(Batch_size, lines[num_train:],
-                            (Config["img_h"], Config["img_w"])).generate()
+                            (Config["img_h"], Config["img_w"])).generate(False)
 
         epoch_size = num_train//Batch_size
         epoch_size_val = num_val//Batch_size
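The freeze-training comments correspond to toggling requires_grad on the darknet53 backbone between the two stages, presumably via the backbone attribute the diff shows YoloBody exposing. A hedged sketch of the mechanism (the repo's own train.py may differ in detail):

    from nets.yolo3 import YoloBody
    from utils.config import Config

    model = YoloBody(Config)

    # stage 1: freeze the darknet53 backbone, train only the yolo heads
    for param in model.backbone.parameters():
        param.requires_grad = False

    # stage 2 (after Freeze_Epoch): unfreeze and train everything at a lower lr
    for param in model.backbone.parameters():
        param.requires_grad = True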
diff --git a/utils/config.py b/utils/config.py
index 41656d1e3e7c81821c0c20ac7dd13bfdb125e0a6..460beca4c37eb93ae8189629a87d5712bed52cd1 100644
--- a/utils/config.py
+++ b/utils/config.py
@@ -1,11 +1,19 @@
 Config = \
-{
+{
+    #-------------------------------------------------------------#
+    #   Be sure to modify the classes parameter before training.
+    #   The anchors can stay unchanged: they generalize well,
+    #   and the large/medium/small split matches yolo's feature
+    #   layers nicely.
+    #-------------------------------------------------------------#
     "yolo": {
         "anchors": [[[116, 90], [156, 198], [373, 326]],
                     [[30, 61], [62, 45], [59, 119]],
                     [[10, 13], [16, 30], [33, 23]]],
         "classes": 20,
     },
+    #-------------------------------------------------------------#
+    #   img_h and img_w may be changed to 608x608
+    #-------------------------------------------------------------#
     "img_h": 416,
     "img_w": 416,
}
diff --git a/utils/dataloader.py b/utils/dataloader.py
index 093c951bc7b44dcb93f13d52226eb1d1ef0ab2c6..398a47990bd4c44bc16e4927d2dbe5b10d52c94f 100644
--- a/utils/dataloader.py
+++ b/utils/dataloader.py
@@ -13,12 +13,13 @@ from nets.yolo_training import Generator
 import cv2
 
 class YoloDataset(Dataset):
-    def __init__(self, train_lines, image_size):
+    def __init__(self, train_lines, image_size, is_train):
         super(YoloDataset, self).__init__()
 
         self.train_lines = train_lines
         self.train_batches = len(train_lines)
         self.image_size = image_size
+        self.is_train = is_train
 
     def __len__(self):
         return self.train_batches
@@ -26,7 +27,7 @@ class YoloDataset(Dataset):
     def rand(self, a=0, b=1):
         return np.random.rand() * (b - a) + a
 
-    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
+    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
         """Random preprocessing for real-time data augmentation"""
         line = annotation_line.split()
         image = Image.open(line[0])
@@ -34,6 +35,35 @@ class YoloDataset(Dataset):
         h, w = input_shape
         box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
 
+        if not random:
+            scale = min(w/iw, h/ih)
+            nw = int(iw*scale)
+            nh = int(ih*scale)
+            dx = (w-nw)//2
+            dy = (h-nh)//2
+
+            image = image.resize((nw,nh), Image.BICUBIC)
+            new_image = Image.new('RGB', (w,h), (128,128,128))
+            new_image.paste(image, (dx, dy))
+            image_data = np.array(new_image, np.float32)
+
+            # Adjust the target box coordinates
+            box_data = np.zeros((len(box), 5))
+            if len(box) > 0:
+                np.random.shuffle(box)
+                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
+                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
+                box[:, 0:2][box[:, 0:2] < 0] = 0
+                box[:, 2][box[:, 2] > w] = w
+                box[:, 3][box[:, 3] > h] = h
+                box_w = box[:, 2] - box[:, 0]
+                box_h = box[:, 3] - box[:, 1]
+                box = box[np.logical_and(box_w > 1, box_h > 1)]  # keep valid boxes
+                box_data = np.zeros((len(box), 5))
+                box_data[:len(box)] = box
+
+            return image_data, box_data
+
         # Resize the image
         new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
         scale = self.rand(.25, 2)
@@ -48,8 +78,7 @@ class YoloDataset(Dataset):
         # Place the image
         dx = int(self.rand(0, w - nw))
         dy = int(self.rand(0, h - nh))
-        new_image = Image.new('RGB', (w, h),
-                              (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
+        new_image = Image.new('RGB', (w, h), (128, 128, 128))
         new_image.paste(image, (dx, dy))
         image = new_image
@@ -89,19 +118,18 @@ class YoloDataset(Dataset):
         box = box[np.logical_and(box_w > 1, box_h > 1)]  # keep valid boxes
         box_data = np.zeros((len(box), 5))
         box_data[:len(box)] = box
-        if len(box) == 0:
-            return image_data, []
-
-        if (box_data[:, :4] > 0).any():
-            return image_data, box_data
-        else:
-            return image_data, []
+
+        return image_data, box_data
 
     def __getitem__(self, index):
         lines = self.train_lines
         n = self.train_batches
         index = index % n
-        img, y = self.get_random_data(lines[index], self.image_size[0:2])
+        if self.is_train:
+            img, y = self.get_random_data(lines[index], self.image_size[0:2])
+        else:
+            img, y = self.get_random_data(lines[index], self.image_size[0:2], False)
+
         if len(y) != 0:
             # Convert coordinates to fractions in 0~1
             boxes = np.array(y[:, :4], dtype=np.float32)
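Below this hunk, __getitem__ goes on to convert the pixel corner boxes into the normalized center-size form that YOLOLoss.get_target expects (it multiplies them back by in_w/in_h). The conversion in isolation:

    import numpy as np

    h, w = 416, 416
    boxes = np.array([[83.2, 176.8, 249.6, 343.2]], dtype=np.float32)  # x1,y1,x2,y2 in pixels

    boxes[:, [0, 2]] /= w                  # to 0~1 fractions
    boxes[:, [1, 3]] /= h
    boxes[:, 2:4] -= boxes[:, 0:2]         # corners -> width/height
    boxes[:, 0:2] += boxes[:, 2:4] / 2     # top-left -> center
    print(boxes)  # approximately [[0.4 0.625 0.4 0.4]]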
diff --git a/utils/utils.py b/utils/utils.py
index e67dcc3f89a6c47707a2e27e6c00a0fee1f59d90..2055a8fb0316a7856bbe8c9f4e852dabf1a580aa 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -1,18 +1,26 @@
 from __future__ import division
-import os
+
 import math
+import os
 import time
+
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import numpy as np
+from PIL import Image, ImageDraw, ImageFont
 from torch.autograd import Variable
 from torchvision.ops import nms
-from PIL import Image, ImageDraw, ImageFont
+
 
 class DecodeBox(nn.Module):
     def __init__(self, anchors, num_classes, img_size):
         super(DecodeBox, self).__init__()
+        #-----------------------------------------------------------#
+        #   The 13x13 feature layer uses anchors [116,90],[156,198],[373,326]
+        #   The 26x26 feature layer uses anchors [30,61],[62,45],[59,119]
+        #   The 52x52 feature layer uses anchors [10,13],[16,30],[33,23]
+        #-----------------------------------------------------------#
         self.anchors = anchors
         self.num_anchors = len(anchors)
         self.num_classes = num_classes
@@ -20,17 +28,33 @@ class DecodeBox(nn.Module):
         self.img_size = img_size
 
     def forward(self, input):
+        #-----------------------------------------------#
+        #   There are three inputs; their shapes are
+        #   batch_size, 255, 13, 13
+        #   batch_size, 255, 26, 26
+        #   batch_size, 255, 52, 52
+        #-----------------------------------------------#
         batch_size = input.size(0)
         input_height = input.size(2)
         input_width = input.size(3)
 
-        # Compute the stride
+        #-----------------------------------------------#
+        #   With a 416x416 input,
+        #   stride_h = stride_w = 32, 16, 8
+        #-----------------------------------------------#
         stride_h = self.img_size[1] / input_height
         stride_w = self.img_size[0] / input_width
 
-        # Normalize to the feature layer
+        #-------------------------------------------------#
+        #   The scaled_anchors obtained here are sized
+        #   relative to the feature layer
+        #-------------------------------------------------#
         scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors]
 
-        # Resize the prediction
+        #-----------------------------------------------#
+        #   There are three inputs; after reshaping their shapes are
+        #   batch_size, 3, 13, 13, 85
+        #   batch_size, 3, 26, 26, 85
+        #   batch_size, 3, 52, 52, 85
+        #-----------------------------------------------#
         prediction = input.view(batch_size, self.num_anchors,
                                 self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous()
@@ -38,37 +62,48 @@ class DecodeBox(nn.Module):
         # Adjustment parameters for the anchor center positions
         x = torch.sigmoid(prediction[..., 0])
         y = torch.sigmoid(prediction[..., 1])
         # Adjustment parameters for the anchor width and height
-        w = prediction[..., 2]  # Width
-        h = prediction[..., 3]  # Height
-
+        w = prediction[..., 2]
+        h = prediction[..., 3]
         # Objectness confidence: is there an object
         conf = torch.sigmoid(prediction[..., 4])
         # Class confidence
-        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.
+        pred_cls = torch.sigmoid(prediction[..., 5:])
 
         FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
         LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
 
-        # Generate the grid; anchor centers are the grid's top-left corners  batch_size,3,13,13
+        #----------------------------------------------------------#
+        #   Generate the grid; anchor centers sit at the grid's
+        #   top-left corners
+        #   batch_size,3,13,13
+        #----------------------------------------------------------#
         grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat(
             batch_size * self.num_anchors, 1, 1).view(x.shape).type(FloatTensor)
         grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat(
             batch_size * self.num_anchors, 1, 1).view(y.shape).type(FloatTensor)
 
-        # Generate the anchor widths/heights
+        #----------------------------------------------------------#
+        #   Generate the anchor widths/heights in grid layout
+        #   batch_size,3,13,13
+        #----------------------------------------------------------#
         anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
         anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
         anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape)
         anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape)
 
-        # Compute the adjusted anchor centers and widths/heights
+        #----------------------------------------------------------#
+        #   Adjust the anchors using the predictions:
+        #   first shift the anchor centers towards the bottom-right,
+        #   then adjust the anchor widths and heights.
+        #----------------------------------------------------------#
         pred_boxes = FloatTensor(prediction[..., :4].shape)
         pred_boxes[..., 0] = x.data + grid_x
         pred_boxes[..., 1] = y.data + grid_y
         pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
         pred_boxes[..., 3] = torch.exp(h.data) * anchor_h
 
-        # Rescale the output to 416x416
+        #----------------------------------------------------------#
+        #   Rescale the output to the input image size
+        #----------------------------------------------------------#
         _scale = torch.Tensor([stride_w, stride_h] * 2).type(FloatTensor)
         output = torch.cat((pred_boxes.view(batch_size, -1, 4) * _scale,
                             conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1)
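The pred_boxes assignments above are the standard yolov3 decode: sigmoid offsets added to the grid cell, exponential scaling of the anchor, then multiplication by the stride (_scale). One 13x13 cell worked through with made-up predictions:

    import math

    stride = 32.0                       # 416 / 13
    grid_x, grid_y = 6, 6               # cell index
    x, y = 0.4, 0.7                     # sigmoid outputs
    w, h = 0.148, 0.688                 # raw width/height predictions
    anchor_w, anchor_h = 116 / stride, 90 / stride   # scaled anchor [116, 90]

    cx = (x + grid_x) * stride            # 204.8 px on the 416x416 input
    cy = (y + grid_y) * stride            # 214.4 px
    bw = math.exp(w) * anchor_w * stride  # ~134.5 px
    bh = math.exp(h) * anchor_h * stride  # ~179.1 px
    print(cx, cy, round(bw, 1), round(bh, 1))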
@@ -139,7 +174,10 @@ def bbox_iou(box1, box2, x1y1x2y2=True):
 
 def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
-    # Compute the top-left and bottom-right corners
+    #----------------------------------------------------------#
+    #   Convert the predictions to top-left/bottom-right corner format.
+    #   prediction  [batch_size, num_anchors, 85]
+    #----------------------------------------------------------#
     box_corner = prediction.new(prediction.shape)
     box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
     box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
@@ -149,21 +187,35 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
 
     output = [None for _ in range(len(prediction))]
     for image_i, image_pred in enumerate(prediction):
-        # Get the classes and their confidences
+        #----------------------------------------------------------#
+        #   Take the max over the class predictions.
+        #   class_conf  [batch_size, num_anchors, 1]    class confidence
+        #   class_pred  [batch_size, num_anchors, 1]    class index
+        #----------------------------------------------------------#
         class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)
 
-        # First round of filtering by confidence
-        conf_mask = (image_pred[:, 4]*class_conf[:, 0] >= conf_thres).squeeze()
+        #----------------------------------------------------------#
+        #   First round of filtering by confidence
+        #----------------------------------------------------------#
+        conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze()
 
+        #----------------------------------------------------------#
+        #   Filter the predictions by confidence
+        #----------------------------------------------------------#
         image_pred = image_pred[conf_mask]
         class_conf = class_conf[conf_mask]
         class_pred = class_pred[conf_mask]
         if not image_pred.size(0):
             continue
 
-        # The content is (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
+        #-------------------------------------------------------------------------#
+        #   detections  [num_anchors, 7]
+        #   the 7 values are: x1, y1, x2, y2, obj_conf, class_conf, class_pred
+        #-------------------------------------------------------------------------#
         detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
 
-        # Get the classes
+        #------------------------------------------#
+        #   Get all classes present in the predictions
+        #------------------------------------------#
         unique_labels = detections[:, -1].cpu().unique()
 
         if prediction.is_cuda:
@@ -171,7 +223,9 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
             detections = detections.cuda()
 
         for c in unique_labels:
-            # Get all predictions of one class after the preliminary filtering
+            #------------------------------------------#
+            #   Get all predictions of one class after
+            #   score filtering
+            #------------------------------------------#
             detections_class = detections[detections[:, -1] == c]
 
             #------------------------------------------#
@@ -179,7 +233,7 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
             #------------------------------------------#
             keep = nms(
                 detections_class[:, :4],
-                detections_class[:, 4]*detections_class[:, 5],
+                detections_class[:, 4] * detections_class[:, 5],
                 nms_thres
             )
             max_detections = detections_class[keep]
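The per-class loop above calls torchvision's nms once for each class present. The call in isolation, with dummy corner-format boxes and scores = obj_conf * class_conf:

    import torch
    from torchvision.ops import nms

    boxes = torch.tensor([[100., 100., 200., 200.],
                          [105., 105., 205., 205.],   # heavy overlap with the first
                          [300., 300., 400., 400.]])
    scores = torch.tensor([0.9, 0.8, 0.7])

    keep = nms(boxes, scores, iou_threshold=0.4)
    print(keep)   # tensor([0, 2]) -- the lower-scored overlapping box is suppressed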
diff --git a/video.py b/video.py
index 3c21ddcc860f4ced365637c1b7a9a4915a159d98..76cb910d5a470a9178e441fd6da3885b39218e17 100644
--- a/video.py
+++ b/video.py
@@ -1,15 +1,23 @@
 #-------------------------------------#
-#       Camera detection
+#       Detect from a camera or a video.
+#       For the camera, just run the script.
+#       For a video, point cv2.VideoCapture() at a path.
+#       Saving the video is not hard; a quick web search will show how.
 #-------------------------------------#
-from yolo import YOLO
-from PIL import Image
-import numpy as np
-import cv2
 import time
 
-yolo = YOLO()
-# Open the camera
-capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4")
+import cv2
+import numpy as np
+from PIL import Image
+
+from yolo import YOLO
+
+yolo = YOLO()
+#-------------------------------------#
+#   Open the camera
+#   capture=cv2.VideoCapture("1.mp4")
+#-------------------------------------#
+capture=cv2.VideoCapture(0)
 fps = 0.0
 while(True):
     t1 = time.time()
@@ -19,10 +27,8 @@ while(True):
     frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
     # Convert to Image
     frame = Image.fromarray(np.uint8(frame))
-    # Run detection
     frame = np.array(yolo.detect_image(frame))
-
     # RGB to BGR to match OpenCV's display format
     frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
@@ -32,7 +38,6 @@ while(True):
     cv2.imshow("video",frame)
-
     c= cv2.waitKey(1) & 0xff 
     if c==27:
         capture.release()
diff --git a/voc_annotation.py b/voc_annotation.py
index aad06573cb4ebb7df461256759d793fe2ad44827..ca0f88d364b7792396625da86c09a928c01158f0 100644
--- a/voc_annotation.py
+++ b/voc_annotation.py
@@ -1,3 +1,8 @@
+#---------------------------------------------#
+#   Be sure to modify classes before running.
+#   If the generated 2007_train.txt contains no object information,
+#   it is because classes was not set correctly.
+#---------------------------------------------#
 import xml.etree.ElementTree as ET
 from os import getcwd
diff --git a/yolo.py b/yolo.py
index 06b396586cfa7e92df1908352f6d29579df96240..d80840a38a6dbfa36e93687e7e9bc4dce6b71904 100644
--- a/yolo.py
+++ b/yolo.py
@@ -1,22 +1,28 @@
 #-------------------------------------#
 #       Create the YOLO class
 #-------------------------------------#
-import cv2
-import numpy as np
 import colorsys
 import os
+
+import cv2
+import numpy as np
 import torch
-import torch.nn as nn
-from nets.yolo3 import YoloBody
 import torch.backends.cudnn as cudnn
-from PIL import Image,ImageFont, ImageDraw
+import torch.nn as nn
+from PIL import Image, ImageDraw, ImageFont
 from torch.autograd import Variable
+
+from nets.yolo3 import YoloBody
 from utils.config import Config
-from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes
+from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
+                         non_max_suppression, yolo_correct_boxes)
+
 
 #--------------------------------------------#
 #   To predict with your own trained model, two parameters
 #   must be changed: both model_path and classes_path!
+#   If you get a shape mismatch, double-check the model_path
+#   and classes_path settings used for training.
 #--------------------------------------------#
 class YOLO(object):
     _defaults = {
@@ -52,14 +58,20 @@ class YOLO(object):
             class_names = f.readlines()
         class_names = [c.strip() for c in class_names]
         return class_names
+
     #---------------------------------------------------#
-    #   Get all the classes
+    #   Build the model
     #---------------------------------------------------#
     def generate(self):
         self.config["yolo"]["classes"] = len(self.class_names)
+        #---------------------------------------------------#
+        #   Build the yolov3 model
+        #---------------------------------------------------#
         self.net = YoloBody(self.config)
 
-        # Speed up model training
+        #---------------------------------------------------#
+        #   Load the yolov3 weights
+        #---------------------------------------------------#
         print('Loading weights into state dict...')
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         state_dict = torch.load(self.model_path, map_location=device)
@@ -71,10 +83,12 @@ class YOLO(object):
             self.net = nn.DataParallel(self.net)
             self.net = self.net.cuda()
 
+        #---------------------------------------------------#
+        #   Build the decoders for the three feature layers
+        #---------------------------------------------------#
         self.yolo_decodes = []
         for i in range(3):
-            self.yolo_decodes.append(DecodeBox(self.config["yolo"]["anchors"][i], self.config["yolo"]["classes"],  (self.model_image_size[1], self.model_image_size[0])))
-
+            self.yolo_decodes.append(DecodeBox(self.config["yolo"]["anchors"][i], self.config["yolo"]["classes"], (self.model_image_size[1], self.model_image_size[0])))
 
         print('{} model, anchors, and classes loaded.'.format(self.model_path))
         # Set different colors for the boxes
@@ -91,44 +105,65 @@ class YOLO(object):
     def detect_image(self, image):
         image_shape = np.array(np.shape(image)[0:2])
 
+        #---------------------------------------------------------#
+        #   Add gray bars to the image for a distortion-free resize
+        #---------------------------------------------------------#
         crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
-        photo = np.array(crop_img,dtype = np.float32)
-        photo /= 255.0
+        photo = np.array(crop_img,dtype = np.float32) / 255.0
         photo = np.transpose(photo, (2, 0, 1))
-        photo = photo.astype(np.float32)
-        images = []
-        images.append(photo)
+        #---------------------------------------------------------#
+        #   Add the batch_size dimension
+        #---------------------------------------------------------#
+        images = [photo]
 
-        images = np.asarray(images)
-        images = torch.from_numpy(images)
-        if self.cuda:
-            images = images.cuda()
-
         with torch.no_grad():
+            images = torch.from_numpy(np.asarray(images))
+            if self.cuda:
+                images = images.cuda()
+
+            #---------------------------------------------------------#
+            #   Feed the image into the network for prediction!
+            #---------------------------------------------------------#
             outputs = self.net(images)
             output_list = []
             for i in range(3):
                 output_list.append(self.yolo_decodes[i](outputs[i]))
+
+            #---------------------------------------------------------#
+            #   Stack the prediction boxes, then run non-maximum suppression
+            #---------------------------------------------------------#
             output = torch.cat(output_list, 1)
             batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
                                                    conf_thres=self.confidence,
                                                    nms_thres=self.iou)
-        try :
-            batch_detections = batch_detections[0].cpu().numpy()
-        except:
-            return image
-        top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
-        top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
-        top_label = np.array(batch_detections[top_index,-1],np.int32)
-        top_bboxes = np.array(batch_detections[top_index,:4])
-        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
-
-        # Remove the gray bars
-        boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
+
+            #---------------------------------------------------------#
+            #   If no object is detected, return the original image
+            #---------------------------------------------------------#
+            try :
+                batch_detections = batch_detections[0].cpu().numpy()
+            except:
+                return image
+
+            #---------------------------------------------------------#
+            #   Filter the prediction boxes by score
+            #---------------------------------------------------------#
+            top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
+            top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
+            top_label = np.array(batch_detections[top_index,-1],np.int32)
+            top_bboxes = np.array(batch_detections[top_index,:4])
+            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
+
+            #-----------------------------------------------------------------#
+            #   letterbox_image added gray bars around the image before it
+            #   was fed into the network, so the resulting top_bboxes are
+            #   relative to the padded image. Correct them to remove the
+            #   gray-bar offset.
+            #-----------------------------------------------------------------#
+            boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
 
         font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
 
-        thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0]
+        thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0], 1)
 
         for i, c in enumerate(top_label):
             predicted_class = self.class_names[c]
@@ -150,7 +185,7 @@ class YOLO(object):
             draw = ImageDraw.Draw(image)
             label_size = draw.textsize(label, font)
             label = label.encode('utf-8')
-            print(label)
+            print(label, top, left, bottom, right)
             
             if top - label_size[1] >= 0:
                 text_origin = np.array([left, top - label_size[1]])
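Notes 3 and 4 of predict.py refer to the top, left, bottom, right values computed in this loop (with the new print they also appear on stdout). A hedged sketch of cropping one detection out of the original image; the file names and coordinate values are illustrative:

    import numpy as np
    from PIL import Image

    # hypothetical values read from one iteration of the detect_image loop
    top, left, bottom, right = 176, 83, 343, 250

    image = Image.open("street.jpg")                  # hypothetical input image
    crop = np.array(image)[top:bottom, left:right]    # note 4: slice the array
    Image.fromarray(crop).save("target_0.jpg")        # hypothetical output name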