diff --git a/VOCdevkit/VOC2007/Annotations/README.md b/VOCdevkit/VOC2007/Annotations/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d73a1916c99aecd41ce3fbebad1da573bc3c5845
--- /dev/null
+++ b/VOCdevkit/VOC2007/Annotations/README.md
@@ -0,0 +1 @@
+Stores the annotation (label) files
\ No newline at end of file
diff --git a/VOCdevkit/VOC2007/ImageSets/Main/README.md b/VOCdevkit/VOC2007/ImageSets/Main/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2f64921067993322973aba9260091c48c26aa43f
--- /dev/null
+++ b/VOCdevkit/VOC2007/ImageSets/Main/README.md
@@ -0,0 +1 @@
+Stores the training index files
\ No newline at end of file
diff --git a/VOCdevkit/VOC2007/JPEGImages/README.md b/VOCdevkit/VOC2007/JPEGImages/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..58f32ea1aa6b31fcbbeba5b862e31bfff5ae527c
--- /dev/null
+++ b/VOCdevkit/VOC2007/JPEGImages/README.md
@@ -0,0 +1 @@
+Stores the image files
\ No newline at end of file
diff --git a/VOCdevkit/VOC2007/voc2yolo4.py b/VOCdevkit/VOC2007/voc2yolo4.py
new file mode 100644
index 0000000000000000000000000000000000000000..02458b71c6d285366ce5caafd5c25a3ec55fdf91
--- /dev/null
+++ b/VOCdevkit/VOC2007/voc2yolo4.py
@@ -0,0 +1,44 @@
+import os
+import random
+
+xmlfilepath=r'./VOCdevkit/VOC2007/Annotations'
+saveBasePath=r"./VOCdevkit/VOC2007/ImageSets/Main/"
+
+trainval_percent=0
+train_percent=1
+
+temp_xml = os.listdir(xmlfilepath)
+total_xml = []
+for xml in temp_xml:
+    if xml.endswith(".xml"):
+        total_xml.append(xml)
+
+num=len(total_xml)
+indices=range(num)
+tv=int(num*trainval_percent)
+tr=int(tv*train_percent)
+trainval= random.sample(indices,tv)
+train=random.sample(trainval,tr)
+
+print("train and val size",tv)
+print("train size",tr)
+ftrainval = open(os.path.join(saveBasePath,'trainval.txt'), 'w')
+ftest = open(os.path.join(saveBasePath,'test.txt'), 'w')
+ftrain = open(os.path.join(saveBasePath,'train.txt'), 'w')
+fval = open(os.path.join(saveBasePath,'val.txt'), 'w')
+
+for i in indices:
+    name=total_xml[i][:-4]+'\n'
+    if i in trainval:
+        ftrainval.write(name)
+        if i in train:
+            ftrain.write(name)
+        else:
+            fval.write(name)
+    else:
+        ftest.write(name)
+
+ftrainval.close()
+ftrain.close()
+fval.close()
+ftest.close()
diff --git a/ciou_test.py b/ciou_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..20ad2228964050a53f46fa99a4f43706d61fd2a6
--- /dev/null
+++ b/ciou_test.py
@@ -0,0 +1,56 @@
+import torch
+import math
+import numpy as np
+def box_ciou(b1, b2):
+    """
+    Inputs:
+    ----------
+    b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
+    b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
+
+    Returns:
+    -------
+    ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
+    """
+    # top-left and bottom-right corners of the predicted boxes
+    b1_xy = b1[..., :2]
+    b1_wh = b1[..., 2:4]
+    b1_wh_half = b1_wh/2.
+    b1_mins = b1_xy - b1_wh_half
+    b1_maxes = b1_xy + b1_wh_half
+    # top-left and bottom-right corners of the ground-truth boxes
+    b2_xy = b2[..., :2]
+    b2_wh = b2[..., 2:4]
+    b2_wh_half = b2_wh/2.
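+    # (worked example of this xywh -> corner conversion, using box1 from the test
+    #  at the bottom of this file: a box with center (25, 25) and size 40x40 has
+    #  mins = (5, 5) and maxes = (45, 45))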
+ b2_mins = b2_xy - b2_wh_half + b2_maxes = b2_xy + b2_wh_half + + # 求真实框和预测框所有的iou + intersect_mins = torch.max(b1_mins, b2_mins) + intersect_maxes = torch.min(b1_maxes, b2_maxes) + intersect_wh = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes)) + intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] + b1_area = b1_wh[..., 0] * b1_wh[..., 1] + b2_area = b2_wh[..., 0] * b2_wh[..., 1] + union_area = b1_area + b2_area - intersect_area + iou = intersect_area / (union_area + 1e-7) + + # 计算中心的差距 + center_distance = torch.sum(torch.pow((b1_xy - b2_xy), 2), axis=-1) + # 找到包裹两个框的最小框的左上角和右下角 + enclose_mins = torch.min(b1_mins, b2_mins) + enclose_maxes = torch.max(b1_maxes, b2_maxes) + enclose_wh = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes)) + # 计算对角线距离 + enclose_diagonal = torch.sum(torch.pow(enclose_wh,2), axis=-1) + ciou = iou - 1.0 * (center_distance) / (enclose_diagonal + 1e-7) + + v = (4 / (math.pi ** 2)) * torch.pow((torch.atan(b1_wh[..., 0]/b1_wh[..., 1]) - torch.atan(b2_wh[..., 0]/b2_wh[..., 1])), 2) + alpha = v / (1.0 - iou + v) + ciou = ciou - alpha * v + return ciou + +box1 = torch.from_numpy(np.array([[25,25,40,40]])).type(torch.FloatTensor) +box2 = torch.from_numpy(np.array([[25,25,30,40]])).type(torch.FloatTensor) + +print(box_ciou(box1,box2)) \ No newline at end of file diff --git a/get_dr_txt.py b/get_dr_txt.py new file mode 100644 index 0000000000000000000000000000000000000000..b1136b0db428d1128217ea2aa599d35628f16a8a --- /dev/null +++ b/get_dr_txt.py @@ -0,0 +1,97 @@ +#-------------------------------------# +# mAP所需文件计算代码 +# 具体教程请查看Bilibili +# Bubbliiiing +#-------------------------------------# +import cv2 +import keras +import numpy as np +import colorsys +import os +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn +from torch.autograd import Variable +from yolo import YOLO +from nets.yolo4 import YoloBody +from PIL import Image,ImageFont, ImageDraw +from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes + +class mAP_Yolo(YOLO): + #---------------------------------------------------# + # 检测图片 + #---------------------------------------------------# + def detect_image(self,image_id,image): + self.confidence = 0.05 + f = open("./input/detection-results/"+image_id+".txt","w") + image_shape = np.array(np.shape(image)[0:2]) + + crop_img = np.array(letterbox_image(image, (self.model_image_size[0],self.model_image_size[1]))) + photo = np.array(crop_img,dtype = np.float32) + photo /= 255.0 + photo = np.transpose(photo, (2, 0, 1)) + photo = photo.astype(np.float32) + images = [] + images.append(photo) + images = np.asarray(images) + + with torch.no_grad(): + images = torch.from_numpy(images) + if self.cuda: + images = images.cuda() + + + outputs = self.net(images) + output_list = [] + for i in range(3): + output_list.append(self.yolo_decodes[i](outputs[i])) + output = torch.cat(output_list, 1) + batch_detections = non_max_suppression(output, len(self.class_names), + conf_thres=self.confidence, + nms_thres=0.3) + + try: + batch_detections = batch_detections[0].cpu().numpy() + except: + return image + + top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence + top_conf = batch_detections[top_index,4]*batch_detections[top_index,5] + top_label = np.array(batch_detections[top_index,-1],np.int32) + top_bboxes = np.array(batch_detections[top_index,:4]) + top_xmin, top_ymin, top_xmax, top_ymax = 
np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1) + + # 去掉灰条 + boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape) + + for i, c in enumerate(top_label): + predicted_class = self.class_names[c] + score = str(top_conf[i]) + + top, left, bottom, right = boxes[i] + f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom)))) + + f.close() + return + +yolo = mAP_Yolo() +image_ids = open('VOCdevkit/VOC2007/ImageSets/Main/test.txt').read().strip().split() + +if not os.path.exists("./input"): + os.makedirs("./input") +if not os.path.exists("./input/detection-results"): + os.makedirs("./input/detection-results") +if not os.path.exists("./input/images-optional"): + os.makedirs("./input/images-optional") + + +for image_id in image_ids: + image_path = "./VOCdevkit/VOC2007/JPEGImages/"+image_id+".jpg" + image = Image.open(image_path) + # 开启后在之后计算mAP可以可视化 + # image.save("./input/images-optional/"+image_id+".jpg") + yolo.detect_image(image_id,image) + print(image_id," done!") + + +print("Conversion completed!") \ No newline at end of file diff --git a/get_gt_txt.py b/get_gt_txt.py new file mode 100644 index 0000000000000000000000000000000000000000..5fd1ddb47b67050704cda0eb0a24f9be8bd67f37 --- /dev/null +++ b/get_gt_txt.py @@ -0,0 +1,33 @@ +#-------------------------------------# +# mAP所需文件计算代码 +# 具体教程请查看Bilibili +# Bubbliiiing +#-------------------------------------# +import sys +import os +import glob +import xml.etree.ElementTree as ET + +image_ids = open('VOCdevkit/VOC2007/ImageSets/Main/test.txt').read().strip().split() + +if not os.path.exists("./input"): + os.makedirs("./input") +if not os.path.exists("./input/ground-truth"): + os.makedirs("./input/ground-truth") + +for image_id in image_ids: + with open("./input/ground-truth/"+image_id+".txt", "w") as new_f: + root = ET.parse("VOCdevkit/VOC2007/Annotations/"+image_id+".xml").getroot() + for obj in root.findall('object'): + if obj.find('difficult')!=None: + difficult = obj.find('difficult').text + if int(difficult)==1: + continue + obj_name = obj.find('name').text + bndbox = obj.find('bndbox') + left = bndbox.find('xmin').text + top = bndbox.find('ymin').text + right = bndbox.find('xmax').text + bottom = bndbox.find('ymax').text + new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) +print("Conversion completed!") \ No newline at end of file diff --git a/get_map.py b/get_map.py new file mode 100644 index 0000000000000000000000000000000000000000..2cd70c81d7a5ffc8f15f91785b1f986e0483bdb0 --- /dev/null +++ b/get_map.py @@ -0,0 +1,880 @@ +import glob +import json +import os +import shutil +import operator +import sys +import argparse +import math + +import numpy as np +#----------------------------------------------------# +# 用于计算mAP +# 代码克隆自https://github.com/Cartucho/mAP +#----------------------------------------------------# +MINOVERLAP = 0.5 # default value (defined in the PASCAL VOC2012 challenge) + +parser = argparse.ArgumentParser() +parser.add_argument('-na', '--no-animation', help="no animation is shown.", action="store_true") +parser.add_argument('-np', '--no-plot', help="no plot is shown.", action="store_true") +parser.add_argument('-q', '--quiet', help="minimalistic console output.", action="store_true") +# argparse receiving list of classes to be ignored +parser.add_argument('-i', 
'--ignore', nargs='+', type=str, help="ignore a list of classes.") +# argparse receiving list of classes with specific IoU (e.g., python main.py --set-class-iou person 0.7) +parser.add_argument('--set-class-iou', nargs='+', type=str, help="set IoU for a specific class.") +args = parser.parse_args() + +''' + 0,0 ------> x (width) + | + | (Left,Top) + | *_________ + | | | + | | + y |_________| + (height) * + (Right,Bottom) +''' + +# if there are no classes to ignore then replace None by empty list +if args.ignore is None: + args.ignore = [] + +specific_iou_flagged = False +if args.set_class_iou is not None: + specific_iou_flagged = True + +# make sure that the cwd() is the location of the python script (so that every path makes sense) +os.chdir(os.path.dirname(os.path.abspath(__file__))) + +GT_PATH = os.path.join(os.getcwd(), 'input', 'ground-truth') +DR_PATH = os.path.join(os.getcwd(), 'input', 'detection-results') +# if there are no images then no animation can be shown +IMG_PATH = os.path.join(os.getcwd(), 'input', 'images-optional') +if os.path.exists(IMG_PATH): + for dirpath, dirnames, files in os.walk(IMG_PATH): + if not files: + # no image files found + args.no_animation = True +else: + args.no_animation = True + +# try to import OpenCV if the user didn't choose the option --no-animation +show_animation = False +if not args.no_animation: + try: + import cv2 + show_animation = True + except ImportError: + print("\"opencv-python\" not found, please install to visualize the results.") + args.no_animation = True + +# try to import Matplotlib if the user didn't choose the option --no-plot +draw_plot = False +if not args.no_plot: + try: + import matplotlib.pyplot as plt + draw_plot = True + except ImportError: + print("\"matplotlib\" not found, please install it to get the resulting plots.") + args.no_plot = True + + +def log_average_miss_rate(precision, fp_cumsum, num_images): + """ + log-average miss rate: + Calculated by averaging miss rates at 9 evenly spaced FPPI points + between 10e-2 and 10e0, in log-space. + + output: + lamr | log-average miss rate + mr | miss rate + fppi | false positives per image + + references: + [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the + State of the Art." Pattern Analysis and Machine Intelligence, IEEE + Transactions on 34.4 (2012): 743 - 761. 
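+        (as implemented below: for each of the 9 log-spaced reference FPPI values
+         the miss rate at the largest FPPI <= that reference is looked up, and
+         lamr = exp(mean(log(max(1e-10, selected miss rates)))))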
+ """ + + # if there were no detections of that class + if precision.size == 0: + lamr = 0 + mr = 1 + fppi = 0 + return lamr, mr, fppi + + fppi = fp_cumsum / float(num_images) + mr = (1 - precision) + + fppi_tmp = np.insert(fppi, 0, -1.0) + mr_tmp = np.insert(mr, 0, 1.0) + + # Use 9 evenly spaced reference points in log-space + ref = np.logspace(-2.0, 0.0, num = 9) + for i, ref_i in enumerate(ref): + # np.where() will always find at least 1 index, since min(ref) = 0.01 and min(fppi_tmp) = -1.0 + j = np.where(fppi_tmp <= ref_i)[-1][-1] + ref[i] = mr_tmp[j] + + # log(0) is undefined, so we use the np.maximum(1e-10, ref) + lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref)))) + + return lamr, mr, fppi + +""" + throw error and exit +""" +def error(msg): + print(msg) + sys.exit(0) + +""" + check if the number is a float between 0.0 and 1.0 +""" +def is_float_between_0_and_1(value): + try: + val = float(value) + if val > 0.0 and val < 1.0: + return True + else: + return False + except ValueError: + return False + +""" + Calculate the AP given the recall and precision array + 1st) We compute a version of the measured precision/recall curve with + precision monotonically decreasing + 2nd) We compute the AP as the area under this curve by numerical integration. +""" +def voc_ap(rec, prec): + """ + --- Official matlab code VOC2012--- + mrec=[0 ; rec ; 1]; + mpre=[0 ; prec ; 0]; + for i=numel(mpre)-1:-1:1 + mpre(i)=max(mpre(i),mpre(i+1)); + end + i=find(mrec(2:end)~=mrec(1:end-1))+1; + ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); + """ + rec.insert(0, 0.0) # insert 0.0 at begining of list + rec.append(1.0) # insert 1.0 at end of list + mrec = rec[:] + prec.insert(0, 0.0) # insert 0.0 at begining of list + prec.append(0.0) # insert 0.0 at end of list + mpre = prec[:] + """ + This part makes the precision monotonically decreasing + (goes from the end to the beginning) + matlab: for i=numel(mpre)-1:-1:1 + mpre(i)=max(mpre(i),mpre(i+1)); + """ + # matlab indexes start in 1 but python in 0, so I have to do: + # range(start=(len(mpre) - 2), end=0, step=-1) + # also the python function range excludes the end, resulting in: + # range(start=(len(mpre) - 2), end=-1, step=-1) + for i in range(len(mpre)-2, -1, -1): + mpre[i] = max(mpre[i], mpre[i+1]) + """ + This part creates a list of indexes where the recall changes + matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1; + """ + i_list = [] + for i in range(1, len(mrec)): + if mrec[i] != mrec[i-1]: + i_list.append(i) # if it was matlab would be i + 1 + """ + The Average Precision (AP) is the area under the curve + (numerical integration) + matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); + """ + ap = 0.0 + for i in i_list: + ap += ((mrec[i]-mrec[i-1])*mpre[i]) + return ap, mrec, mpre + + +""" + Convert the lines of a file to a list +""" +def file_lines_to_list(path): + # open txt file lines to a list + with open(path) as f: + content = f.readlines() + # remove whitespace characters like `\n` at the end of each line + content = [x.strip() for x in content] + return content + +""" + Draws text in image +""" +def draw_text_in_image(img, text, pos, color, line_width): + font = cv2.FONT_HERSHEY_PLAIN + fontScale = 1 + lineType = 1 + bottomLeftCornerOfText = pos + cv2.putText(img, text, + bottomLeftCornerOfText, + font, + fontScale, + color, + lineType) + text_width, _ = cv2.getTextSize(text, font, fontScale, lineType)[0] + return img, (line_width + text_width) + +""" + Plot - adjust axes +""" +def adjust_axes(r, t, fig, axes): + # get text width for re-scaling + bb = 
t.get_window_extent(renderer=r) + text_width_inches = bb.width / fig.dpi + # get axis width in inches + current_fig_width = fig.get_figwidth() + new_fig_width = current_fig_width + text_width_inches + propotion = new_fig_width / current_fig_width + # get axis limit + x_lim = axes.get_xlim() + axes.set_xlim([x_lim[0], x_lim[1]*propotion]) + +""" + Draw plot using Matplotlib +""" +def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar): + # sort the dictionary by decreasing value, into a list of tuples + sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1)) + # unpacking the list of tuples into two lists + sorted_keys, sorted_values = zip(*sorted_dic_by_value) + # + if true_p_bar != "": + """ + Special case to draw in: + - green -> TP: True Positives (object detected and matches ground-truth) + - red -> FP: False Positives (object detected but does not match ground-truth) + - orange -> FN: False Negatives (object not detected but present in the ground-truth) + """ + fp_sorted = [] + tp_sorted = [] + for key in sorted_keys: + fp_sorted.append(dictionary[key] - true_p_bar[key]) + tp_sorted.append(true_p_bar[key]) + plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive') + plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted) + # add legend + plt.legend(loc='lower right') + """ + Write number on side of bar + """ + fig = plt.gcf() # gcf - get current figure + axes = plt.gca() + r = fig.canvas.get_renderer() + for i, val in enumerate(sorted_values): + fp_val = fp_sorted[i] + tp_val = tp_sorted[i] + fp_str_val = " " + str(fp_val) + tp_str_val = fp_str_val + " " + str(tp_val) + # trick to paint multicolor with offset: + # first paint everything and then repaint the first number + t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold') + plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold') + if i == (len(sorted_values)-1): # largest bar + adjust_axes(r, t, fig, axes) + else: + plt.barh(range(n_classes), sorted_values, color=plot_color) + """ + Write number on side of bar + """ + fig = plt.gcf() # gcf - get current figure + axes = plt.gca() + r = fig.canvas.get_renderer() + for i, val in enumerate(sorted_values): + str_val = " " + str(val) # add a space before + if val < 1.0: + str_val = " {0:.2f}".format(val) + t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold') + # re-set axes to show number inside the figure + if i == (len(sorted_values)-1): # largest bar + adjust_axes(r, t, fig, axes) + # set window title + fig.canvas.set_window_title(window_title) + # write classes in y axis + tick_font_size = 12 + plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size) + """ + Re-scale height accordingly + """ + init_height = fig.get_figheight() + # comput the matrix height in points and inches + dpi = fig.dpi + height_pt = n_classes * (tick_font_size * 1.4) # 1.4 (some spacing) + height_in = height_pt / dpi + # compute the required figure height + top_margin = 0.15 # in percentage of the figure height + bottom_margin = 0.05 # in percentage of the figure height + figure_height = height_in / (1 - top_margin - bottom_margin) + # set new height + if figure_height > init_height: + fig.set_figheight(figure_height) + + # set plot title + plt.title(plot_title, fontsize=14) + # set axis titles + # plt.xlabel('classes') + plt.xlabel(x_label, 
fontsize='large') + # adjust size of window + fig.tight_layout() + # save the plot + fig.savefig(output_path) + # show image + if to_show: + plt.show() + # close the plot + plt.close() + +""" + Create a ".temp_files/" and "results/" directory +""" +TEMP_FILES_PATH = ".temp_files" +if not os.path.exists(TEMP_FILES_PATH): # if it doesn't exist already + os.makedirs(TEMP_FILES_PATH) +results_files_path = "results" +if os.path.exists(results_files_path): # if it exist already + # reset the results directory + shutil.rmtree(results_files_path) + +os.makedirs(results_files_path) +if draw_plot: + os.makedirs(os.path.join(results_files_path, "classes")) +if show_animation: + os.makedirs(os.path.join(results_files_path, "images", "detections_one_by_one")) + +""" + ground-truth + Load each of the ground-truth files into a temporary ".json" file. + Create a list of all the class names present in the ground-truth (gt_classes). +""" +# get a list with the ground-truth files +ground_truth_files_list = glob.glob(GT_PATH + '/*.txt') +if len(ground_truth_files_list) == 0: + error("Error: No ground-truth files found!") +ground_truth_files_list.sort() +# dictionary with counter per class +gt_counter_per_class = {} +counter_images_per_class = {} + +for txt_file in ground_truth_files_list: + #print(txt_file) + file_id = txt_file.split(".txt", 1)[0] + file_id = os.path.basename(os.path.normpath(file_id)) + # check if there is a correspondent detection-results file + temp_path = os.path.join(DR_PATH, (file_id + ".txt")) + if not os.path.exists(temp_path): + error_msg = "Error. File not found: {}\n".format(temp_path) + error_msg += "(You can avoid this error message by running extra/intersect-gt-and-dr.py)" + error(error_msg) + lines_list = file_lines_to_list(txt_file) + # create ground-truth dictionary + bounding_boxes = [] + is_difficult = False + already_seen_classes = [] + for line in lines_list: + try: + if "difficult" in line: + class_name, left, top, right, bottom, _difficult = line.split() + is_difficult = True + else: + class_name, left, top, right, bottom = line.split() + except ValueError: + error_msg = "Error: File " + txt_file + " in the wrong format.\n" + error_msg += " Expected: ['difficult']\n" + error_msg += " Received: " + line + error_msg += "\n\nIf you have a with spaces between words you should remove them\n" + error_msg += "by running the script \"remove_space.py\" or \"rename_class.py\" in the \"extra/\" folder." 
+ error(error_msg) + # check if class is in the ignore list, if yes skip + if class_name in args.ignore: + continue + bbox = left + " " + top + " " + right + " " +bottom + if is_difficult: + bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True}) + is_difficult = False + else: + bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False}) + # count that object + if class_name in gt_counter_per_class: + gt_counter_per_class[class_name] += 1 + else: + # if class didn't exist yet + gt_counter_per_class[class_name] = 1 + + if class_name not in already_seen_classes: + if class_name in counter_images_per_class: + counter_images_per_class[class_name] += 1 + else: + # if class didn't exist yet + counter_images_per_class[class_name] = 1 + already_seen_classes.append(class_name) + + + # dump bounding_boxes into a ".json" file + with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile: + json.dump(bounding_boxes, outfile) + +gt_classes = list(gt_counter_per_class.keys()) +# let's sort the classes alphabetically +gt_classes = sorted(gt_classes) +n_classes = len(gt_classes) +#print(gt_classes) +#print(gt_counter_per_class) + +""" + Check format of the flag --set-class-iou (if used) + e.g. check if class exists +""" +if specific_iou_flagged: + n_args = len(args.set_class_iou) + error_msg = \ + '\n --set-class-iou [class_1] [IoU_1] [class_2] [IoU_2] [...]' + if n_args % 2 != 0: + error('Error, missing arguments. Flag usage:' + error_msg) + # [class_1] [IoU_1] [class_2] [IoU_2] + # specific_iou_classes = ['class_1', 'class_2'] + specific_iou_classes = args.set_class_iou[::2] # even + # iou_list = ['IoU_1', 'IoU_2'] + iou_list = args.set_class_iou[1::2] # odd + if len(specific_iou_classes) != len(iou_list): + error('Error, missing arguments. Flag usage:' + error_msg) + for tmp_class in specific_iou_classes: + if tmp_class not in gt_classes: + error('Error, unknown class \"' + tmp_class + '\". Flag usage:' + error_msg) + for num in iou_list: + if not is_float_between_0_and_1(num): + error('Error, IoU must be between 0.0 and 1.0. Flag usage:' + error_msg) + +""" + detection-results + Load each of the detection-results files into a temporary ".json" file. +""" +# get a list with the detection-results files +dr_files_list = glob.glob(DR_PATH + '/*.txt') +dr_files_list.sort() + +for class_index, class_name in enumerate(gt_classes): + bounding_boxes = [] + for txt_file in dr_files_list: + #print(txt_file) + # the first time it checks if all the corresponding ground-truth files exist + file_id = txt_file.split(".txt",1)[0] + file_id = os.path.basename(os.path.normpath(file_id)) + temp_path = os.path.join(GT_PATH, (file_id + ".txt")) + if class_index == 0: + if not os.path.exists(temp_path): + error_msg = "Error. 
File not found: {}\n".format(temp_path) + error_msg += "(You can avoid this error message by running extra/intersect-gt-and-dr.py)" + error(error_msg) + lines = file_lines_to_list(txt_file) + for line in lines: + try: + tmp_class_name, confidence, left, top, right, bottom = line.split() + except ValueError: + error_msg = "Error: File " + txt_file + " in the wrong format.\n" + error_msg += " Expected: \n" + error_msg += " Received: " + line + error(error_msg) + if tmp_class_name == class_name: + #print("match") + bbox = left + " " + top + " " + right + " " +bottom + bounding_boxes.append({"confidence":confidence, "file_id":file_id, "bbox":bbox}) + #print(bounding_boxes) + # sort detection-results by decreasing confidence + bounding_boxes.sort(key=lambda x:float(x['confidence']), reverse=True) + with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile: + json.dump(bounding_boxes, outfile) + +""" + Calculate the AP for each class +""" +sum_AP = 0.0 +ap_dictionary = {} +lamr_dictionary = {} +# open file to store the results +with open(results_files_path + "/results.txt", 'w') as results_file: + results_file.write("# AP and precision/recall per class\n") + count_true_positives = {} + for class_index, class_name in enumerate(gt_classes): + count_true_positives[class_name] = 0 + """ + Load detection-results of that class + """ + dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json" + dr_data = json.load(open(dr_file)) + + """ + Assign detection-results to ground-truth objects + """ + nd = len(dr_data) + tp = [0] * nd # creates an array of zeros of size nd + fp = [0] * nd + for idx, detection in enumerate(dr_data): + file_id = detection["file_id"] + if show_animation: + # find ground truth image + ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*") + #tifCounter = len(glob.glob1(myPath,"*.tif")) + if len(ground_truth_img) == 0: + error("Error. Image not found with id: " + file_id) + elif len(ground_truth_img) > 1: + error("Error. 
Multiple image with id: " + file_id) + else: # found image + #print(IMG_PATH + "/" + ground_truth_img[0]) + # Load image + img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0]) + # load image with draws of multiple detections + img_cumulative_path = results_files_path + "/images/" + ground_truth_img[0] + if os.path.isfile(img_cumulative_path): + img_cumulative = cv2.imread(img_cumulative_path) + else: + img_cumulative = img.copy() + # Add bottom border to image + bottom_border = 60 + BLACK = [0, 0, 0] + img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK) + # assign detection-results to ground truth object if any + # open ground-truth with that file_id + gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json" + ground_truth_data = json.load(open(gt_file)) + ovmax = -1 + gt_match = -1 + # load detected object bounding-box + bb = [ float(x) for x in detection["bbox"].split() ] + for obj in ground_truth_data: + # look for a class_name match + if obj["class_name"] == class_name: + bbgt = [ float(x) for x in obj["bbox"].split() ] + bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])] + iw = bi[2] - bi[0] + 1 + ih = bi[3] - bi[1] + 1 + if iw > 0 and ih > 0: + # compute overlap (IoU) = area of intersection / area of union + ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0] + + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih + ov = iw * ih / ua + if ov > ovmax: + ovmax = ov + gt_match = obj + + # assign detection as true positive/don't care/false positive + if show_animation: + status = "NO MATCH FOUND!" # status is only used in the animation + # set minimum overlap + min_overlap = MINOVERLAP + if specific_iou_flagged: + if class_name in specific_iou_classes: + index = specific_iou_classes.index(class_name) + min_overlap = float(iou_list[index]) + if ovmax >= min_overlap: + if "difficult" not in gt_match: + if not bool(gt_match["used"]): + # true positive + tp[idx] = 1 + gt_match["used"] = True + count_true_positives[class_name] += 1 + # update the ".json" file + with open(gt_file, 'w') as f: + f.write(json.dumps(ground_truth_data)) + if show_animation: + status = "MATCH!" + else: + # false positive (multiple detection) + fp[idx] = 1 + if show_animation: + status = "REPEATED MATCH!" 
+ else: + # false positive + fp[idx] = 1 + if ovmax > 0: + status = "INSUFFICIENT OVERLAP" + + """ + Draw image to show animation + """ + if show_animation: + height, widht = img.shape[:2] + # colors (OpenCV works with BGR) + white = (255,255,255) + light_blue = (255,200,100) + green = (0,255,0) + light_red = (30,30,255) + # 1st line + margin = 10 + v_pos = int(height - margin - (bottom_border / 2.0)) + text = "Image: " + ground_truth_img[0] + " " + img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) + text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " " + img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, line_width) + if ovmax != -1: + color = light_red + if status == "INSUFFICIENT OVERLAP": + text = "IoU: {0:.2f}% ".format(ovmax*100) + "< {0:.2f}% ".format(min_overlap*100) + else: + text = "IoU: {0:.2f}% ".format(ovmax*100) + ">= {0:.2f}% ".format(min_overlap*100) + color = green + img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) + # 2nd line + v_pos += int(bottom_border / 2.0) + rank_pos = str(idx+1) # rank position (idx starts at 0) + text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(float(detection["confidence"])*100) + img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) + color = light_red + if status == "MATCH!": + color = green + text = "Result: " + status + " " + img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) + + font = cv2.FONT_HERSHEY_SIMPLEX + if ovmax > 0: # if there is intersections between the bounding-boxes + bbgt = [ int(round(float(x))) for x in gt_match["bbox"].split() ] + cv2.rectangle(img,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) + cv2.rectangle(img_cumulative,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) + cv2.putText(img_cumulative, class_name, (bbgt[0],bbgt[1] - 5), font, 0.6, light_blue, 1, cv2.LINE_AA) + bb = [int(i) for i in bb] + cv2.rectangle(img,(bb[0],bb[1]),(bb[2],bb[3]),color,2) + cv2.rectangle(img_cumulative,(bb[0],bb[1]),(bb[2],bb[3]),color,2) + cv2.putText(img_cumulative, class_name, (bb[0],bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA) + # show image + cv2.imshow("Animation", img) + cv2.waitKey(20) # show for 20 ms + # save image to results + output_img_path = results_files_path + "/images/detections_one_by_one/" + class_name + "_detection" + str(idx) + ".jpg" + cv2.imwrite(output_img_path, img) + # save the image with all the objects drawn to it + cv2.imwrite(img_cumulative_path, img_cumulative) + + #print(tp) + # compute precision/recall + cumsum = 0 + for idx, val in enumerate(fp): + fp[idx] += cumsum + cumsum += val + cumsum = 0 + for idx, val in enumerate(tp): + tp[idx] += cumsum + cumsum += val + #print(tp) + rec = tp[:] + for idx, val in enumerate(tp): + rec[idx] = float(tp[idx]) / gt_counter_per_class[class_name] + #print(rec) + prec = tp[:] + for idx, val in enumerate(tp): + prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx]) + #print(prec) + + ap, mrec, mprec = voc_ap(rec[:], prec[:]) + sum_AP += ap + text = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP " #class_name + " AP = {0:.2f}%".format(ap*100) + """ + Write to results.txt + """ + rounded_prec = [ '%.2f' % elem for elem in prec ] + rounded_rec = [ '%.2f' % elem for elem in rec ] + results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n") + if not args.quiet: + print(text) + 
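+        # (worked example of the integration done by voc_ap above, assuming
+        #  rec = [0.5, 1.0] and prec = [1.0, 0.5]: the padded arrays are
+        #  mrec = [0, 0.5, 1.0, 1.0] and, after the monotonic pass,
+        #  mpre = [1.0, 1.0, 0.5, 0.0], so
+        #  ap = (0.5 - 0)*1.0 + (1.0 - 0.5)*0.5 = 0.75)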
ap_dictionary[class_name] = ap + + n_images = counter_images_per_class[class_name] + lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images) + lamr_dictionary[class_name] = lamr + + """ + Draw plot + """ + if draw_plot: + plt.plot(rec, prec, '-o') + # add a new penultimate point to the list (mrec[-2], 0.0) + # since the last line segment (and respective area) do not affect the AP value + area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]] + area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]] + plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r') + # set window title + fig = plt.gcf() # gcf - get current figure + fig.canvas.set_window_title('AP ' + class_name) + # set plot title + plt.title('class: ' + text) + #plt.suptitle('This is a somewhat long figure title', fontsize=16) + # set axis titles + plt.xlabel('Recall') + plt.ylabel('Precision') + # optional - set axes + axes = plt.gca() # gca - get current axes + axes.set_xlim([0.0,1.0]) + axes.set_ylim([0.0,1.05]) # .05 to give some extra space + # Alternative option -> wait for button to be pressed + #while not plt.waitforbuttonpress(): pass # wait for key display + # Alternative option -> normal display + #plt.show() + # save the plot + fig.savefig(results_files_path + "/classes/" + class_name + ".png") + plt.cla() # clear axes for next plot + + if show_animation: + cv2.destroyAllWindows() + + results_file.write("\n# mAP of all classes\n") + mAP = sum_AP / n_classes + text = "mAP = {0:.2f}%".format(mAP*100) + results_file.write(text + "\n") + print(text) + +# remove the temp_files directory +shutil.rmtree(TEMP_FILES_PATH) + +""" + Count total of detection-results +""" +# iterate through all the files +det_counter_per_class = {} +for txt_file in dr_files_list: + # get lines to list + lines_list = file_lines_to_list(txt_file) + for line in lines_list: + class_name = line.split()[0] + # check if class is in the ignore list, if yes skip + if class_name in args.ignore: + continue + # count that object + if class_name in det_counter_per_class: + det_counter_per_class[class_name] += 1 + else: + # if class didn't exist yet + det_counter_per_class[class_name] = 1 +#print(det_counter_per_class) +dr_classes = list(det_counter_per_class.keys()) + + +""" + Plot the total number of occurences of each class in the ground-truth +""" +if draw_plot: + window_title = "ground-truth-info" + plot_title = "ground-truth\n" + plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)" + x_label = "Number of objects per class" + output_path = results_files_path + "/ground-truth-info.png" + to_show = False + plot_color = 'forestgreen' + draw_plot_func( + gt_counter_per_class, + n_classes, + window_title, + plot_title, + x_label, + output_path, + to_show, + plot_color, + '', + ) + +""" + Write number of ground-truth objects per class to results.txt +""" +with open(results_files_path + "/results.txt", 'a') as results_file: + results_file.write("\n# Number of ground-truth objects per class\n") + for class_name in sorted(gt_counter_per_class): + results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n") + +""" + Finish counting true positives +""" +for class_name in dr_classes: + # if class exists in detection-result but not in ground-truth then there are no true positives in that class + if class_name not in gt_classes: + count_true_positives[class_name] = 0 +#print(count_true_positives) + +""" + Plot the total number of occurences of each 
class in the "detection-results" folder +""" +if draw_plot: + window_title = "detection-results-info" + # Plot title + plot_title = "detection-results\n" + plot_title += "(" + str(len(dr_files_list)) + " files and " + count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values())) + plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)" + # end Plot title + x_label = "Number of objects per class" + output_path = results_files_path + "/detection-results-info.png" + to_show = False + plot_color = 'forestgreen' + true_p_bar = count_true_positives + draw_plot_func( + det_counter_per_class, + len(det_counter_per_class), + window_title, + plot_title, + x_label, + output_path, + to_show, + plot_color, + true_p_bar + ) + +""" + Write number of detected objects per class to results.txt +""" +with open(results_files_path + "/results.txt", 'a') as results_file: + results_file.write("\n# Number of detected objects per class\n") + for class_name in sorted(dr_classes): + n_det = det_counter_per_class[class_name] + text = class_name + ": " + str(n_det) + text += " (tp:" + str(count_true_positives[class_name]) + "" + text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n" + results_file.write(text) + +""" + Draw log-average miss rate plot (Show lamr of all classes in decreasing order) +""" +if draw_plot: + window_title = "lamr" + plot_title = "log-average miss rate" + x_label = "log-average miss rate" + output_path = results_files_path + "/lamr.png" + to_show = False + plot_color = 'royalblue' + draw_plot_func( + lamr_dictionary, + n_classes, + window_title, + plot_title, + x_label, + output_path, + to_show, + plot_color, + "" + ) + +""" + Draw mAP plot (Show AP's of all classes in decreasing order) +""" +if draw_plot: + window_title = "mAP" + plot_title = "mAP = {0:.2f}%".format(mAP*100) + x_label = "Average Precision" + output_path = results_files_path + "/mAP.png" + to_show = True + plot_color = 'royalblue' + draw_plot_func( + ap_dictionary, + n_classes, + window_title, + plot_title, + x_label, + output_path, + to_show, + plot_color, + "" + ) diff --git a/img/street.jpg b/img/street.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6750d3724956500e32d5bc4a918a57db7df30100 Binary files /dev/null and b/img/street.jpg differ diff --git a/logs/README.md b/logs/README.md new file mode 100644 index 0000000000000000000000000000000000000000..984825a33fed43b06f1d8cd1f38bb73699e2ff47 --- /dev/null +++ b/logs/README.md @@ -0,0 +1 @@ +用于存放训练好的文件 \ No newline at end of file diff --git a/model_data/coco_classes.txt b/model_data/coco_classes.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ec6eeea2f217dca9788d8a4f9ab032dd05e6beb --- /dev/null +++ b/model_data/coco_classes.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git 
a/model_data/simhei.ttf b/model_data/simhei.ttf new file mode 100644 index 0000000000000000000000000000000000000000..5bd4687e7212775e23bea569f08fdd1cd7395dc3 Binary files /dev/null and b/model_data/simhei.ttf differ diff --git a/model_data/voc_classes.txt b/model_data/voc_classes.txt new file mode 100644 index 0000000000000000000000000000000000000000..b4c1b622c19478d847285c7fcde40bdbc6355b90 --- /dev/null +++ b/model_data/voc_classes.txt @@ -0,0 +1,20 @@ +aeroplane +bicycle +bird +boat +bottle +bus +car +cat +chair +cow +diningtable +dog +horse +motorbike +person +pottedplant +sheep +sofa +train +tvmonitor \ No newline at end of file diff --git a/model_data/yolo_anchors.txt b/model_data/yolo_anchors.txt new file mode 100644 index 0000000000000000000000000000000000000000..396f07e146622a8450bd3b7719d43390203eb089 --- /dev/null +++ b/model_data/yolo_anchors.txt @@ -0,0 +1 @@ +12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 \ No newline at end of file diff --git a/nets/CSPdarknet.py b/nets/CSPdarknet.py new file mode 100644 index 0000000000000000000000000000000000000000..584690f1362c60d0605ad583a758145a2ae80c6c --- /dev/null +++ b/nets/CSPdarknet.py @@ -0,0 +1,140 @@ +import torch +import torch.nn.functional as F +import torch.nn as nn +import math +from collections import OrderedDict + +#-------------------------------------------------# +# MISH激活函数 +#-------------------------------------------------# +class Mish(nn.Module): + def __init__(self): + super(Mish, self).__init__() + + def forward(self, x): + return x * torch.tanh(F.softplus(x)) + +#-------------------------------------------------# +# 卷积块 +# CONV+BATCHNORM+MISH +#-------------------------------------------------# +class BasicConv(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1): + super(BasicConv, self).__init__() + + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) + self.bn = nn.BatchNorm2d(out_channels) + self.activation = Mish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.activation(x) + return x + +#---------------------------------------------------# +# CSPdarknet的结构块的组成部分 +# 内部堆叠的残差块 +#---------------------------------------------------# +class Resblock(nn.Module): + def __init__(self, channels, hidden_channels=None, residual_activation=nn.Identity()): + super(Resblock, self).__init__() + + if hidden_channels is None: + hidden_channels = channels + + self.block = nn.Sequential( + BasicConv(channels, hidden_channels, 1), + BasicConv(hidden_channels, channels, 3) + ) + + def forward(self, x): + return x + self.block(x) + +#---------------------------------------------------# +# CSPdarknet的结构块 +# 存在一个大残差边 +# 这个大残差边绕过了很多的残差结构 +#---------------------------------------------------# +class Resblock_body(nn.Module): + def __init__(self, in_channels, out_channels, num_blocks, first): + super(Resblock_body, self).__init__() + + self.downsample_conv = BasicConv(in_channels, out_channels, 3, stride=2) + + if first: + self.split_conv0 = BasicConv(out_channels, out_channels, 1) + self.split_conv1 = BasicConv(out_channels, out_channels, 1) + self.blocks_conv = nn.Sequential( + Resblock(channels=out_channels, hidden_channels=out_channels//2), + BasicConv(out_channels, out_channels, 1) + ) + self.concat_conv = BasicConv(out_channels*2, out_channels, 1) + else: + self.split_conv0 = BasicConv(out_channels, out_channels//2, 1) + self.split_conv1 = BasicConv(out_channels, out_channels//2, 1) + + 
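+            # (CSP split: split_conv0 feeds the large shortcut branch that bypasses
+            #  the stacked residual blocks, split_conv1 feeds the branch that runs
+            #  through them; the two halves are concatenated again in forward())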
self.blocks_conv = nn.Sequential( + *[Resblock(out_channels//2) for _ in range(num_blocks)], + BasicConv(out_channels//2, out_channels//2, 1) + ) + self.concat_conv = BasicConv(out_channels, out_channels, 1) + + def forward(self, x): + x = self.downsample_conv(x) + + x0 = self.split_conv0(x) + + x1 = self.split_conv1(x) + x1 = self.blocks_conv(x1) + + x = torch.cat([x1, x0], dim=1) + x = self.concat_conv(x) + + return x + +class CSPDarkNet(nn.Module): + def __init__(self, layers): + super(CSPDarkNet, self).__init__() + self.inplanes = 32 + self.conv1 = BasicConv(3, self.inplanes, kernel_size=3, stride=1) + self.feature_channels = [64, 128, 256, 512, 1024] + + self.stages = nn.ModuleList([ + Resblock_body(self.inplanes, self.feature_channels[0], layers[0], first=True), + Resblock_body(self.feature_channels[0], self.feature_channels[1], layers[1], first=False), + Resblock_body(self.feature_channels[1], self.feature_channels[2], layers[2], first=False), + Resblock_body(self.feature_channels[2], self.feature_channels[3], layers[3], first=False), + Resblock_body(self.feature_channels[3], self.feature_channels[4], layers[4], first=False) + ]) + + self.num_features = 1 + # 进行权值初始化 + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + + def forward(self, x): + x = self.conv1(x) + + x = self.stages[0](x) + x = self.stages[1](x) + out3 = self.stages[2](x) + out4 = self.stages[3](out3) + out5 = self.stages[4](out4) + + return out3, out4, out5 + +def darknet53(pretrained, **kwargs): + model = CSPDarkNet([1, 2, 8, 8, 4]) + if pretrained: + if isinstance(pretrained, str): + model.load_state_dict(torch.load(pretrained)) + else: + raise Exception("darknet request a pretrained path. 
got [{}]".format(pretrained)) + return model diff --git a/nets/yolo4.py b/nets/yolo4.py new file mode 100644 index 0000000000000000000000000000000000000000..2c8f62cee7c2fcd3ca8331b0186acfcd36f5aa76 --- /dev/null +++ b/nets/yolo4.py @@ -0,0 +1,150 @@ +import torch +import torch.nn as nn +from collections import OrderedDict +from nets.CSPdarknet import darknet53 + +def conv2d(filter_in, filter_out, kernel_size, stride=1): + pad = (kernel_size - 1) // 2 if kernel_size else 0 + return nn.Sequential(OrderedDict([ + ("conv", nn.Conv2d(filter_in, filter_out, kernel_size=kernel_size, stride=stride, padding=pad, bias=False)), + ("bn", nn.BatchNorm2d(filter_out)), + ("relu", nn.LeakyReLU(0.1)), + ])) + +#---------------------------------------------------# +# SPP结构,利用不同大小的池化核进行池化 +# 池化后堆叠 +#---------------------------------------------------# +class SpatialPyramidPooling(nn.Module): + def __init__(self, pool_sizes=[5, 9, 13]): + super(SpatialPyramidPooling, self).__init__() + + self.maxpools = nn.ModuleList([nn.MaxPool2d(pool_size, 1, pool_size//2) for pool_size in pool_sizes]) + + def forward(self, x): + features = [maxpool(x) for maxpool in self.maxpools[::-1]] + features = torch.cat(features + [x], dim=1) + + return features + +#---------------------------------------------------# +# 卷积 + 上采样 +#---------------------------------------------------# +class Upsample(nn.Module): + def __init__(self, in_channels, out_channels): + super(Upsample, self).__init__() + + self.upsample = nn.Sequential( + conv2d(in_channels, out_channels, 1), + nn.Upsample(scale_factor=2, mode='nearest') + ) + + def forward(self, x,): + x = self.upsample(x) + return x + +#---------------------------------------------------# +# 三次卷积块 +#---------------------------------------------------# +def make_three_conv(filters_list, in_filters): + m = nn.Sequential( + conv2d(in_filters, filters_list[0], 1), + conv2d(filters_list[0], filters_list[1], 3), + conv2d(filters_list[1], filters_list[0], 1), + ) + return m + +#---------------------------------------------------# +# 五次卷积块 +#---------------------------------------------------# +def make_five_conv(filters_list, in_filters): + m = nn.Sequential( + conv2d(in_filters, filters_list[0], 1), + conv2d(filters_list[0], filters_list[1], 3), + conv2d(filters_list[1], filters_list[0], 1), + conv2d(filters_list[0], filters_list[1], 3), + conv2d(filters_list[1], filters_list[0], 1), + ) + return m + +#---------------------------------------------------# +# 最后获得yolov4的输出 +#---------------------------------------------------# +def yolo_head(filters_list, in_filters): + m = nn.Sequential( + conv2d(in_filters, filters_list[0], 3), + nn.Conv2d(filters_list[0], filters_list[1], 1), + ) + return m + +#---------------------------------------------------# +# yolo_body +#---------------------------------------------------# +class YoloBody(nn.Module): + def __init__(self, num_anchors, num_classes): + super(YoloBody, self).__init__() + # backbone + self.backbone = darknet53(None) + + self.conv1 = make_three_conv([512,1024],1024) + self.SPP = SpatialPyramidPooling() + self.conv2 = make_three_conv([512,1024],2048) + + self.upsample1 = Upsample(512,256) + self.conv_for_P4 = conv2d(512,256,1) + self.make_five_conv1 = make_five_conv([256, 512],512) + + self.upsample2 = Upsample(256,128) + self.conv_for_P3 = conv2d(256,128,1) + self.make_five_conv2 = make_five_conv([128, 256],256) + # 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75 + # 4+1+num_classes + final_out_filter2 = num_anchors * (5 + num_classes) + 
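+        # (yolo_head3 sits on the highest-resolution PANet branch -- 52x52 for a
+        #  416x416 input -- which is typically the head responsible for the
+        #  smallest objects)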
self.yolo_head3 = yolo_head([256, final_out_filter2],128) + + self.down_sample1 = conv2d(128,256,3,stride=2) + self.make_five_conv3 = make_five_conv([256, 512],512) + # 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75 + final_out_filter1 = num_anchors * (5 + num_classes) + self.yolo_head2 = yolo_head([512, final_out_filter1],256) + + + self.down_sample2 = conv2d(256,512,3,stride=2) + self.make_five_conv4 = make_five_conv([512, 1024],1024) + # 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75 + final_out_filter0 = num_anchors * (5 + num_classes) + self.yolo_head1 = yolo_head([1024, final_out_filter0],512) + + + def forward(self, x): + # backbone + x2, x1, x0 = self.backbone(x) + + P5 = self.conv1(x0) + P5 = self.SPP(P5) + P5 = self.conv2(P5) + + P5_upsample = self.upsample1(P5) + P4 = self.conv_for_P4(x1) + P4 = torch.cat([P4,P5_upsample],axis=1) + P4 = self.make_five_conv1(P4) + + P4_upsample = self.upsample2(P4) + P3 = self.conv_for_P3(x2) + P3 = torch.cat([P3,P4_upsample],axis=1) + P3 = self.make_five_conv2(P3) + + P3_downsample = self.down_sample1(P3) + P4 = torch.cat([P3_downsample,P4],axis=1) + P4 = self.make_five_conv3(P4) + + P4_downsample = self.down_sample2(P4) + P5 = torch.cat([P4_downsample,P5],axis=1) + P5 = self.make_five_conv4(P5) + + out2 = self.yolo_head3(P3) + out1 = self.yolo_head2(P4) + out0 = self.yolo_head1(P5) + + return out0, out1, out2 + diff --git a/nets/yolo_training.py b/nets/yolo_training.py new file mode 100644 index 0000000000000000000000000000000000000000..04678d7247782c35d63cad021407daeeccd3eda4 --- /dev/null +++ b/nets/yolo_training.py @@ -0,0 +1,507 @@ + +from random import shuffle +import numpy as np +import torch +import torch.nn as nn +import math +import torch.nn.functional as F +from matplotlib.colors import rgb_to_hsv, hsv_to_rgb +from PIL import Image +from utils.utils import bbox_iou, merge_bboxes + +#---------------------------------------------------# +# 平滑标签 +#---------------------------------------------------# +def smooth_labels(y_true, label_smoothing,num_classes): + return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes + +def box_ciou(b1, b2): + """ + 输入为: + ---------- + b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh + b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh + + 返回为: + ------- + ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1) + """ + # 求出预测框左上角右下角 + b1_xy = b1[..., :2] + b1_wh = b1[..., 2:4] + b1_wh_half = b1_wh/2. + b1_mins = b1_xy - b1_wh_half + b1_maxes = b1_xy + b1_wh_half + # 求出真实框左上角右下角 + b2_xy = b2[..., :2] + b2_wh = b2[..., 2:4] + b2_wh_half = b2_wh/2. 
+ b2_mins = b2_xy - b2_wh_half + b2_maxes = b2_xy + b2_wh_half + + # 求真实框和预测框所有的iou + intersect_mins = torch.max(b1_mins, b2_mins) + intersect_maxes = torch.min(b1_maxes, b2_maxes) + intersect_wh = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes)) + intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] + b1_area = b1_wh[..., 0] * b1_wh[..., 1] + b2_area = b2_wh[..., 0] * b2_wh[..., 1] + union_area = b1_area + b2_area - intersect_area + iou = intersect_area / (union_area + 1e-6) + + # 计算中心的差距 + center_distance = torch.sum(torch.pow((b1_xy - b2_xy), 2), axis=-1) + + # 找到包裹两个框的最小框的左上角和右下角 + enclose_mins = torch.min(b1_mins, b2_mins) + enclose_maxes = torch.max(b1_maxes, b2_maxes) + enclose_wh = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes)) + # 计算对角线距离 + enclose_diagonal = torch.sum(torch.pow(enclose_wh,2), axis=-1) + ciou = iou - 1.0 * (center_distance) / (enclose_diagonal + 1e-7) + + v = (4 / (math.pi ** 2)) * torch.pow((torch.atan(b1_wh[..., 0]/b1_wh[..., 1]) - torch.atan(b2_wh[..., 0]/b2_wh[..., 1])), 2) + alpha = v / (1.0 - iou + v) + ciou = ciou - alpha * v + return ciou + +def clip_by_tensor(t,t_min,t_max): + t=t.float() + result = (t >= t_min).float() * t + (t < t_min).float() * t_min + result = (result <= t_max).float() * result + (result > t_max).float() * t_max + return result + +def MSELoss(pred,target): + return (pred-target)**2 + +def BCELoss(pred,target): + epsilon = 1e-7 + pred = clip_by_tensor(pred, epsilon, 1.0 - epsilon) + output = -target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred) + return output + +class YOLOLoss(nn.Module): + def __init__(self, anchors, num_classes, img_size, label_smooth=0, cuda=True): + super(YOLOLoss, self).__init__() + self.anchors = anchors + self.num_anchors = len(anchors) + self.num_classes = num_classes + self.bbox_attrs = 5 + num_classes + self.img_size = img_size + self.feature_length = [img_size[0]//32,img_size[0]//16,img_size[0]//8] + self.label_smooth = label_smooth + + self.ignore_threshold = 0.5 + self.lambda_conf = 1.0 + self.lambda_cls = 1.0 + self.lambda_loc = 1.0 + self.cuda = cuda + + def forward(self, input, targets=None): + # input为bs,3*(5+num_classes),13,13 + + # 一共多少张图片 + bs = input.size(0) + # 特征层的高 + in_h = input.size(2) + # 特征层的宽 + in_w = input.size(3) + + # 计算步长 + # 每一个特征点对应原来的图片上多少个像素点 + # 如果特征层为13x13的话,一个特征点就对应原来的图片上的32个像素点 + stride_h = self.img_size[1] / in_h + stride_w = self.img_size[0] / in_w + + # 把先验框的尺寸调整成特征层大小的形式 + # 计算出先验框在特征层上对应的宽高 + scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors] + # bs,3*(5+num_classes),13,13 -> bs,3,13,13,(5+num_classes) + prediction = input.view(bs, int(self.num_anchors/3), + self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous() + + # 对prediction预测进行调整 + conf = torch.sigmoid(prediction[..., 4]) # Conf + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. 
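+        # (a sketch of the target tensors built below, assuming a 13x13 feature map
+        #  and num_anchors/3 = 3 priors per scale: mask, noobj_mask and tconf are
+        #  (bs, 3, 13, 13), t_box is (bs, 3, 13, 13, 4) and
+        #  tcls is (bs, 3, 13, 13, num_classes))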
+ + # 找到哪些先验框内部包含物体 + mask, noobj_mask, t_box, tconf, tcls, box_loss_scale_x, box_loss_scale_y = self.get_target(targets, scaled_anchors,in_w, in_h,self.ignore_threshold) + + noobj_mask, pred_boxes_for_ciou = self.get_ignore(prediction, targets, scaled_anchors, in_w, in_h, noobj_mask) + + if self.cuda: + mask, noobj_mask = mask.cuda(), noobj_mask.cuda() + box_loss_scale_x, box_loss_scale_y= box_loss_scale_x.cuda(), box_loss_scale_y.cuda() + tconf, tcls = tconf.cuda(), tcls.cuda() + pred_boxes_for_ciou = pred_boxes_for_ciou.cuda() + t_box = t_box.cuda() + + box_loss_scale = 2-box_loss_scale_x*box_loss_scale_y + # losses. + ciou = (1 - box_ciou( pred_boxes_for_ciou[mask.bool()], t_box[mask.bool()]))* box_loss_scale[mask.bool()] + + loss_loc = torch.sum(ciou / bs) + loss_conf = torch.sum(BCELoss(conf, mask) * mask / bs) + \ + torch.sum(BCELoss(conf, mask) * noobj_mask / bs) + + # print(smooth_labels(tcls[mask == 1],self.label_smooth,self.num_classes)) + loss_cls = torch.sum(BCELoss(pred_cls[mask == 1], smooth_labels(tcls[mask == 1],self.label_smooth,self.num_classes))/bs) + # print(loss_loc,loss_conf,loss_cls) + loss = loss_conf * self.lambda_conf + loss_cls * self.lambda_cls + loss_loc * self.lambda_loc + return loss, loss_conf.item(), loss_cls.item(), loss_loc.item() + + def get_target(self, target, anchors, in_w, in_h, ignore_threshold): + # 计算一共有多少张图片 + bs = len(target) + # 获得先验框 + anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)] + subtract_index = [0,3,6][self.feature_length.index(in_w)] + # 创建全是0或者全是1的阵列 + mask = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) + noobj_mask = torch.ones(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) + + tx = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) + ty = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) + tw = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) + th = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) + t_box = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, 4, requires_grad=False) + tconf = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) + tcls = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, self.num_classes, requires_grad=False) + + box_loss_scale_x = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) + box_loss_scale_y = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) + for b in range(bs): + for t in range(target[b].shape[0]): + # 计算出在特征层上的点位 + gx = target[b][t, 0] * in_w + gy = target[b][t, 1] * in_h + + gw = target[b][t, 2] * in_w + gh = target[b][t, 3] * in_h + + # 计算出属于哪个网格 + gi = int(gx) + gj = int(gy) + + # 计算真实框的位置 + gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0) + + # 计算出所有先验框的位置 + anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((self.num_anchors, 2)), + np.array(anchors)), 1)) + # 计算重合程度 + anch_ious = bbox_iou(gt_box, anchor_shapes) + + # Find the best matching anchor box + best_n = np.argmax(anch_ious) + if best_n not in anchor_index: + continue + # Masks + if (gj < in_h) and (gi < in_w): + best_n = best_n - subtract_index + # 判定哪些先验框内部真实的存在物体 + noobj_mask[b, best_n, gj, gi] = 0 + mask[b, best_n, gj, gi] = 1 + # 计算先验框中心调整参数 + tx[b, best_n, gj, gi] = gx + ty[b, best_n, gj, gi] = gy + # 计算先验框宽高调整参数 + tw[b, best_n, gj, gi] = gw + th[b, best_n, gj, gi] = gh + # 用于获得xywh的比例 + box_loss_scale_x[b, best_n, gj, gi] = target[b][t, 2] + 
box_loss_scale_y[b, best_n, gj, gi] = target[b][t, 3] + # 物体置信度 + tconf[b, best_n, gj, gi] = 1 + # 种类 + tcls[b, best_n, gj, gi, int(target[b][t, 4])] = 1 + else: + print('Step {0} out of bound'.format(b)) + print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(gj, in_h, gi, in_w)) + continue + t_box[...,0] = tx + t_box[...,1] = ty + t_box[...,2] = tw + t_box[...,3] = th + return mask, noobj_mask, t_box, tconf, tcls, box_loss_scale_x, box_loss_scale_y + + def get_ignore(self,prediction,target,scaled_anchors,in_w, in_h,noobj_mask): + bs = len(target) + anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)] + scaled_anchors = np.array(scaled_anchors)[anchor_index] + # 先验框的中心位置的调整参数 + x = torch.sigmoid(prediction[..., 0]) + y = torch.sigmoid(prediction[..., 1]) + # 先验框的宽高调整参数 + w = prediction[..., 2] # Width + h = prediction[..., 3] # Height + + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor + + # 生成网格,先验框中心,网格左上角 + grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_w, 1).repeat( + int(bs*self.num_anchors/3), 1, 1).view(x.shape).type(FloatTensor) + grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_h, 1).t().repeat( + int(bs*self.num_anchors/3), 1, 1).view(y.shape).type(FloatTensor) + + # 生成先验框的宽高 + anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0])) + anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1])) + + anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape) + anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape) + + # 计算调整后的先验框中心与宽高 + pred_boxes = FloatTensor(prediction[..., :4].shape) + pred_boxes[..., 0] = x + grid_x + pred_boxes[..., 1] = y + grid_y + pred_boxes[..., 2] = torch.exp(w) * anchor_w + pred_boxes[..., 3] = torch.exp(h) * anchor_h + for i in range(bs): + pred_boxes_for_ignore = pred_boxes[i] + pred_boxes_for_ignore = pred_boxes_for_ignore.view(-1, 4) + + for t in range(target[i].shape[0]): + gx = target[i][t, 0] * in_w + gy = target[i][t, 1] * in_h + gw = target[i][t, 2] * in_w + gh = target[i][t, 3] * in_h + gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0).type(FloatTensor) + + anch_ious = bbox_iou(gt_box, pred_boxes_for_ignore, x1y1x2y2=False) + anch_ious = anch_ious.view(pred_boxes[i].size()[:3]) + noobj_mask[i][anch_ious>self.ignore_threshold] = 0 + return noobj_mask, pred_boxes + + +def rand(a=0, b=1): + return np.random.rand()*(b-a) + a + + +class Generator(object): + def __init__(self,batch_size, + train_lines, image_size, + ): + + self.batch_size = batch_size + self.train_lines = train_lines + self.train_batches = len(train_lines) + self.image_size = image_size + + def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5): + '''r实时数据增强的随机预处理''' + line = annotation_line.split() + image = Image.open(line[0]) + iw, ih = image.size + h, w = input_shape + box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) + + # resize image + new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter) + scale = rand(.25, 2) + if new_ar < 1: + nh = int(scale*h) + nw = int(nh*new_ar) + else: + nw = int(scale*w) + nh = int(nw/new_ar) + image = image.resize((nw,nh), Image.BICUBIC) + + # place image + dx = int(rand(0, w-nw)) + dy = int(rand(0, h-nh)) + new_image = Image.new('RGB', (w,h), (128,128,128)) + new_image.paste(image, (dx, dy)) + image = new_image + + # flip image or not + flip = rand()<.5 + if flip: 
image = image.transpose(Image.FLIP_LEFT_RIGHT)
+
+        # distort image
+        hue = rand(-hue, hue)
+        sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
+        val = rand(1, val) if rand()<.5 else 1/rand(1, val)
+        x = rgb_to_hsv(np.array(image)/255.)
+        x[..., 0] += hue
+        x[..., 0][x[..., 0]>1] -= 1
+        x[..., 0][x[..., 0]<0] += 1
+        x[..., 1] *= sat
+        x[..., 2] *= val
+        x[x>1] = 1
+        x[x<0] = 0
+        image_data = hsv_to_rgb(x)*255 # numpy array, 0 to 255
+
+        # correct boxes
+        box_data = np.zeros((len(box),5))
+        if len(box)>0:
+            np.random.shuffle(box)
+            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
+            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
+            if flip: box[:, [0,2]] = w - box[:, [2,0]]
+            box[:, 0:2][box[:, 0:2]<0] = 0
+            box[:, 2][box[:, 2]>w] = w
+            box[:, 3][box[:, 3]>h] = h
+            box_w = box[:, 2] - box[:, 0]
+            box_h = box[:, 3] - box[:, 1]
+            box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
+            box_data = np.zeros((len(box),5))
+            box_data[:len(box)] = box
+        if len(box) == 0:
+            return image_data, []
+
+        if (box_data[:,:4]>0).any():
+            return image_data, box_data
+        else:
+            return image_data, []
+
+    def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5):
+        '''random preprocessing for real-time data augmentation'''
+        h, w = input_shape
+        min_offset_x = 0.4
+        min_offset_y = 0.4
+        scale_low = 1-min(min_offset_x,min_offset_y)
+        scale_high = scale_low+0.2
+
+        image_datas = []
+        box_datas = []
+        index = 0
+
+        place_x = [0,0,int(w*min_offset_x),int(w*min_offset_x)]
+        place_y = [0,int(h*min_offset_y),int(h*min_offset_y),0]
+        for line in annotation_line:
+            # 每一行进行分割
+            line_content = line.split()
+            # 打开图片
+            image = Image.open(line_content[0])
+            image = image.convert("RGB")
+            # 图片的大小
+            iw, ih = image.size
+            # 保存框的位置
+            box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])
+
+            # 是否翻转图片
+            flip = rand()<.5
+            if flip and len(box)>0:
+                image = image.transpose(Image.FLIP_LEFT_RIGHT)
+                box[:, [0,2]] = iw - box[:, [2,0]]
+
+            # 对输入进来的图片进行缩放
+            new_ar = w/h
+            scale = rand(scale_low, scale_high)
+            if new_ar < 1:
+                nh = int(scale*h)
+                nw = int(nh*new_ar)
+            else:
+                nw = int(scale*w)
+                nh = int(nw/new_ar)
+            image = image.resize((nw,nh), Image.BICUBIC)
+
+            # 进行色域变换
+            hue = rand(-hue, hue)
+            sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
+            val = rand(1, val) if rand()<.5 else 1/rand(1, val)
+            x = rgb_to_hsv(np.array(image)/255.)
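+            # x is the resized crop converted to HSV, with all channels in [0, 1];
+            # the hue channel is shifted with wrap-around and the saturation/value
+            # channels are rescaled before converting back to RGB below.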
+ x[..., 0] += hue + x[..., 0][x[..., 0]>1] -= 1 + x[..., 0][x[..., 0]<0] += 1 + x[..., 1] *= sat + x[..., 2] *= val + x[x>1] = 1 + x[x<0] = 0 + image = hsv_to_rgb(x) + + image = Image.fromarray((image*255).astype(np.uint8)) + # 将图片进行放置,分别对应四张分割图片的位置 + dx = place_x[index] + dy = place_y[index] + new_image = Image.new('RGB', (w,h), (128,128,128)) + new_image.paste(image, (dx, dy)) + image_data = np.array(new_image)/255 + + + index = index + 1 + box_data = [] + # 对box进行重新处理 + if len(box)>0: + np.random.shuffle(box) + box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx + box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy + box[:, 0:2][box[:, 0:2]<0] = 0 + box[:, 2][box[:, 2]>w] = w + box[:, 3][box[:, 3]>h] = h + box_w = box[:, 2] - box[:, 0] + box_h = box[:, 3] - box[:, 1] + box = box[np.logical_and(box_w>1, box_h>1)] + box_data = np.zeros((len(box),5)) + box_data[:len(box)] = box + + image_datas.append(image_data) + box_datas.append(box_data) + + # 将图片分割,放在一起 + cutx = np.random.randint(int(w*min_offset_x), int(w*(1 - min_offset_x))) + cuty = np.random.randint(int(h*min_offset_y), int(h*(1 - min_offset_y))) + + new_image = np.zeros([h,w,3]) + new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :] + new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :] + new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :] + new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :] + + # 对框进行进一步的处理 + new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty)) + + if len(new_boxes) == 0: + return new_image, [] + if (new_boxes[:,:4]>0).any(): + return new_image, new_boxes + else: + return new_image, [] + + def generate(self, train = True, mosaic = True): + while True: + shuffle(self.train_lines) + lines = self.train_lines + inputs = [] + targets = [] + flag = True + n = len(lines) + for i in range(len(lines)): + if mosaic == True: + if flag and (i+4) < n: + img,y = self.get_random_data_with_Mosaic(lines[i:i+4], self.image_size[0:2]) + i = (i+4) % n + else: + img,y = self.get_random_data(lines[i], self.image_size[0:2]) + i = (i+1) % n + flag = bool(1-flag) + else: + img,y = self.get_random_data(lines[i], self.image_size[0:2]) + i = (i+1) % n + if len(y)==0: + continue + boxes = np.array(y[:,:4],dtype=np.float32) + boxes[:,0] = boxes[:,0]/self.image_size[1] + boxes[:,1] = boxes[:,1]/self.image_size[0] + boxes[:,2] = boxes[:,2]/self.image_size[1] + boxes[:,3] = boxes[:,3]/self.image_size[0] + + boxes = np.maximum(np.minimum(boxes,1),0) + boxes[:,2] = boxes[:,2] - boxes[:,0] + boxes[:,3] = boxes[:,3] - boxes[:,1] + + boxes[:,0] = boxes[:,0] + boxes[:,2]/2 + boxes[:,1] = boxes[:,1] + boxes[:,3]/2 + y = np.concatenate([boxes,y[:,-1:]],axis=-1) + img = np.array(img,dtype = np.float32) + + inputs.append(np.transpose(img/255.0,(2,0,1))) + targets.append(y) + if len(targets) == self.batch_size: + tmp_inp = np.array(inputs) + tmp_targets = np.array(targets) + inputs = [] + targets = [] + yield tmp_inp, tmp_targets \ No newline at end of file diff --git a/predict.py b/predict.py new file mode 100644 index 0000000000000000000000000000000000000000..07c7406a3cab29426d9621d755bca0d0c7c78b50 --- /dev/null +++ b/predict.py @@ -0,0 +1,18 @@ +#-------------------------------------# +# 对单张图片进行预测 +#-------------------------------------# +from yolo import YOLO +from PIL import Image + +yolo = YOLO() + +while True: + img = input('Input image filename:') + try: + image = Image.open(img) + except: + print('Open Error! 
Try again!')
+        continue
+    else:
+        r_image = yolo.detect_image(image)
+        r_image.show()
diff --git a/test.py b/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9c88c16abdb4cdb87901197b706b3aad6578368
--- /dev/null
+++ b/test.py
@@ -0,0 +1,10 @@
+import torch
+from torchsummary import summary
+from nets.CSPdarknet import darknet53
+from nets.yolo4 import YoloBody
+
+if __name__ == "__main__":
+    # 需要使用device来指定网络在GPU还是CPU运行
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    model = YoloBody(3,20).to(device)
+    summary(model, input_size=(3, 416, 416))
diff --git a/train.py b/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..3cabe85eea96c66af6a27ffab6df7f9b4eab21d8
--- /dev/null
+++ b/train.py
@@ -0,0 +1,207 @@
+#-------------------------------------#
+# 对数据集进行训练
+#-------------------------------------#
+import os
+import numpy as np
+import time
+import torch
+from torch.autograd import Variable
+import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F
+import torch.backends.cudnn as cudnn
+from nets.yolo_training import YOLOLoss,Generator
+from nets.yolo4 import YoloBody
+
+
+#---------------------------------------------------#
+# 获得类和先验框
+#---------------------------------------------------#
+def get_classes(classes_path):
+    '''loads the classes'''
+    with open(classes_path) as f:
+        class_names = f.readlines()
+    class_names = [c.strip() for c in class_names]
+    return class_names
+
+def get_anchors(anchors_path):
+    '''loads the anchors from a file'''
+    with open(anchors_path) as f:
+        anchors = f.readline()
+    anchors = [float(x) for x in anchors.split(',')]
+    return np.array(anchors).reshape([-1,3,2])[::-1,:,:]
+
+def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda):
+    total_loss = 0
+    val_loss = 0
+    for iteration in range(epoch_size):
+        start_time = time.time()
+        images, targets = next(gen)
+        with torch.no_grad():
+            if cuda:
+                images = Variable(torch.from_numpy(images).type(torch.FloatTensor)).cuda()
+                targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
+            else:
+                images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
+                targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
+        # print(images)
+        optimizer.zero_grad()
+        outputs = net(images)
+        losses = []
+        for i in range(3):
+            loss_item = yolo_losses[i](outputs[i], targets)
+            losses.append(loss_item[0])
+        loss = sum(losses)
+        loss.backward()
+        optimizer.step()
+
+        total_loss += loss
+        waste_time = time.time() - start_time
+        print('\nEpoch:'+ str(epoch+1) + '/' + str(Epoch))
+        print('iter:' + str(iteration) + '/' + str(epoch_size) + ' || Total Loss: %.4f || %.4fs/step' % (total_loss/(iteration+1),waste_time))
+
+    print('Start Validation')
+    for iteration in range(epoch_size_val):
+        images_val, targets_val = next(genval)
+
+        with torch.no_grad():
+            if cuda:
+                images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor)).cuda()
+                targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
+            else:
+                images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor))
+                targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
+            optimizer.zero_grad()
+            outputs = net(images_val)
+            losses = []
+            for i in range(3):
+                loss_item = yolo_losses[i](outputs[i], targets_val)
+                losses.append(loss_item[0])
+            loss = sum(losses)
+            val_loss += loss
+    print('Finish Validation')
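+    # Validation reuses the same YOLOLoss heads under torch.no_grad() on the
+    # mosaic-free validation generator; the per-batch averages printed below
+    # also go into the checkpoint filename.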
print('\nEpoch:'+ str(epoch+1) + '/' + str(Epoch)) + print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_loss/(epoch_size_val+1))) + + print('Saving state, iter:', str(epoch+1)) + torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth'%((epoch+1),total_loss/(epoch_size+1),val_loss/(epoch_size_val+1))) + + +if __name__ == "__main__": + #-------------------------------# + # 输入的shape大小 + # 显存比较小可以使用416x416 + # 显存比较大可以使用608x608 + #-------------------------------# + input_shape = (416,416) + #-------------------------------# + # tricks的使用设置 + #-------------------------------# + Cosine_lr = False + mosaic = True + # 用于设定是否使用cuda + Cuda = True + smoooth_label = 0 + + annotation_path = '2007_train.txt' + #-------------------------------# + # 获得先验框和类 + #-------------------------------# + anchors_path = 'model_data/yolo_anchors.txt' + classes_path = 'model_data/voc_classes.txt' + class_names = get_classes(classes_path) + anchors = get_anchors(anchors_path) + num_classes = len(class_names) + + # 创建模型 + model = YoloBody(len(anchors[0]),num_classes) + model_path = "model_data/yolo4_weights.pth" + # 加快模型训练的效率 + print('Loading weights into state dict...') + model_dict = model.state_dict() + pretrained_dict = torch.load(model_path) + pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)} + model_dict.update(pretrained_dict) + model.load_state_dict(model_dict) + print('Finished!') + + net = model.train() + + if Cuda: + net = torch.nn.DataParallel(model) + cudnn.benchmark = True + net = net.cuda() + + # 建立loss函数 + yolo_losses = [] + for i in range(3): + yolo_losses.append(YOLOLoss(np.reshape(anchors,[-1,2]),num_classes, \ + (input_shape[1], input_shape[0]), smoooth_label, Cuda)) + + # 0.1用于验证,0.9用于训练 + val_split = 0.1 + with open(annotation_path) as f: + lines = f.readlines() + np.random.seed(10101) + np.random.shuffle(lines) + np.random.seed(None) + num_val = int(len(lines)*val_split) + num_train = len(lines) - num_val + + if True: + lr = 1e-3 + Batch_size = 4 + Init_Epoch = 0 + Freeze_Epoch = 25 + + optimizer = optim.Adam(net.parameters(),lr) + if Cosine_lr: + lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5) + else: + lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95) + + gen = Generator(Batch_size, lines[:num_train], + (input_shape[0], input_shape[1])).generate(mosaic = mosaic) + gen_val = Generator(Batch_size, lines[num_train:], + (input_shape[0], input_shape[1])).generate(mosaic = False) + + epoch_size = int(max(1, num_train//Batch_size//2.5)) if mosaic else max(1, num_train//Batch_size) + epoch_size_val = num_val//Batch_size + #------------------------------------# + # 冻结一定部分训练 + #------------------------------------# + for param in model.backbone.parameters(): + param.requires_grad = False + + for epoch in range(Init_Epoch,Freeze_Epoch): + fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,gen_val,Freeze_Epoch,Cuda) + lr_scheduler.step() + + if True: + lr = 1e-4 + Batch_size = 2 + Freeze_Epoch = 25 + Unfreeze_Epoch = 50 + + optimizer = optim.Adam(net.parameters(),lr) + if Cosine_lr: + lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5) + else: + lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95) + + gen = Generator(Batch_size, lines[:num_train], + (input_shape[0], input_shape[1])).generate(mosaic = mosaic) + gen_val = Generator(Batch_size, lines[num_train:], + 
(input_shape[0], input_shape[1])).generate(mosaic = False) + + epoch_size = int(max(1, num_train//Batch_size//2.5)) if mosaic else max(1, num_train//Batch_size) + epoch_size_val = num_val//Batch_size + #------------------------------------# + # 解冻后训练 + #------------------------------------# + for param in model.backbone.parameters(): + param.requires_grad = True + + for epoch in range(Freeze_Epoch,Unfreeze_Epoch): + fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,gen_val,Unfreeze_Epoch,Cuda) + lr_scheduler.step() \ No newline at end of file diff --git a/utils/utils.py b/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a19ef8fd19d26ef6947a4804f6805ce28c20ecac --- /dev/null +++ b/utils/utils.py @@ -0,0 +1,320 @@ +from __future__ import division +import os +import math +import time +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable +import numpy as np +from PIL import Image, ImageDraw, ImageFont +import matplotlib.pyplot as plt + +class DecodeBox(nn.Module): + def __init__(self, anchors, num_classes, img_size): + super(DecodeBox, self).__init__() + self.anchors = anchors + self.num_anchors = len(anchors) + self.num_classes = num_classes + self.bbox_attrs = 5 + num_classes + self.img_size = img_size + + def forward(self, input): + # input为bs,3*(1+4+num_classes),13,13 + + # 一共多少张图片 + batch_size = input.size(0) + # 13,13 + input_height = input.size(2) + input_width = input.size(3) + + # 计算步长 + # 每一个特征点对应原来的图片上多少个像素点 + # 如果特征层为13x13的话,一个特征点就对应原来的图片上的32个像素点 + # 416/13 = 32 + stride_h = self.img_size[1] / input_height + stride_w = self.img_size[0] / input_width + + # 把先验框的尺寸调整成特征层大小的形式 + # 计算出先验框在特征层上对应的宽高 + scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors] + + # bs,3*(5+num_classes),13,13 -> bs,3,13,13,(5+num_classes) + prediction = input.view(batch_size, self.num_anchors, + self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous() + + # 先验框的中心位置的调整参数 + x = torch.sigmoid(prediction[..., 0]) + y = torch.sigmoid(prediction[..., 1]) + # 先验框的宽高调整参数 + w = prediction[..., 2] # Width + h = prediction[..., 3] # Height + + # 获得置信度,是否有物体 + conf = torch.sigmoid(prediction[..., 4]) + # 种类置信度 + pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred. 
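+        #----------------------------------------------------------------#
+        #   Decoding: a grid of cell offsets is added to the sigmoid-
+        #   activated x/y, the anchors are scaled by exp(w)/exp(h), and
+        #   _scale (the stride) maps the result back to 416x416 input
+        #   coordinates at the end of forward().
+        #----------------------------------------------------------------#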
+ + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor + + # 生成网格,先验框中心,网格左上角 batch_size,3,13,13 + grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_width, 1).repeat( + batch_size * self.num_anchors, 1, 1).view(x.shape).type(FloatTensor) + grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_height, 1).t().repeat( + batch_size * self.num_anchors, 1, 1).view(y.shape).type(FloatTensor) + + # 生成先验框的宽高 + anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0])) + anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1])) + anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape) + anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape) + + # 计算调整后的先验框中心与宽高 + pred_boxes = FloatTensor(prediction[..., :4].shape) + pred_boxes[..., 0] = x.data + grid_x + pred_boxes[..., 1] = y.data + grid_y + pred_boxes[..., 2] = torch.exp(w.data) * anchor_w + pred_boxes[..., 3] = torch.exp(h.data) * anchor_h + + # fig = plt.figure() + # ax = fig.add_subplot(121) + # if input_height==13: + # plt.ylim(0,13) + # plt.xlim(0,13) + # elif input_height==26: + # plt.ylim(0,26) + # plt.xlim(0,26) + # elif input_height==52: + # plt.ylim(0,52) + # plt.xlim(0,52) + # plt.scatter(grid_x.cpu(),grid_y.cpu()) + + # anchor_left = grid_x - anchor_w/2 + # anchor_top = grid_y - anchor_h/2 + + # rect1 = plt.Rectangle([anchor_left[0,0,5,5],anchor_top[0,0,5,5]],anchor_w[0,0,5,5],anchor_h[0,0,5,5],color="r",fill=False) + # rect2 = plt.Rectangle([anchor_left[0,1,5,5],anchor_top[0,1,5,5]],anchor_w[0,1,5,5],anchor_h[0,1,5,5],color="r",fill=False) + # rect3 = plt.Rectangle([anchor_left[0,2,5,5],anchor_top[0,2,5,5]],anchor_w[0,2,5,5],anchor_h[0,2,5,5],color="r",fill=False) + + # ax.add_patch(rect1) + # ax.add_patch(rect2) + # ax.add_patch(rect3) + + # ax = fig.add_subplot(122) + # if input_height==13: + # plt.ylim(0,13) + # plt.xlim(0,13) + # elif input_height==26: + # plt.ylim(0,26) + # plt.xlim(0,26) + # elif input_height==52: + # plt.ylim(0,52) + # plt.xlim(0,52) + # plt.scatter(grid_x.cpu(),grid_y.cpu()) + # plt.scatter(pred_boxes[0,:,5,5,0].cpu(),pred_boxes[0,:,5,5,1].cpu(),c='r') + + # pre_left = pred_boxes[...,0] - pred_boxes[...,2]/2 + # pre_top = pred_boxes[...,1] - pred_boxes[...,3]/2 + + # rect1 = plt.Rectangle([pre_left[0,0,5,5],pre_top[0,0,5,5]],pred_boxes[0,0,5,5,2],pred_boxes[0,0,5,5,3],color="r",fill=False) + # rect2 = plt.Rectangle([pre_left[0,1,5,5],pre_top[0,1,5,5]],pred_boxes[0,1,5,5,2],pred_boxes[0,1,5,5,3],color="r",fill=False) + # rect3 = plt.Rectangle([pre_left[0,2,5,5],pre_top[0,2,5,5]],pred_boxes[0,2,5,5,2],pred_boxes[0,2,5,5,3],color="r",fill=False) + + # ax.add_patch(rect1) + # ax.add_patch(rect2) + # ax.add_patch(rect3) + + # plt.show() + # 用于将输出调整为相对于416x416的大小 + _scale = torch.Tensor([stride_w, stride_h] * 2).type(FloatTensor) + output = torch.cat((pred_boxes.view(batch_size, -1, 4) * _scale, + conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1) + return output.data + +def letterbox_image(image, size): + iw, ih = image.size + w, h = size + scale = min(w/iw, h/ih) + nw = int(iw*scale) + nh = int(ih*scale) + + image = image.resize((nw,nh), Image.BICUBIC) + new_image = Image.new('RGB', size, (128,128,128)) + new_image.paste(image, ((w-nw)//2, (h-nh)//2)) + return new_image + +def yolo_correct_boxes(top, left, bottom, right, input_shape, 
image_shape): + new_shape = image_shape*np.min(input_shape/image_shape) + + offset = (input_shape-new_shape)/2./input_shape + scale = input_shape/new_shape + + box_yx = np.concatenate(((top+bottom)/2,(left+right)/2),axis=-1)/input_shape + box_hw = np.concatenate((bottom-top,right-left),axis=-1)/input_shape + + box_yx = (box_yx - offset) * scale + box_hw *= scale + + box_mins = box_yx - (box_hw / 2.) + box_maxes = box_yx + (box_hw / 2.) + boxes = np.concatenate([ + box_mins[:, 0:1], + box_mins[:, 1:2], + box_maxes[:, 0:1], + box_maxes[:, 1:2] + ],axis=-1) + print(np.shape(boxes)) + boxes *= np.concatenate([image_shape, image_shape],axis=-1) + return boxes + +def bbox_iou(box1, box2, x1y1x2y2=True): + """ + 计算IOU + """ + if not x1y1x2y2: + b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 + b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 + b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 + b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 + else: + b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] + + inter_rect_x1 = torch.max(b1_x1, b2_x1) + inter_rect_y1 = torch.max(b1_y1, b2_y1) + inter_rect_x2 = torch.min(b1_x2, b2_x2) + inter_rect_y2 = torch.min(b1_y2, b2_y2) + + inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * \ + torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0) + + b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) + b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) + + iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) + + return iou + + +def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4): + # 求左上角和右下角 + box_corner = prediction.new(prediction.shape) + box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 + box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 + box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 + box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 + prediction[:, :, :4] = box_corner[:, :, :4] + + output = [None for _ in range(len(prediction))] + for image_i, image_pred in enumerate(prediction): + # 利用置信度进行第一轮筛选 + conf_mask = (image_pred[:, 4] >= conf_thres).squeeze() + image_pred = image_pred[conf_mask] + + if not image_pred.size(0): + continue + + # 获得种类及其置信度 + class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True) + + # 获得的内容为(x1, y1, x2, y2, obj_conf, class_conf, class_pred) + detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1) + + # 获得种类 + unique_labels = detections[:, -1].cpu().unique() + + if prediction.is_cuda: + unique_labels = unique_labels.cuda() + + for c in unique_labels: + # 获得某一类初步筛选后全部的预测结果 + detections_class = detections[detections[:, -1] == c] + # 按照存在物体的置信度排序 + _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True) + detections_class = detections_class[conf_sort_index] + # 进行非极大抑制 + max_detections = [] + while detections_class.size(0): + # 取出这一类置信度最高的,一步一步往下判断,判断重合程度是否大于nms_thres,如果是则去除掉 + max_detections.append(detections_class[0].unsqueeze(0)) + if len(detections_class) == 1: + break + ious = bbox_iou(max_detections[-1], detections_class[1:]) + detections_class = detections_class[1:][ious < nms_thres] + # 堆叠 + max_detections = torch.cat(max_detections).data + # Add max detections to outputs + output[image_i] = max_detections if output[image_i] is 
None else torch.cat( + (output[image_i], max_detections)) + + return output + +def merge_bboxes(bboxes, cutx, cuty): + merge_bbox = [] + for i in range(len(bboxes)): + for box in bboxes[i]: + tmp_box = [] + x1,y1,x2,y2 = box[0], box[1], box[2], box[3] + + if i == 0: + if y1 > cuty or x1 > cutx: + continue + if y2 >= cuty and y1 <= cuty: + y2 = cuty + if y2-y1 < 5: + continue + if x2 >= cutx and x1 <= cutx: + x2 = cutx + if x2-x1 < 5: + continue + + if i == 1: + if y2 < cuty or x1 > cutx: + continue + + if y2 >= cuty and y1 <= cuty: + y1 = cuty + if y2-y1 < 5: + continue + + if x2 >= cutx and x1 <= cutx: + x2 = cutx + if x2-x1 < 5: + continue + + if i == 2: + if y2 < cuty or x2 < cutx: + continue + + if y2 >= cuty and y1 <= cuty: + y1 = cuty + if y2-y1 < 5: + continue + + if x2 >= cutx and x1 <= cutx: + x1 = cutx + if x2-x1 < 5: + continue + + if i == 3: + if y1 > cuty or x2 < cutx: + continue + + if y2 >= cuty and y1 <= cuty: + y2 = cuty + if y2-y1 < 5: + continue + + if x2 >= cutx and x1 <= cutx: + x1 = cutx + if x2-x1 < 5: + continue + + tmp_box.append(x1) + tmp_box.append(y1) + tmp_box.append(x2) + tmp_box.append(y2) + tmp_box.append(box[-1]) + merge_bbox.append(tmp_box) + return merge_bbox \ No newline at end of file diff --git a/video.py b/video.py new file mode 100644 index 0000000000000000000000000000000000000000..4cf4308dd363b0a3579f676ded6910ab4d14b76f --- /dev/null +++ b/video.py @@ -0,0 +1,39 @@ +#-------------------------------------# +# 调用摄像头检测 +#-------------------------------------# +from yolo import YOLO +from PIL import Image +import numpy as np +import cv2 +import time +yolo = YOLO() +# 调用摄像头 +capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4") + +fps = 0.0 +while(True): + t1 = time.time() + # 读取某一帧 + ref,frame=capture.read() + # 格式转变,BGRtoRGB + frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB) + # 转变成Image + frame = Image.fromarray(np.uint8(frame)) + + # 进行检测 + frame = np.array(yolo.detect_image(frame)) + + # RGBtoBGR满足opencv显示格式 + frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR) + + fps = ( fps + (1./(time.time()-t1)) ) / 2 + print("fps= %.2f"%(fps)) + frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + + cv2.imshow("video",frame) + + + c= cv2.waitKey(30) & 0xff + if c==27: + capture.release() + break diff --git a/voc_annotation.py b/voc_annotation.py new file mode 100644 index 0000000000000000000000000000000000000000..3243058997cef4a411b7be90d50db4d6f10f7ab9 --- /dev/null +++ b/voc_annotation.py @@ -0,0 +1,33 @@ +import xml.etree.ElementTree as ET +from os import getcwd + +sets=[('2007', 'train'), ('2007', 'val'), ('2007', 'test')] + +wd = getcwd() +classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + +def convert_annotation(year, image_id, list_file): + in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) + tree=ET.parse(in_file) + root = tree.getroot() + if root.find('object')==None: + return + list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(wd, year, image_id)) + for obj in root.iter('object'): + difficult = obj.find('difficult').text + cls = obj.find('name').text + if cls not in classes or int(difficult)==1: + continue + cls_id = classes.index(cls) + xmlbox = obj.find('bndbox') + b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text)) + list_file.write(" " + 
",".join([str(a) for a in b]) + ',' + str(cls_id)) + + list_file.write('\n') + +for year, image_set in sets: + image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split() + list_file = open('%s_%s.txt'%(year, image_set), 'w') + for image_id in image_ids: + convert_annotation(year, image_id, list_file) + list_file.close() diff --git a/yolo.py b/yolo.py new file mode 100644 index 0000000000000000000000000000000000000000..5ed9456ab5a32ac682e0ccd0c7617255723ece54 --- /dev/null +++ b/yolo.py @@ -0,0 +1,175 @@ +#-------------------------------------# +# 创建YOLO类 +#-------------------------------------# +import cv2 +import numpy as np +import colorsys +import os +import torch +import torch.nn as nn +from nets.yolo4 import YoloBody +import torch.backends.cudnn as cudnn +from PIL import Image,ImageFont, ImageDraw +from torch.autograd import Variable +from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes + +class YOLO(object): + _defaults = { + "model_path": 'model_data/yolo4_voc_weights.pth', + "anchors_path": 'model_data/yolo_anchors.txt', + "classes_path": 'model_data/voc_classes.txt', + "model_image_size" : (416, 416, 3), + "confidence": 0.5, + "cuda": True + } + + @classmethod + def get_defaults(cls, n): + if n in cls._defaults: + return cls._defaults[n] + else: + return "Unrecognized attribute name '" + n + "'" + + #---------------------------------------------------# + # 初始化YOLO + #---------------------------------------------------# + def __init__(self, **kwargs): + self.__dict__.update(self._defaults) + self.class_names = self._get_class() + self.anchors = self._get_anchors() + self.generate() + #---------------------------------------------------# + # 获得所有的分类 + #---------------------------------------------------# + def _get_class(self): + classes_path = os.path.expanduser(self.classes_path) + with open(classes_path) as f: + class_names = f.readlines() + class_names = [c.strip() for c in class_names] + return class_names + + #---------------------------------------------------# + # 获得所有的先验框 + #---------------------------------------------------# + def _get_anchors(self): + anchors_path = os.path.expanduser(self.anchors_path) + with open(anchors_path) as f: + anchors = f.readline() + anchors = [float(x) for x in anchors.split(',')] + return np.array(anchors).reshape([-1, 3, 2])[::-1,:,:] + + #---------------------------------------------------# + # 获得所有的分类 + #---------------------------------------------------# + def generate(self): + os.environ["CUDA_VISIBLE_DEVICES"] = '0' + self.net = YoloBody(len(self.anchors[0]),len(self.class_names)).eval() + + # 加快模型训练的效率 + print('Loading weights into state dict...') + state_dict = torch.load(self.model_path) + self.net.load_state_dict(state_dict) + self.net = nn.DataParallel(self.net) + if self.cuda: + self.net = self.net.cuda() + + print('Finished!') + + self.yolo_decodes = [] + for i in range(3): + self.yolo_decodes.append(DecodeBox(self.anchors[i], len(self.class_names), (self.model_image_size[1], self.model_image_size[0]))) + + + print('{} model, anchors, and classes loaded.'.format(self.model_path)) + # 画框设置不同的颜色 + hsv_tuples = [(x / len(self.class_names), 1., 1.) 
+ for x in range(len(self.class_names))] + self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) + self.colors = list( + map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), + self.colors)) + + #---------------------------------------------------# + # 检测图片 + #---------------------------------------------------# + def detect_image(self, image): + image_shape = np.array(np.shape(image)[0:2]) + + crop_img = np.array(letterbox_image(image, (self.model_image_size[0],self.model_image_size[1]))) + photo = np.array(crop_img,dtype = np.float32) + photo /= 255.0 + photo = np.transpose(photo, (2, 0, 1)) + photo = photo.astype(np.float32) + images = [] + images.append(photo) + images = np.asarray(images) + + with torch.no_grad(): + images = torch.from_numpy(images) + if self.cuda: + images = images.cuda() + outputs = self.net(images) + + output_list = [] + for i in range(3): + output_list.append(self.yolo_decodes[i](outputs[i])) + output = torch.cat(output_list, 1) + batch_detections = non_max_suppression(output, len(self.class_names), + conf_thres=self.confidence, + nms_thres=0.3) + try: + batch_detections = batch_detections[0].cpu().numpy() + except: + return image + + top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence + top_conf = batch_detections[top_index,4]*batch_detections[top_index,5] + top_label = np.array(batch_detections[top_index,-1],np.int32) + top_bboxes = np.array(batch_detections[top_index,:4]) + top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1) + + # 去掉灰条 + boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape) + + font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32')) + + thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0] + + for i, c in enumerate(top_label): + predicted_class = self.class_names[c] + score = top_conf[i] + + top, left, bottom, right = boxes[i] + top = top - 5 + left = left - 5 + bottom = bottom + 5 + right = right + 5 + + top = max(0, np.floor(top + 0.5).astype('int32')) + left = max(0, np.floor(left + 0.5).astype('int32')) + bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32')) + right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32')) + + # 画框框 + label = '{} {:.2f}'.format(predicted_class, score) + draw = ImageDraw.Draw(image) + label_size = draw.textsize(label, font) + label = label.encode('utf-8') + print(label) + + if top - label_size[1] >= 0: + text_origin = np.array([left, top - label_size[1]]) + else: + text_origin = np.array([left, top + 1]) + + for i in range(thickness): + draw.rectangle( + [left + i, top + i, right - i, bottom - i], + outline=self.colors[self.class_names.index(predicted_class)]) + draw.rectangle( + [tuple(text_origin), tuple(text_origin + label_size)], + fill=self.colors[self.class_names.index(predicted_class)]) + draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) + del draw + return image +
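The geometry helpers defined in utils/utils.py can be sanity-checked in isolation with hand-computed inputs. The snippet below is a minimal sketch, not part of the patch; it assumes it is run from the repository root so that utils.utils is importable, and it exercises bbox_iou (with the +1 pixel convention used above) and letterbox_image on a non-square image.

import torch
from PIL import Image
from utils.utils import bbox_iou, letterbox_image

# Two boxes in x1,y1,x2,y2 form. With the +1 convention the intersection is
# 6*6 = 36 and each box covers 11*11 = 121 pixels, so the expected IoU is
# 36 / (121 + 121 - 36), roughly 0.175.
box1 = torch.FloatTensor([[0, 0, 10, 10]])
box2 = torch.FloatTensor([[5, 5, 15, 15]])
print(bbox_iou(box1, box2))

# letterbox_image keeps the aspect ratio: a 500x375 image is scaled by
# min(416/500, 416/375) = 0.832 to 416x312 and pasted onto grey padding.
photo = Image.new('RGB', (500, 375))
print(letterbox_image(photo, (416, 416)).size)  # -> (416, 416)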