diff --git a/get_map.py b/get_map.py index e4b58092f4c8d5d72a011772add0da4551698a0f..1d892747277458d4ad9f0830adecf206be35045a 100644 --- a/get_map.py +++ b/get_map.py @@ -1,901 +1,113 @@ -import glob -import json -import os -import shutil -import operator -import sys -import argparse -import math - -import numpy as np - -''' -用于计算mAP -代码克隆自https://github.com/Cartucho/mAP -如果想要设定mAP0.x,比如计算mAP0.75,可以设定MINOVERLAP = 0.75。 -''' -MINOVERLAP = 0.5 - -parser = argparse.ArgumentParser() -parser.add_argument('-na', '--no-animation', help="no animation is shown.", action="store_true") -parser.add_argument('-np', '--no-plot', help="no plot is shown.", action="store_true") -parser.add_argument('-q', '--quiet', help="minimalistic console output.", action="store_true") -parser.add_argument('-i', '--ignore', nargs='+', type=str, help="ignore a list of classes.") -parser.add_argument('--set-class-iou', nargs='+', type=str, help="set IoU for a specific class.") -args = parser.parse_args() - -''' - 0,0 ------> x (width) - | - | (Left,Top) - | *_________ - | | | - | | - y |_________| - (height) * - (Right,Bottom) -''' - -if args.ignore is None: - args.ignore = [] - -specific_iou_flagged = False -if args.set_class_iou is not None: - specific_iou_flagged = True - -os.chdir(os.path.dirname(os.path.abspath(__file__))) - -GT_PATH = os.path.join(os.getcwd(), 'input', 'ground-truth') -DR_PATH = os.path.join(os.getcwd(), 'input', 'detection-results') -IMG_PATH = os.path.join(os.getcwd(), 'input', 'images-optional') -if os.path.exists(IMG_PATH): - for dirpath, dirnames, files in os.walk(IMG_PATH): - if not files: - args.no_animation = True -else: - args.no_animation = True - -show_animation = False -if not args.no_animation: - try: - import cv2 - show_animation = True - except ImportError: - print("\"opencv-python\" not found, please install to visualize the results.") - args.no_animation = True - -draw_plot = False -if not args.no_plot: - try: - import matplotlib.pyplot as plt - draw_plot = True - except ImportError: - print("\"matplotlib\" not found, please install it to get the resulting plots.") - args.no_plot = True - - -def log_average_miss_rate(precision, fp_cumsum, num_images): - """ - log-average miss rate: - Calculated by averaging miss rates at 9 evenly spaced FPPI points - between 10e-2 and 10e0, in log-space. - - output: - lamr | log-average miss rate - mr | miss rate - fppi | false positives per image - - references: - [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the - State of the Art." Pattern Analysis and Machine Intelligence, IEEE - Transactions on 34.4 (2012): 743 - 761. 
- """ - - if precision.size == 0: - lamr = 0 - mr = 1 - fppi = 0 - return lamr, mr, fppi - - fppi = fp_cumsum / float(num_images) - mr = (1 - precision) - - fppi_tmp = np.insert(fppi, 0, -1.0) - mr_tmp = np.insert(mr, 0, 1.0) - - ref = np.logspace(-2.0, 0.0, num = 9) - for i, ref_i in enumerate(ref): - j = np.where(fppi_tmp <= ref_i)[-1][-1] - ref[i] = mr_tmp[j] - - lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref)))) - - return lamr, mr, fppi - -""" - throw error and exit -""" -def error(msg): - print(msg) - sys.exit(0) - -""" - check if the number is a float between 0.0 and 1.0 -""" -def is_float_between_0_and_1(value): - try: - val = float(value) - if val > 0.0 and val < 1.0: - return True - else: - return False - except ValueError: - return False - -""" - Calculate the AP given the recall and precision array - 1st) We compute a version of the measured precision/recall curve with - precision monotonically decreasing - 2nd) We compute the AP as the area under this curve by numerical integration. -""" -def voc_ap(rec, prec): - """ - --- Official matlab code VOC2012--- - mrec=[0 ; rec ; 1]; - mpre=[0 ; prec ; 0]; - for i=numel(mpre)-1:-1:1 - mpre(i)=max(mpre(i),mpre(i+1)); - end - i=find(mrec(2:end)~=mrec(1:end-1))+1; - ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); - """ - rec.insert(0, 0.0) # insert 0.0 at begining of list - rec.append(1.0) # insert 1.0 at end of list - mrec = rec[:] - prec.insert(0, 0.0) # insert 0.0 at begining of list - prec.append(0.0) # insert 0.0 at end of list - mpre = prec[:] - """ - This part makes the precision monotonically decreasing - (goes from the end to the beginning) - matlab: for i=numel(mpre)-1:-1:1 - mpre(i)=max(mpre(i),mpre(i+1)); - """ - for i in range(len(mpre)-2, -1, -1): - mpre[i] = max(mpre[i], mpre[i+1]) - """ - This part creates a list of indexes where the recall changes - matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1; - """ - i_list = [] - for i in range(1, len(mrec)): - if mrec[i] != mrec[i-1]: - i_list.append(i) # if it was matlab would be i + 1 - """ - The Average Precision (AP) is the area under the curve - (numerical integration) - matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); - """ - ap = 0.0 - for i in i_list: - ap += ((mrec[i]-mrec[i-1])*mpre[i]) - return ap, mrec, mpre - - -""" - Convert the lines of a file to a list -""" -def file_lines_to_list(path): - # open txt file lines to a list - with open(path) as f: - content = f.readlines() - # remove whitespace characters like `\n` at the end of each line - content = [x.strip() for x in content] - return content - -""" - Draws text in image -""" -def draw_text_in_image(img, text, pos, color, line_width): - font = cv2.FONT_HERSHEY_PLAIN - fontScale = 1 - lineType = 1 - bottomLeftCornerOfText = pos - cv2.putText(img, text, - bottomLeftCornerOfText, - font, - fontScale, - color, - lineType) - text_width, _ = cv2.getTextSize(text, font, fontScale, lineType)[0] - return img, (line_width + text_width) - -""" - Plot - adjust axes -""" -def adjust_axes(r, t, fig, axes): - # get text width for re-scaling - bb = t.get_window_extent(renderer=r) - text_width_inches = bb.width / fig.dpi - # get axis width in inches - current_fig_width = fig.get_figwidth() - new_fig_width = current_fig_width + text_width_inches - propotion = new_fig_width / current_fig_width - # get axis limit - x_lim = axes.get_xlim() - axes.set_xlim([x_lim[0], x_lim[1]*propotion]) - -""" - Draw plot using Matplotlib -""" -def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, 
true_p_bar): - # sort the dictionary by decreasing value, into a list of tuples - sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1)) - # unpacking the list of tuples into two lists - sorted_keys, sorted_values = zip(*sorted_dic_by_value) - # - if true_p_bar != "": - """ - Special case to draw in: - - green -> TP: True Positives (object detected and matches ground-truth) - - red -> FP: False Positives (object detected but does not match ground-truth) - - orange -> FN: False Negatives (object not detected but present in the ground-truth) - """ - fp_sorted = [] - tp_sorted = [] - for key in sorted_keys: - fp_sorted.append(dictionary[key] - true_p_bar[key]) - tp_sorted.append(true_p_bar[key]) - plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive') - plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted) - # add legend - plt.legend(loc='lower right') - """ - Write number on side of bar - """ - fig = plt.gcf() # gcf - get current figure - axes = plt.gca() - r = fig.canvas.get_renderer() - for i, val in enumerate(sorted_values): - fp_val = fp_sorted[i] - tp_val = tp_sorted[i] - fp_str_val = " " + str(fp_val) - tp_str_val = fp_str_val + " " + str(tp_val) - # trick to paint multicolor with offset: - # first paint everything and then repaint the first number - t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold') - plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold') - if i == (len(sorted_values)-1): # largest bar - adjust_axes(r, t, fig, axes) - else: - plt.barh(range(n_classes), sorted_values, color=plot_color) - """ - Write number on side of bar - """ - fig = plt.gcf() # gcf - get current figure - axes = plt.gca() - r = fig.canvas.get_renderer() - for i, val in enumerate(sorted_values): - str_val = " " + str(val) # add a space before - if val < 1.0: - str_val = " {0:.2f}".format(val) - t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold') - # re-set axes to show number inside the figure - if i == (len(sorted_values)-1): # largest bar - adjust_axes(r, t, fig, axes) - # set window title - fig.canvas.set_window_title(window_title) - # write classes in y axis - tick_font_size = 12 - plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size) - """ - Re-scale height accordingly - """ - init_height = fig.get_figheight() - # comput the matrix height in points and inches - dpi = fig.dpi - height_pt = n_classes * (tick_font_size * 1.4) # 1.4 (some spacing) - height_in = height_pt / dpi - # compute the required figure height - top_margin = 0.15 # in percentage of the figure height - bottom_margin = 0.05 # in percentage of the figure height - figure_height = height_in / (1 - top_margin - bottom_margin) - # set new height - if figure_height > init_height: - fig.set_figheight(figure_height) - - # set plot title - plt.title(plot_title, fontsize=14) - # set axis titles - # plt.xlabel('classes') - plt.xlabel(x_label, fontsize='large') - # adjust size of window - fig.tight_layout() - # save the plot - fig.savefig(output_path) - # show image - if to_show: - plt.show() - # close the plot - plt.close() - -""" - Create a ".temp_files/" and "results/" directory -""" -TEMP_FILES_PATH = ".temp_files" -if not os.path.exists(TEMP_FILES_PATH): # if it doesn't exist already - os.makedirs(TEMP_FILES_PATH) -results_files_path = "results" -if os.path.exists(results_files_path): # if it exist already - # reset the 
results directory - shutil.rmtree(results_files_path) - -os.makedirs(results_files_path) -if draw_plot: - os.makedirs(os.path.join(results_files_path, "AP")) - os.makedirs(os.path.join(results_files_path, "F1")) - os.makedirs(os.path.join(results_files_path, "Recall")) - os.makedirs(os.path.join(results_files_path, "Precision")) -if show_animation: - os.makedirs(os.path.join(results_files_path, "images", "detections_one_by_one")) - -""" - ground-truth - Load each of the ground-truth files into a temporary ".json" file. - Create a list of all the class names present in the ground-truth (gt_classes). -""" -# get a list with the ground-truth files -ground_truth_files_list = glob.glob(GT_PATH + '/*.txt') -if len(ground_truth_files_list) == 0: - error("Error: No ground-truth files found!") -ground_truth_files_list.sort() -# dictionary with counter per class -gt_counter_per_class = {} -counter_images_per_class = {} - -for txt_file in ground_truth_files_list: - #print(txt_file) - file_id = txt_file.split(".txt", 1)[0] - file_id = os.path.basename(os.path.normpath(file_id)) - # check if there is a correspondent detection-results file - temp_path = os.path.join(DR_PATH, (file_id + ".txt")) - if not os.path.exists(temp_path): - error_msg = "Error. File not found: {}\n".format(temp_path) - error_msg += "(You can avoid this error message by running extra/intersect-gt-and-dr.py)" - error(error_msg) - lines_list = file_lines_to_list(txt_file) - # create ground-truth dictionary - bounding_boxes = [] - is_difficult = False - already_seen_classes = [] - for line in lines_list: - try: - if "difficult" in line: - class_name, left, top, right, bottom, _difficult = line.split() - is_difficult = True - else: - class_name, left, top, right, bottom = line.split() - - except: - if "difficult" in line: - line_split = line.split() - _difficult = line_split[-1] - bottom = line_split[-2] - right = line_split[-3] - top = line_split[-4] - left = line_split[-5] - class_name = "" - for name in line_split[:-5]: - class_name += name + " " - class_name = class_name[:-1] - is_difficult = True - else: - line_split = line.split() - bottom = line_split[-1] - right = line_split[-2] - top = line_split[-3] - left = line_split[-4] - class_name = "" - for name in line_split[:-4]: - class_name += name + " " - class_name = class_name[:-1] - if class_name in args.ignore: - continue - bbox = left + " " + top + " " + right + " " +bottom - if is_difficult: - bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True}) - is_difficult = False - else: - bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False}) - if class_name in gt_counter_per_class: - gt_counter_per_class[class_name] += 1 - else: - gt_counter_per_class[class_name] = 1 - - if class_name not in already_seen_classes: - if class_name in counter_images_per_class: - counter_images_per_class[class_name] += 1 - else: - counter_images_per_class[class_name] = 1 - already_seen_classes.append(class_name) - - - with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile: - json.dump(bounding_boxes, outfile) - -gt_classes = list(gt_counter_per_class.keys()) -gt_classes = sorted(gt_classes) -n_classes = len(gt_classes) - -""" - Check format of the flag --set-class-iou (if used) - e.g. check if class exists -""" -if specific_iou_flagged: - n_args = len(args.set_class_iou) - error_msg = \ - '\n --set-class-iou [class_1] [IoU_1] [class_2] [IoU_2] [...]' - if n_args % 2 != 0: - error('Error, missing arguments. 
Flag usage:' + error_msg) - # [class_1] [IoU_1] [class_2] [IoU_2] - # specific_iou_classes = ['class_1', 'class_2'] - specific_iou_classes = args.set_class_iou[::2] # even - # iou_list = ['IoU_1', 'IoU_2'] - iou_list = args.set_class_iou[1::2] # odd - if len(specific_iou_classes) != len(iou_list): - error('Error, missing arguments. Flag usage:' + error_msg) - for tmp_class in specific_iou_classes: - if tmp_class not in gt_classes: - error('Error, unknown class \"' + tmp_class + '\". Flag usage:' + error_msg) - for num in iou_list: - if not is_float_between_0_and_1(num): - error('Error, IoU must be between 0.0 and 1.0. Flag usage:' + error_msg) - -""" - detection-results - Load each of the detection-results files into a temporary ".json" file. -""" -dr_files_list = glob.glob(DR_PATH + '/*.txt') -dr_files_list.sort() - -for class_index, class_name in enumerate(gt_classes): - bounding_boxes = [] - for txt_file in dr_files_list: - file_id = txt_file.split(".txt",1)[0] - file_id = os.path.basename(os.path.normpath(file_id)) - temp_path = os.path.join(GT_PATH, (file_id + ".txt")) - if class_index == 0: - if not os.path.exists(temp_path): - error_msg = "Error. File not found: {}\n".format(temp_path) - error_msg += "(You can avoid this error message by running extra/intersect-gt-and-dr.py)" - error(error_msg) - lines = file_lines_to_list(txt_file) - for line in lines: - try: - tmp_class_name, confidence, left, top, right, bottom = line.split() - except: - line_split = line.split() - bottom = line_split[-1] - right = line_split[-2] - top = line_split[-3] - left = line_split[-4] - confidence = line_split[-5] - tmp_class_name = "" - for name in line_split[:-5]: - tmp_class_name += name + " " - tmp_class_name = tmp_class_name[:-1] - - if tmp_class_name == class_name: - bbox = left + " " + top + " " + right + " " +bottom - bounding_boxes.append({"confidence":confidence, "file_id":file_id, "bbox":bbox}) - - bounding_boxes.sort(key=lambda x:float(x['confidence']), reverse=True) - with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile: - json.dump(bounding_boxes, outfile) - -""" - Calculate the AP for each class -""" -sum_AP = 0.0 -ap_dictionary = {} -lamr_dictionary = {} -with open(results_files_path + "/results.txt", 'w') as results_file: - results_file.write("# AP and precision/recall per class\n") - count_true_positives = {} - - for class_index, class_name in enumerate(gt_classes): - count_true_positives[class_name] = 0 - """ - Load detection-results of that class - """ - dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json" - dr_data = json.load(open(dr_file)) - """ - Assign detection-results to ground-truth objects - """ - nd = len(dr_data) - tp = [0] * nd - fp = [0] * nd - score = [0] * nd - score05_idx = 0 - for idx, detection in enumerate(dr_data): - file_id = detection["file_id"] - score[idx] = float(detection["confidence"]) - if score[idx] > 0.5: - score05_idx = idx - - if show_animation: - ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*") - if len(ground_truth_img) == 0: - error("Error. Image not found with id: " + file_id) - elif len(ground_truth_img) > 1: - error("Error. 
Multiple image with id: " + file_id) - else: - img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0]) - img_cumulative_path = results_files_path + "/images/" + ground_truth_img[0] - if os.path.isfile(img_cumulative_path): - img_cumulative = cv2.imread(img_cumulative_path) - else: - img_cumulative = img.copy() - bottom_border = 60 - BLACK = [0, 0, 0] - img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK) - - gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json" - ground_truth_data = json.load(open(gt_file)) - ovmax = -1 - gt_match = -1 - bb = [ float(x) for x in detection["bbox"].split() ] - for obj in ground_truth_data: - if obj["class_name"] == class_name: - bbgt = [ float(x) for x in obj["bbox"].split() ] - bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])] - iw = bi[2] - bi[0] + 1 - ih = bi[3] - bi[1] + 1 - if iw > 0 and ih > 0: - # compute overlap (IoU) = area of intersection / area of union - ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0] - + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih - ov = iw * ih / ua - if ov > ovmax: - ovmax = ov - gt_match = obj - - if show_animation: - status = "NO MATCH FOUND!" - min_overlap = MINOVERLAP - if specific_iou_flagged: - if class_name in specific_iou_classes: - index = specific_iou_classes.index(class_name) - min_overlap = float(iou_list[index]) - if ovmax >= min_overlap: - if "difficult" not in gt_match: - if not bool(gt_match["used"]): - tp[idx] = 1 - gt_match["used"] = True - count_true_positives[class_name] += 1 - with open(gt_file, 'w') as f: - f.write(json.dumps(ground_truth_data)) - if show_animation: - status = "MATCH!" - else: - fp[idx] = 1 - if show_animation: - status = "REPEATED MATCH!" - else: - fp[idx] = 1 - if ovmax > 0: - status = "INSUFFICIENT OVERLAP" - - """ - Draw image to show animation - """ - if show_animation: - height, widht = img.shape[:2] - # colors (OpenCV works with BGR) - white = (255,255,255) - light_blue = (255,200,100) - green = (0,255,0) - light_red = (30,30,255) - # 1st line - margin = 10 - v_pos = int(height - margin - (bottom_border / 2.0)) - text = "Image: " + ground_truth_img[0] + " " - img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) - text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " " - img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, line_width) - if ovmax != -1: - color = light_red - if status == "INSUFFICIENT OVERLAP": - text = "IoU: {0:.2f}% ".format(ovmax*100) + "< {0:.2f}% ".format(min_overlap*100) - else: - text = "IoU: {0:.2f}% ".format(ovmax*100) + ">= {0:.2f}% ".format(min_overlap*100) - color = green - img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) - # 2nd line - v_pos += int(bottom_border / 2.0) - rank_pos = str(idx+1) # rank position (idx starts at 0) - text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(float(detection["confidence"])*100) - img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) - color = light_red - if status == "MATCH!": - color = green - text = "Result: " + status + " " - img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) - - font = cv2.FONT_HERSHEY_SIMPLEX - if ovmax > 0: # if there is intersections between the bounding-boxes - bbgt = [ int(round(float(x))) for x in gt_match["bbox"].split() ] - 
cv2.rectangle(img,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) - cv2.rectangle(img_cumulative,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) - cv2.putText(img_cumulative, class_name, (bbgt[0],bbgt[1] - 5), font, 0.6, light_blue, 1, cv2.LINE_AA) - bb = [int(i) for i in bb] - cv2.rectangle(img,(bb[0],bb[1]),(bb[2],bb[3]),color,2) - cv2.rectangle(img_cumulative,(bb[0],bb[1]),(bb[2],bb[3]),color,2) - cv2.putText(img_cumulative, class_name, (bb[0],bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA) - # show image - cv2.imshow("Animation", img) - cv2.waitKey(20) # show for 20 ms - # save image to results - output_img_path = results_files_path + "/images/detections_one_by_one/" + class_name + "_detection" + str(idx) + ".jpg" - cv2.imwrite(output_img_path, img) - # save the image with all the objects drawn to it - cv2.imwrite(img_cumulative_path, img_cumulative) - - cumsum = 0 - for idx, val in enumerate(fp): - fp[idx] += cumsum - cumsum += val - - cumsum = 0 - for idx, val in enumerate(tp): - tp[idx] += cumsum - cumsum += val - - rec = tp[:] - for idx, val in enumerate(tp): - rec[idx] = float(tp[idx]) / np.maximum(gt_counter_per_class[class_name], 1) - - prec = tp[:] - for idx, val in enumerate(tp): - prec[idx] = float(tp[idx]) / np.maximum((fp[idx] + tp[idx]), 1) - - ap, mrec, mprec = voc_ap(rec[:], prec[:]) - F1 = np.array(rec)*np.array(prec)*2 / np.where((np.array(prec)+np.array(rec))==0, 1, (np.array(prec)+np.array(rec))) - - sum_AP += ap - text = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP " #class_name + " AP = {0:.2f}%".format(ap*100) - - if len(prec)>0: - F1_text = "{0:.2f}".format(F1[score05_idx]) + " = " + class_name + " F1 " - Recall_text = "{0:.2f}%".format(rec[score05_idx]*100) + " = " + class_name + " Recall " - Precision_text = "{0:.2f}%".format(prec[score05_idx]*100) + " = " + class_name + " Precision " - else: - F1_text = "0.00" + " = " + class_name + " F1 " - Recall_text = "0.00%" + " = " + class_name + " Recall " - Precision_text = "0.00%" + " = " + class_name + " Precision " - - rounded_prec = [ '%.2f' % elem for elem in prec ] - rounded_rec = [ '%.2f' % elem for elem in rec ] - results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n") - if not args.quiet: - if len(prec)>0: - print(text + "\t||\tscore_threhold=0.5 : " + "F1=" + "{0:.2f}".format(F1[score05_idx])\ - + " ; Recall=" + "{0:.2f}%".format(rec[score05_idx]*100) + " ; Precision=" + "{0:.2f}%".format(prec[score05_idx]*100)) - else: - print(text + "\t||\tscore_threhold=0.5 : F1=0.00% ; Recall=0.00% ; Precision=0.00%") - ap_dictionary[class_name] = ap - - n_images = counter_images_per_class[class_name] - lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images) - lamr_dictionary[class_name] = lamr - - """ - Draw plot - """ - if draw_plot: - plt.plot(rec, prec, '-o') - area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]] - area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]] - plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r') - - fig = plt.gcf() - fig.canvas.set_window_title('AP ' + class_name) - - plt.title('class: ' + text) - plt.xlabel('Recall') - plt.ylabel('Precision') - axes = plt.gca() - axes.set_xlim([0.0,1.0]) - axes.set_ylim([0.0,1.05]) - fig.savefig(results_files_path + "/AP/" + class_name + ".png") - plt.cla() - - plt.plot(score, F1, "-", color='orangered') - plt.title('class: ' + F1_text + "\nscore_threhold=0.5") - plt.xlabel('Score_Threhold') - plt.ylabel('F1') - axes = plt.gca() - 
axes.set_xlim([0.0,1.0]) - axes.set_ylim([0.0,1.05]) - fig.savefig(results_files_path + "/F1/" + class_name + ".png") - plt.cla() - - plt.plot(score, rec, "-H", color='gold') - plt.title('class: ' + Recall_text + "\nscore_threhold=0.5") - plt.xlabel('Score_Threhold') - plt.ylabel('Recall') - axes = plt.gca() - axes.set_xlim([0.0,1.0]) - axes.set_ylim([0.0,1.05]) - fig.savefig(results_files_path + "/Recall/" + class_name + ".png") - plt.cla() - - plt.plot(score, prec, "-s", color='palevioletred') - plt.title('class: ' + Precision_text + "\nscore_threhold=0.5") - plt.xlabel('Score_Threhold') - plt.ylabel('Precision') - axes = plt.gca() - axes.set_xlim([0.0,1.0]) - axes.set_ylim([0.0,1.05]) - fig.savefig(results_files_path + "/Precision/" + class_name + ".png") - plt.cla() - - if show_animation: - cv2.destroyAllWindows() - - results_file.write("\n# mAP of all classes\n") - mAP = sum_AP / n_classes - text = "mAP = {0:.2f}%".format(mAP*100) - results_file.write(text + "\n") - print(text) - -# remove the temp_files directory -shutil.rmtree(TEMP_FILES_PATH) - -""" - Count total of detection-results -""" -# iterate through all the files -det_counter_per_class = {} -for txt_file in dr_files_list: - # get lines to list - lines_list = file_lines_to_list(txt_file) - for line in lines_list: - class_name = line.split()[0] - # check if class is in the ignore list, if yes skip - if class_name in args.ignore: - continue - # count that object - if class_name in det_counter_per_class: - det_counter_per_class[class_name] += 1 - else: - # if class didn't exist yet - det_counter_per_class[class_name] = 1 -#print(det_counter_per_class) -dr_classes = list(det_counter_per_class.keys()) - - -""" - Plot the total number of occurences of each class in the ground-truth -""" -if draw_plot: - window_title = "ground-truth-info" - plot_title = "ground-truth\n" - plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)" - x_label = "Number of objects per class" - output_path = results_files_path + "/ground-truth-info.png" - to_show = False - plot_color = 'forestgreen' - draw_plot_func( - gt_counter_per_class, - n_classes, - window_title, - plot_title, - x_label, - output_path, - to_show, - plot_color, - '', - ) - -""" - Write number of ground-truth objects per class to results.txt -""" -with open(results_files_path + "/results.txt", 'a') as results_file: - results_file.write("\n# Number of ground-truth objects per class\n") - for class_name in sorted(gt_counter_per_class): - results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n") - -""" - Finish counting true positives -""" -for class_name in dr_classes: - # if class exists in detection-result but not in ground-truth then there are no true positives in that class - if class_name not in gt_classes: - count_true_positives[class_name] = 0 -#print(count_true_positives) - -""" - Plot the total number of occurences of each class in the "detection-results" folder -""" -if draw_plot: - window_title = "detection-results-info" - # Plot title - plot_title = "detection-results\n" - plot_title += "(" + str(len(dr_files_list)) + " files and " - count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values())) - plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)" - # end Plot title - x_label = "Number of objects per class" - output_path = results_files_path + "/detection-results-info.png" - to_show = False - plot_color = 'forestgreen' - true_p_bar = 
count_true_positives
-    draw_plot_func(
-        det_counter_per_class,
-        len(det_counter_per_class),
-        window_title,
-        plot_title,
-        x_label,
-        output_path,
-        to_show,
-        plot_color,
-        true_p_bar
-        )
-
-"""
- Write number of detected objects per class to results.txt
-"""
-with open(results_files_path + "/results.txt", 'a') as results_file:
-    results_file.write("\n# Number of detected objects per class\n")
-    for class_name in sorted(dr_classes):
-        n_det = det_counter_per_class[class_name]
-        text = class_name + ": " + str(n_det)
-        text += " (tp:" + str(count_true_positives[class_name]) + ""
-        text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n"
-        results_file.write(text)
-
-"""
- Draw log-average miss rate plot (Show lamr of all classes in decreasing order)
-"""
-if draw_plot:
-    window_title = "lamr"
-    plot_title = "log-average miss rate"
-    x_label = "log-average miss rate"
-    output_path = results_files_path + "/lamr.png"
-    to_show = False
-    plot_color = 'royalblue'
-    draw_plot_func(
-        lamr_dictionary,
-        n_classes,
-        window_title,
-        plot_title,
-        x_label,
-        output_path,
-        to_show,
-        plot_color,
-        ""
-        )
-
-"""
- Draw mAP plot (Show AP's of all classes in decreasing order)
-"""
-if draw_plot:
-    window_title = "mAP"
-    plot_title = "mAP = {0:.2f}%".format(mAP*100)
-    x_label = "Average Precision"
-    output_path = results_files_path + "/mAP.png"
-    to_show = True
-    plot_color = 'royalblue'
-    draw_plot_func(
-        ap_dictionary,
-        n_classes,
-        window_title,
-        plot_title,
-        x_label,
-        output_path,
-        to_show,
-        plot_color,
-        ""
-        )
+import os
+import xml.etree.ElementTree as ET
+
+from PIL import Image
+from tqdm import tqdm
+
+from utils.utils import get_classes
+from utils.utils_map import get_coco_map, get_map
+from yolo import YOLO
+
+if __name__ == "__main__":
+    '''
+    Unlike AP, Recall and Precision are not areas under a curve, so their values differ at
+    every confidence threshold. The Recall and Precision reported by the mAP computation
+    are the values obtained with a prediction confidence threshold of 0.5.
+
+    The txt files produced in ./map_out/detection-results/ contain more boxes than a direct
+    predict run, because a deliberately low threshold is used here: the goal is to compute
+    Recall and Precision at every threshold, which is what the mAP calculation requires.
+    '''
+    #------------------------------------------------------------------------------------------------------------------#
+    # map_mode selects what this script computes when it runs:
+    # map_mode 0 runs the whole pipeline: obtain predictions, obtain ground truth, compute VOC_map.
+    # map_mode 1 only obtains the prediction results.
+    # map_mode 2 only obtains the ground-truth boxes.
+    # map_mode 3 only computes VOC_map.
+    # map_mode 4 uses the COCO toolbox to compute the 0.50:0.95 mAP of the current dataset.
+    #            The predictions and ground truth must be obtained first, and pycocotools must be installed.
+    #-------------------------------------------------------------------------------------------------------------------#
+    map_mode = 0
+    #-------------------------------------------------------#
+    # classes_path selects the classes for which VOC_map is measured.
+    # In general it should match the classes_path used for
+    # training and prediction.
+    #-------------------------------------------------------#
+    classes_path = 'model_data/voc_classes.txt'
+    #-------------------------------------------------------#
+    # MINOVERLAP selects the mAP0.x to compute.
+    # For example, set MINOVERLAP = 0.75 to compute mAP0.75.
+    #-------------------------------------------------------#
+    MINOVERLAP = 0.5
+    #-------------------------------------------------------#
+    # map_vis toggles visualization during the VOC_map computation.
+    #-------------------------------------------------------#
+    map_vis = False
+    #-------------------------------------------------------#
+    # Path to the folder containing the VOC dataset.
+    # Defaults to the VOC dataset in the repository root.
+    #-------------------------------------------------------#
+    VOCdevkit_path = 'VOCdevkit'
+    #-------------------------------------------------------#
+    # Output folder for the results, map_out by default.
+    #-------------------------------------------------------#
+    map_out_path = 'map_out'
+
+    image_ids = 
open(os.path.join(VOCdevkit_path, "VOC2007/ImageSets/Main/test.txt")).read().strip().split() + + if not os.path.exists(map_out_path): + os.makedirs(map_out_path) + if not os.path.exists(os.path.join(map_out_path, 'ground-truth')): + os.makedirs(os.path.join(map_out_path, 'ground-truth')) + if not os.path.exists(os.path.join(map_out_path, 'detection-results')): + os.makedirs(os.path.join(map_out_path, 'detection-results')) + if not os.path.exists(os.path.join(map_out_path, 'images-optional')): + os.makedirs(os.path.join(map_out_path, 'images-optional')) + + class_names, _ = get_classes(classes_path) + + if map_mode == 0 or map_mode == 1: + print("Load model.") + yolo = YOLO(confidence = 0.001, nms_iou = 0.5) + print("Load model done.") + + print("Get predict result.") + for image_id in tqdm(image_ids): + image_path = os.path.join(VOCdevkit_path, "VOC2007/JPEGImages/"+image_id+".jpg") + image = Image.open(image_path) + if map_vis: + image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg")) + yolo.get_map_txt(image_id, image, class_names, map_out_path) + print("Get predict result done.") + + if map_mode == 0 or map_mode == 2: + print("Get ground truth result.") + for image_id in tqdm(image_ids): + with open(os.path.join(map_out_path, "ground-truth/"+image_id+".txt"), "w") as new_f: + root = ET.parse(os.path.join(VOCdevkit_path, "VOC2007/Annotations/"+image_id+".xml")).getroot() + for obj in root.findall('object'): + difficult_flag = False + if obj.find('difficult')!=None: + difficult = obj.find('difficult').text + if int(difficult)==1: + difficult_flag = True + obj_name = obj.find('name').text + if obj_name not in class_names: + continue + bndbox = obj.find('bndbox') + left = bndbox.find('xmin').text + top = bndbox.find('ymin').text + right = bndbox.find('xmax').text + bottom = bndbox.find('ymax').text + + if difficult_flag: + new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom)) + else: + new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) + print("Get ground truth result done.") + + if map_mode == 0 or map_mode == 3: + print("Get map.") + get_map(MINOVERLAP, True, path = map_out_path) + print("Get map done.") + + if map_mode == 4: + print("Get map.") + get_coco_map(class_names = class_names, path = map_out_path) + print("Get map done.") diff --git a/kmeans_for_anchors.py b/kmeans_for_anchors.py index 999f04f3f6ad98e1e32af43ac40cebc03514497a..b9c0dfa34f0fa345f60aa628fc0f2fb8613c0dda 100644 --- a/kmeans_for_anchors.py +++ b/kmeans_for_anchors.py @@ -24,32 +24,45 @@ def avg_iou(box,cluster): return np.mean([np.max(cas_iou(box[i],cluster)) for i in range(box.shape[0])]) def kmeans(box,k): - # 取出一共有多少框 + #-------------------------------------------------------------# + # 取出一共有多少框 + #-------------------------------------------------------------# row = box.shape[0] - # 每个框各个点的位置 + #-------------------------------------------------------------# + # 每个框各个点的位置 + #-------------------------------------------------------------# distance = np.empty((row,k)) - # 最后的聚类位置 + #-------------------------------------------------------------# + # 最后的聚类位置 + #-------------------------------------------------------------# last_clu = np.zeros((row,)) np.random.seed() - # 随机选5个当聚类中心 + #-------------------------------------------------------------# + # 随机选5个当聚类中心 + #-------------------------------------------------------------# cluster = box[np.random.choice(row,k,replace = False)] - # cluster = random.sample(row, k) while True: - # 计算每一行距离五个点的iou情况。 + 
#-------------------------------------------------------------# + # 计算每一行距离五个点的iou情况。 + #-------------------------------------------------------------# for i in range(row): distance[i] = 1 - cas_iou(box[i],cluster) - # 取出最小点 + #-------------------------------------------------------------# + # 取出最小点 + #-------------------------------------------------------------# near = np.argmin(distance,axis=1) if (last_clu == near).all(): break - # 求每一个类的中位点 + #-------------------------------------------------------------# + # 求每一个类的中位点 + #-------------------------------------------------------------# for j in range(k): cluster[j] = np.median( box[near == j],axis=0) @@ -60,7 +73,9 @@ def kmeans(box,k): def load_data(path): data = [] - # 对于每一个xml都寻找box + #-------------------------------------------------------------# + # 对于每一个xml都寻找box + #-------------------------------------------------------------# for xml_file in glob.glob('{}/*xml'.format(path)): tree = ET.parse(xml_file) height = int(tree.findtext('./size/height')) @@ -68,7 +83,9 @@ def load_data(path): if height<=0 or width<=0: continue - # 对于每一个目标都获得它的宽高 + #-------------------------------------------------------------# + # 对于每一个目标都获得它的宽高 + #-------------------------------------------------------------# for obj in tree.iter('object'): xmin = int(float(obj.findtext('bndbox/xmin'))) / width ymin = int(float(obj.findtext('bndbox/ymin'))) / height @@ -85,18 +102,26 @@ def load_data(path): if __name__ == '__main__': - # 运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml - # 会生成yolo_anchors.txt - SIZE = 416 + #-------------------------------------------------------------# + # 运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml + # 会生成yolo_anchors.txt + #-------------------------------------------------------------# + SIZE = 416 anchors_num = 9 - # 载入数据集,可以使用VOC的xml - path = r'./VOCdevkit/VOC2007/Annotations' + #-------------------------------------------------------------# + # 载入数据集,可以使用VOC的xml + #-------------------------------------------------------------# + path = r'./VOCdevkit/VOC2007/Annotations' - # 载入所有的xml - # 存储格式为转化为比例后的width,height + #-------------------------------------------------------------# + # 载入所有的xml + # 存储格式为转化为比例后的width,height + #-------------------------------------------------------------# data = load_data(path) - # 使用k聚类算法 + #-------------------------------------------------------------# + # 使用k聚类算法 + #-------------------------------------------------------------# out = kmeans(data,anchors_num) out = out[np.argsort(out[:,0])] print('acc:{:.2f}%'.format(avg_iou(data,out) * 100)) diff --git a/nets/CSPdarknet.py b/nets/CSPdarknet.py index b6f7965bfc6f7a17130f6976e41c7c3af14600fc..d339322e72f0fa47782cd86a0b1b3910d1405f4c 100644 --- a/nets/CSPdarknet.py +++ b/nets/CSPdarknet.py @@ -1,4 +1,5 @@ import math +from collections import OrderedDict import torch import torch.nn as nn diff --git a/nets/__init__.py b/nets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4287ca8617970fa8fc025b75cb319c7032706910 --- /dev/null +++ b/nets/__init__.py @@ -0,0 +1 @@ +# \ No newline at end of file diff --git a/nets/yolo.py b/nets/yolo.py new file mode 100644 index 0000000000000000000000000000000000000000..11a12734fbb978fde6d8720e0631d641c900c47c --- /dev/null +++ b/nets/yolo.py @@ -0,0 +1,185 @@ +from collections import OrderedDict + +import torch +import torch.nn as nn + +from nets.CSPdarknet import darknet53 + + +def conv2d(filter_in, filter_out, kernel_size, stride=1): + pad = (kernel_size - 1) // 2 if kernel_size else 0 + 
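    # (kernel_size - 1) // 2 is 'same'-style padding for the odd kernels used below:
+    # kernel 3 -> pad 1, kernel 1 -> pad 0, so the convolution preserves the spatial
+    # size and only the stride changes the feature-map resolution.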
+    return nn.Sequential(OrderedDict([
+        ("conv", nn.Conv2d(filter_in, filter_out, kernel_size=kernel_size, stride=stride, padding=pad, bias=False)),
+        ("bn", nn.BatchNorm2d(filter_out)),
+        ("relu", nn.LeakyReLU(0.1)),
+    ]))
+
+#---------------------------------------------------#
+# The SPP block: max-pool with kernels of several
+# sizes, then stack the pooled maps
+#---------------------------------------------------#
+class SpatialPyramidPooling(nn.Module):
+    def __init__(self, pool_sizes=[5, 9, 13]):
+        super(SpatialPyramidPooling, self).__init__()
+
+        self.maxpools = nn.ModuleList([nn.MaxPool2d(pool_size, 1, pool_size//2) for pool_size in pool_sizes])
+
+    def forward(self, x):
+        features = [maxpool(x) for maxpool in self.maxpools[::-1]]
+        features = torch.cat(features + [x], dim=1)
+
+        return features
+
+#---------------------------------------------------#
+# Convolution + upsampling
+#---------------------------------------------------#
+class Upsample(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(Upsample, self).__init__()
+
+        self.upsample = nn.Sequential(
+            conv2d(in_channels, out_channels, 1),
+            nn.Upsample(scale_factor=2, mode='nearest')
+        )
+
+    def forward(self, x):
+        x = self.upsample(x)
+        return x
+
+#---------------------------------------------------#
+# Block of three convolutions
+#---------------------------------------------------#
+def make_three_conv(filters_list, in_filters):
+    m = nn.Sequential(
+        conv2d(in_filters, filters_list[0], 1),
+        conv2d(filters_list[0], filters_list[1], 3),
+        conv2d(filters_list[1], filters_list[0], 1),
+    )
+    return m
+
+#---------------------------------------------------#
+# Block of five convolutions
+#---------------------------------------------------#
+def make_five_conv(filters_list, in_filters):
+    m = nn.Sequential(
+        conv2d(in_filters, filters_list[0], 1),
+        conv2d(filters_list[0], filters_list[1], 3),
+        conv2d(filters_list[1], filters_list[0], 1),
+        conv2d(filters_list[0], filters_list[1], 3),
+        conv2d(filters_list[1], filters_list[0], 1),
+    )
+    return m
+
+#---------------------------------------------------#
+# Produces the final yolov4 head outputs
+#---------------------------------------------------#
+def yolo_head(filters_list, in_filters):
+    m = nn.Sequential(
+        conv2d(in_filters, filters_list[0], 3),
+        nn.Conv2d(filters_list[0], filters_list[1], 1),
+    )
+    return m
+
+#---------------------------------------------------#
+# yolo_body
+#---------------------------------------------------#
+class YoloBody(nn.Module):
+    def __init__(self, anchors_mask, num_classes):
+        super(YoloBody, self).__init__()
+        #---------------------------------------------------#
+        # Build the CSPdarknet53 backbone model.
+        # It returns three effective feature layers with shapes:
+        # 52,52,256
+        # 26,26,512
+        # 13,13,1024
+        #---------------------------------------------------#
+        self.backbone = darknet53(None)
+
+        self.conv1 = make_three_conv([512,1024],1024)
+        self.SPP = SpatialPyramidPooling()
+        self.conv2 = make_three_conv([512,1024],2048)
+
+        self.upsample1 = Upsample(512,256)
+        self.conv_for_P4 = conv2d(512,256,1)
+        self.make_five_conv1 = make_five_conv([256, 512],512)
+
+        self.upsample2 = Upsample(256,128)
+        self.conv_for_P3 = conv2d(256,128,1)
+        self.make_five_conv2 = make_five_conv([128, 256],256)
+
+        # 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20) = 75
+        self.yolo_head3 = yolo_head([256, len(anchors_mask[0]) * (5 + num_classes)],128)
+
+        self.down_sample1 = conv2d(128,256,3,stride=2)
+        self.make_five_conv3 = make_five_conv([256, 512],512)
+
+        # 3*(5+num_classes) = 3*(5+20) = 3*(4+1+20) = 75
+        self.yolo_head2 = yolo_head([512, len(anchors_mask[1]) * (5 + num_classes)],256)
+
+        self.down_sample2 = 
conv2d(256,512,3,stride=2) + self.make_five_conv4 = make_five_conv([512, 1024],1024) + + # 3*(5+num_classes)=3*(5+20)=3*(4+1+20)=75 + self.yolo_head1 = yolo_head([1024, len(anchors_mask[2]) * (5 + num_classes)],512) + + + def forward(self, x): + # backbone + x2, x1, x0 = self.backbone(x) + + # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,2048 + P5 = self.conv1(x0) + P5 = self.SPP(P5) + # 13,13,2048 -> 13,13,512 -> 13,13,1024 -> 13,13,512 + P5 = self.conv2(P5) + + # 13,13,512 -> 13,13,256 -> 26,26,256 + P5_upsample = self.upsample1(P5) + # 26,26,512 -> 26,26,256 + P4 = self.conv_for_P4(x1) + # 26,26,256 + 26,26,256 -> 26,26,512 + P4 = torch.cat([P4,P5_upsample],axis=1) + # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 + P4 = self.make_five_conv1(P4) + + # 26,26,256 -> 26,26,128 -> 52,52,128 + P4_upsample = self.upsample2(P4) + # 52,52,256 -> 52,52,128 + P3 = self.conv_for_P3(x2) + # 52,52,128 + 52,52,128 -> 52,52,256 + P3 = torch.cat([P3,P4_upsample],axis=1) + # 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 + P3 = self.make_five_conv2(P3) + + # 52,52,128 -> 26,26,256 + P3_downsample = self.down_sample1(P3) + # 26,26,256 + 26,26,256 -> 26,26,512 + P4 = torch.cat([P3_downsample,P4],axis=1) + # 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 + P4 = self.make_five_conv3(P4) + + # 26,26,256 -> 13,13,512 + P4_downsample = self.down_sample2(P4) + # 13,13,512 + 13,13,512 -> 13,13,1024 + P5 = torch.cat([P4_downsample,P5],axis=1) + # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 + P5 = self.make_five_conv4(P5) + + #---------------------------------------------------# + # 第三个特征层 + # y3=(batch_size,75,52,52) + #---------------------------------------------------# + out2 = self.yolo_head3(P3) + #---------------------------------------------------# + # 第二个特征层 + # y2=(batch_size,75,26,26) + #---------------------------------------------------# + out1 = self.yolo_head2(P4) + #---------------------------------------------------# + # 第一个特征层 + # y1=(batch_size,75,13,13) + #---------------------------------------------------# + out0 = self.yolo_head1(P5) + + return out0, out1, out2 + diff --git a/nets/yolo_training.py b/nets/yolo_training.py index e64a79700de085eb9aa703fa6c0050890a4309d3..cb0ba7d3ecd5de75fcf464eac32e7012691e98ca 100644 --- a/nets/yolo_training.py +++ b/nets/yolo_training.py @@ -1,431 +1,400 @@ -import os - -import math -import numpy as np -import scipy.signal import torch import torch.nn as nn -from matplotlib import pyplot as plt - -def jaccard(_box_a, _box_b): - b1_x1, b1_x2 = _box_a[:, 0] - _box_a[:, 2] / 2, _box_a[:, 0] + _box_a[:, 2] / 2 - b1_y1, b1_y2 = _box_a[:, 1] - _box_a[:, 3] / 2, _box_a[:, 1] + _box_a[:, 3] / 2 - b2_x1, b2_x2 = _box_b[:, 0] - _box_b[:, 2] / 2, _box_b[:, 0] + _box_b[:, 2] / 2 - b2_y1, b2_y2 = _box_b[:, 1] - _box_b[:, 3] / 2, _box_b[:, 1] + _box_b[:, 3] / 2 - box_a = torch.zeros_like(_box_a) - box_b = torch.zeros_like(_box_b) - box_a[:, 0], box_a[:, 1], box_a[:, 2], box_a[:, 3] = b1_x1, b1_y1, b1_x2, b1_y2 - box_b[:, 0], box_b[:, 1], box_b[:, 2], box_b[:, 3] = b2_x1, b2_y1, b2_x2, b2_y2 - A = box_a.size(0) - B = box_b.size(0) - max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), - box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) - min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), - box_b[:, :2].unsqueeze(0).expand(A, B, 2)) - inter = torch.clamp((max_xy - min_xy), min=0) - - inter = inter[:, :, 0] * inter[:, :, 1] - # 
计算先验框和真实框各自的面积 - area_a = ((box_a[:, 2]-box_a[:, 0]) * - (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] - area_b = ((box_b[:, 2]-box_b[:, 0]) * - (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] - # 求IOU - union = area_a + area_b - inter - return inter / union # [A,B] - -#---------------------------------------------------# -# 平滑标签 -#---------------------------------------------------# -def smooth_labels(y_true, label_smoothing,num_classes): - return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes - -def box_ciou(b1, b2): - """ - 输入为: - ---------- - b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh - b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh - - 返回为: - ------- - ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1) - """ - # 求出预测框左上角右下角 - b1_xy = b1[..., :2] - b1_wh = b1[..., 2:4] - b1_wh_half = b1_wh/2. - b1_mins = b1_xy - b1_wh_half - b1_maxes = b1_xy + b1_wh_half - # 求出真实框左上角右下角 - b2_xy = b2[..., :2] - b2_wh = b2[..., 2:4] - b2_wh_half = b2_wh/2. - b2_mins = b2_xy - b2_wh_half - b2_maxes = b2_xy + b2_wh_half - - # 求真实框和预测框所有的iou - intersect_mins = torch.max(b1_mins, b2_mins) - intersect_maxes = torch.min(b1_maxes, b2_maxes) - intersect_wh = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes)) - intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] - b1_area = b1_wh[..., 0] * b1_wh[..., 1] - b2_area = b2_wh[..., 0] * b2_wh[..., 1] - union_area = b1_area + b2_area - intersect_area - iou = intersect_area / torch.clamp(union_area,min = 1e-6) +import math +import numpy as np - # 计算中心的差距 - center_distance = torch.sum(torch.pow((b1_xy - b2_xy), 2), axis=-1) - - # 找到包裹两个框的最小框的左上角和右下角 - enclose_mins = torch.min(b1_mins, b2_mins) - enclose_maxes = torch.max(b1_maxes, b2_maxes) - enclose_wh = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes)) - # 计算对角线距离 - enclose_diagonal = torch.sum(torch.pow(enclose_wh,2), axis=-1) - ciou = iou - 1.0 * (center_distance) / torch.clamp(enclose_diagonal,min = 1e-6) - - v = (4 / (math.pi ** 2)) * torch.pow((torch.atan(b1_wh[..., 0]/torch.clamp(b1_wh[..., 1],min = 1e-6)) - torch.atan(b2_wh[..., 0]/torch.clamp(b2_wh[..., 1],min = 1e-6))), 2) - alpha = v / torch.clamp((1.0 - iou + v),min=1e-6) - ciou = ciou - alpha * v - return ciou - -def clip_by_tensor(t,t_min,t_max): - t=t.float() - result = (t >= t_min).float() * t + (t < t_min).float() * t_min - result = (result <= t_max).float() * result + (result > t_max).float() * t_max - return result +class YOLOLoss(nn.Module): + def __init__(self, anchors, num_classes, input_shape, cuda, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]], label_smoothing = 0): + super(YOLOLoss, self).__init__() + #-----------------------------------------------------------# + # 13x13的特征层对应的anchor是[142, 110],[192, 243],[459, 401] + # 26x26的特征层对应的anchor是[36, 75],[76, 55],[72, 146] + # 52x52的特征层对应的anchor是[12, 16],[19, 36],[40, 28] + #-----------------------------------------------------------# + self.anchors = anchors + self.num_classes = num_classes + self.bbox_attrs = 5 + num_classes + self.input_shape = input_shape + self.anchors_mask = anchors_mask + self.label_smoothing = label_smoothing + + self.ignore_threshold = 0.7 + self.cuda = cuda -def MSELoss(pred,target): - return (pred-target)**2 + def clip_by_tensor(self, t, t_min, t_max): + t = t.float() + result = (t >= t_min).float() * t + (t < t_min).float() * t_min + result = (result <= t_max).float() * result + (result > t_max).float() * t_max + return 
result -def BCELoss(pred,target): - epsilon = 1e-7 - pred = clip_by_tensor(pred, epsilon, 1.0 - epsilon) - output = -target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred) - return output + def MSELoss(self, pred, target): + return torch.pow(pred - target, 2) -class YOLOLoss(nn.Module): - def __init__(self, anchors, num_classes, img_size, label_smooth=0, cuda=True, normalize=True): - super(YOLOLoss, self).__init__() - self.anchors = anchors - self.num_anchors = len(anchors) - self.num_classes = num_classes - self.bbox_attrs = 5 + num_classes - self.img_size = img_size - self.feature_length = [img_size[0]//32,img_size[0]//16,img_size[0]//8] - self.label_smooth = label_smooth + def BCELoss(self, pred, target): + epsilon = 1e-7 + pred = self.clip_by_tensor(pred, epsilon, 1.0 - epsilon) + output = - target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred) + return output + + def box_ciou(self, b1, b2): + """ + 输入为: + ---------- + b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh + b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh + + 返回为: + ------- + ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1) + """ + #----------------------------------------------------# + # 求出预测框左上角右下角 + #----------------------------------------------------# + b1_xy = b1[..., :2] + b1_wh = b1[..., 2:4] + b1_wh_half = b1_wh/2. + b1_mins = b1_xy - b1_wh_half + b1_maxes = b1_xy + b1_wh_half + #----------------------------------------------------# + # 求出真实框左上角右下角 + #----------------------------------------------------# + b2_xy = b2[..., :2] + b2_wh = b2[..., 2:4] + b2_wh_half = b2_wh/2. + b2_mins = b2_xy - b2_wh_half + b2_maxes = b2_xy + b2_wh_half - self.ignore_threshold = 0.5 - self.lambda_conf = 1.0 - self.lambda_cls = 1.0 - self.lambda_loc = 1.0 - self.cuda = cuda - self.normalize = normalize + #----------------------------------------------------# + # 求真实框和预测框所有的iou + #----------------------------------------------------# + intersect_mins = torch.max(b1_mins, b2_mins) + intersect_maxes = torch.min(b1_maxes, b2_maxes) + intersect_wh = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes)) + intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] + b1_area = b1_wh[..., 0] * b1_wh[..., 1] + b2_area = b2_wh[..., 0] * b2_wh[..., 1] + union_area = b1_area + b2_area - intersect_area + iou = intersect_area / torch.clamp(union_area,min = 1e-6) - def forward(self, input, targets=None): #----------------------------------------------------# + # 计算中心的差距 + #----------------------------------------------------# + center_distance = torch.sum(torch.pow((b1_xy - b2_xy), 2), axis=-1) + + #----------------------------------------------------# + # 找到包裹两个框的最小框的左上角和右下角 + #----------------------------------------------------# + enclose_mins = torch.min(b1_mins, b2_mins) + enclose_maxes = torch.max(b1_maxes, b2_maxes) + enclose_wh = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes)) + #----------------------------------------------------# + # 计算对角线距离 + #----------------------------------------------------# + enclose_diagonal = torch.sum(torch.pow(enclose_wh,2), axis=-1) + ciou = iou - 1.0 * (center_distance) / torch.clamp(enclose_diagonal,min = 1e-6) + + v = (4 / (math.pi ** 2)) * torch.pow((torch.atan(b1_wh[..., 0] / torch.clamp(b1_wh[..., 1],min = 1e-6)) - torch.atan(b2_wh[..., 0] / torch.clamp(b2_wh[..., 1], min = 1e-6))), 2) + alpha = v / torch.clamp((1.0 - iou + v), min=1e-6) + ciou = ciou - alpha * v + return ciou + + 
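    # Recap of box_ciou above: CIoU = IoU - d^2 / c^2 - alpha * v, where d is the distance
+    # between the two box centers, c is the diagonal of the smallest box enclosing both,
+    # and v is the aspect-ratio penalty; each denominator is clamped to 1e-6 so the loss
+    # stays finite for degenerate boxes.
+
+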
#---------------------------------------------------# + # 平滑标签 + #---------------------------------------------------# + def smooth_labels(self, y_true, label_smoothing, num_classes): + return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes + + def forward(self, l, input, targets=None): + #----------------------------------------------------# + # l 代表使用的是第几个有效特征层 # input的shape为 bs, 3*(5+num_classes), 13, 13 # bs, 3*(5+num_classes), 26, 26 # bs, 3*(5+num_classes), 52, 52 + # targets 真实框的标签情况 [batch_size, num_gt, 5] #----------------------------------------------------# - - #-----------------------# - # 一共多少张图片 - #-----------------------# - bs = input.size(0) - #-----------------------# - # 特征层的高 - #-----------------------# - in_h = input.size(2) - #-----------------------# - # 特征层的宽 - #-----------------------# - in_w = input.size(3) - + #--------------------------------# + # 获得图片数量,特征层的高和宽 + #--------------------------------# + bs = input.size(0) + in_h = input.size(2) + in_w = input.size(3) #-----------------------------------------------------------------------# # 计算步长 # 每一个特征点对应原来的图片上多少个像素点 + # # 如果特征层为13x13的话,一个特征点就对应原来的图片上的32个像素点 # 如果特征层为26x26的话,一个特征点就对应原来的图片上的16个像素点 # 如果特征层为52x52的话,一个特征点就对应原来的图片上的8个像素点 # stride_h = stride_w = 32、16、8 #-----------------------------------------------------------------------# - stride_h = self.img_size[1] / in_h - stride_w = self.img_size[0] / in_w - - + stride_h = self.input_shape[0] / in_h + stride_w = self.input_shape[1] / in_w #-------------------------------------------------# # 此时获得的scaled_anchors大小是相对于特征层的 #-------------------------------------------------# - scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors] - + scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors] #-----------------------------------------------# # 输入的input一共有三个,他们的shape分别是 + # bs, 3 * (5+num_classes), 13, 13 => bs, 3, 5 + num_classes, 13, 13 => batch_size, 3, 13, 13, 5 + num_classes + # batch_size, 3, 13, 13, 5 + num_classes # batch_size, 3, 26, 26, 5 + num_classes # batch_size, 3, 52, 52, 5 + num_classes #-----------------------------------------------# - prediction = input.view(bs, int(self.num_anchors/3), - self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous() + prediction = input.view(bs, len(self.anchors_mask[l]), self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous() - # 获得置信度,是否有物体 + #-----------------------------------------------# + # 先验框的中心位置的调整参数 + #-----------------------------------------------# + x = torch.sigmoid(prediction[..., 0]) + y = torch.sigmoid(prediction[..., 1]) + #-----------------------------------------------# + # 先验框的宽高调整参数 + #-----------------------------------------------# + w = prediction[..., 2] + h = prediction[..., 3] + #-----------------------------------------------# + # 获得置信度,是否有物体 + #-----------------------------------------------# conf = torch.sigmoid(prediction[..., 4]) - # 种类置信度 + #-----------------------------------------------# + # 种类置信度 + #-----------------------------------------------# pred_cls = torch.sigmoid(prediction[..., 5:]) - #---------------------------------------------------------------# - # 找到哪些先验框内部包含物体 - # 利用真实框和先验框计算交并比 - # mask batch_size, 3, in_h, in_w 有目标的特征点 - # noobj_mask batch_size, 3, in_h, in_w 无目标的特征点 - # t_box batch_size, 3, in_h, in_w, 4 中心宽高的真实值 - # tconf batch_size, 3, in_h, in_w 置信度真实值 - # tcls batch_size, 3, in_h, in_w, num_classes 种类真实值 - #----------------------------------------------------------------# - 
mask, noobj_mask, t_box, tconf, tcls, box_loss_scale_x, box_loss_scale_y = self.get_target(targets, scaled_anchors,in_w, in_h,self.ignore_threshold) + #-----------------------------------------------# + # 获得网络应该有的预测结果 + #-----------------------------------------------# + y_true, noobj_mask, box_loss_scale = self.get_target(l, targets, scaled_anchors, in_h, in_w) #---------------------------------------------------------------# # 将预测结果进行解码,判断预测结果和真实值的重合程度 # 如果重合程度过大则忽略,因为这些特征点属于预测比较准确的特征点 # 作为负样本不合适 #----------------------------------------------------------------# - noobj_mask, pred_boxes_for_ciou = self.get_ignore(prediction, targets, scaled_anchors, in_w, in_h, noobj_mask) + noobj_mask, pred_boxes = self.get_ignore(l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask) if self.cuda: - mask, noobj_mask = mask.cuda(), noobj_mask.cuda() - box_loss_scale_x, box_loss_scale_y= box_loss_scale_x.cuda(), box_loss_scale_y.cuda() - tconf, tcls = tconf.cuda(), tcls.cuda() - pred_boxes_for_ciou = pred_boxes_for_ciou.cuda() - t_box = t_box.cuda() + y_true = y_true.cuda() + noobj_mask = noobj_mask.cuda() + box_loss_scale = box_loss_scale.cuda() + #-----------------------------------------------------------# + # reshape_y_true[...,2:3]和reshape_y_true[...,3:4] + # 表示真实框的宽高,二者均在0-1之间 + # 真实框越大,比重越小,小框的比重更大。 + #-----------------------------------------------------------# + box_loss_scale = 2 - box_loss_scale - box_loss_scale = 2 - box_loss_scale_x * box_loss_scale_y #---------------------------------------------------------------# # 计算预测结果和真实结果的CIOU #----------------------------------------------------------------# - ciou = (1 - box_ciou( pred_boxes_for_ciou[mask.bool()], t_box[mask.bool()]))* box_loss_scale[mask.bool()] - loss_loc = torch.sum(ciou) - - # 计算置信度的loss - loss_conf = torch.sum(BCELoss(conf, mask) * mask) + \ - torch.sum(BCELoss(conf, mask) * noobj_mask) - - loss_cls = torch.sum(BCELoss(pred_cls[mask == 1], smooth_labels(tcls[mask == 1],self.label_smooth,self.num_classes))) - - loss = loss_conf * self.lambda_conf + loss_cls * self.lambda_cls + loss_loc * self.lambda_loc - - if self.normalize: - num_pos = torch.sum(mask) - num_pos = torch.max(num_pos, torch.ones_like(num_pos)) - else: - num_pos = bs/3 - + ciou = (1 - self.box_ciou(pred_boxes[y_true[..., 4] == 1], y_true[..., :4][y_true[..., 4] == 1])) * box_loss_scale[y_true[..., 4] == 1] + loss_loc = torch.sum(ciou) + #-----------------------------------------------------------# + # 计算置信度的loss + #-----------------------------------------------------------# + loss_conf = torch.sum(self.BCELoss(conf, y_true[..., 4]) * y_true[..., 4]) + \ + torch.sum(self.BCELoss(conf, y_true[..., 4]) * noobj_mask) + + loss_cls = torch.sum(self.BCELoss(pred_cls[y_true[..., 4] == 1], self.smooth_labels(y_true[..., 5:][y_true[..., 4] == 1], self.label_smoothing, self.num_classes))) + + loss = loss_loc + loss_conf + loss_cls + num_pos = torch.sum(y_true[..., 4]) + num_pos = torch.max(num_pos, torch.ones_like(num_pos)) return loss, num_pos - def get_target(self, target, anchors, in_w, in_h, ignore_threshold): + def calculate_iou(self, _box_a, _box_b): + #-----------------------------------------------------------# + # 计算真实框的左上角和右下角 + #-----------------------------------------------------------# + b1_x1, b1_x2 = _box_a[:, 0] - _box_a[:, 2] / 2, _box_a[:, 0] + _box_a[:, 2] / 2 + b1_y1, b1_y2 = _box_a[:, 1] - _box_a[:, 3] / 2, _box_a[:, 1] + _box_a[:, 3] / 2 + #-----------------------------------------------------------# + # 计算先验框获得的预测框的左上角和右下角 + 
#-----------------------------------------------------------# + b2_x1, b2_x2 = _box_b[:, 0] - _box_b[:, 2] / 2, _box_b[:, 0] + _box_b[:, 2] / 2 + b2_y1, b2_y2 = _box_b[:, 1] - _box_b[:, 3] / 2, _box_b[:, 1] + _box_b[:, 3] / 2 + + #-----------------------------------------------------------# + # 将真实框和预测框都转化成左上角右下角的形式 + #-----------------------------------------------------------# + box_a = torch.zeros_like(_box_a) + box_b = torch.zeros_like(_box_b) + box_a[:, 0], box_a[:, 1], box_a[:, 2], box_a[:, 3] = b1_x1, b1_y1, b1_x2, b1_y2 + box_b[:, 0], box_b[:, 1], box_b[:, 2], box_b[:, 3] = b2_x1, b2_y1, b2_x2, b2_y2 + + #-----------------------------------------------------------# + # A为真实框的数量,B为先验框的数量 + #-----------------------------------------------------------# + A = box_a.size(0) + B = box_b.size(0) + + #-----------------------------------------------------------# + # 计算交的面积 + #-----------------------------------------------------------# + max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) + min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), box_b[:, :2].unsqueeze(0).expand(A, B, 2)) + inter = torch.clamp((max_xy - min_xy), min=0) + inter = inter[:, :, 0] * inter[:, :, 1] + #-----------------------------------------------------------# + # 计算预测框和真实框各自的面积 + #-----------------------------------------------------------# + area_a = ((box_a[:, 2]-box_a[:, 0]) * (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] + area_b = ((box_b[:, 2]-box_b[:, 0]) * (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] + #-----------------------------------------------------------# + # 求IOU + #-----------------------------------------------------------# + union = area_a + area_b - inter + return inter / union # [A,B] + + def get_target(self, l, targets, anchors, in_h, in_w): #-----------------------------------------------------# # 计算一共有多少张图片 #-----------------------------------------------------# - bs = len(target) - #-------------------------------------------------------# - # 获得当前特征层先验框所属的编号,方便后面对先验框筛选 - #-------------------------------------------------------# - anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)] - subtract_index = [0,3,6][self.feature_length.index(in_w)] - #-------------------------------------------------------# - # 创建全是0或者全是1的阵列 - #-------------------------------------------------------# - mask = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) - noobj_mask = torch.ones(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) - - tx = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) - ty = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) - tw = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) - th = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) - t_box = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, 4, requires_grad=False) - tconf = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) - tcls = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, self.num_classes, requires_grad=False) - - box_loss_scale_x = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) - box_loss_scale_y = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) - for b in range(bs): - if len(target[b])==0: + bs = len(targets) + #-----------------------------------------------------# + # 用于选取哪些先验框不包含物体 + 
#-----------------------------------------------------# + noobj_mask = torch.ones(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad = False) + #-----------------------------------------------------# + # 让网络更加去关注小目标 + #-----------------------------------------------------# + box_loss_scale = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad = False) + #-----------------------------------------------------# + # batch_size, 3, 13, 13, 5 + num_classes + #-----------------------------------------------------# + y_true = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, self.bbox_attrs, requires_grad = False) + for b in range(bs): + if len(targets[b])==0: continue + batch_target = torch.zeros_like(targets[b]) #-------------------------------------------------------# # 计算出正样本在特征层上的中心点 #-------------------------------------------------------# - gxs = target[b][:, 0:1] * in_w - gys = target[b][:, 1:2] * in_h - - #-------------------------------------------------------# - # 计算出正样本相对于特征层的宽高 - #-------------------------------------------------------# - gws = target[b][:, 2:3] * in_w - ghs = target[b][:, 3:4] * in_h - - #-------------------------------------------------------# - # 计算出正样本属于特征层的哪个特征点 - #-------------------------------------------------------# - gis = torch.floor(gxs) - gjs = torch.floor(gys) + batch_target[:, [0,2]] = targets[b][:, [0,2]] * in_w + batch_target[:, [1,3]] = targets[b][:, [1,3]] * in_h + batch_target[:, 4] = targets[b][:, 4] + batch_target = batch_target.cpu() #-------------------------------------------------------# # 将真实框转换一个形式 # num_true_box, 4 #-------------------------------------------------------# - gt_box = torch.FloatTensor(torch.cat([torch.zeros_like(gws), torch.zeros_like(ghs), gws, ghs], 1)) - + gt_box = torch.FloatTensor(torch.cat((torch.zeros((batch_target.size(0), 2)), batch_target[:, 2:4]), 1)) #-------------------------------------------------------# # 将先验框转换一个形式 # 9, 4 #-------------------------------------------------------# - anchor_shapes = torch.FloatTensor(torch.cat((torch.zeros((self.num_anchors, 2)), torch.FloatTensor(anchors)), 1)) + anchor_shapes = torch.FloatTensor(torch.cat((torch.zeros((len(anchors), 2)), torch.FloatTensor(anchors)), 1)) #-------------------------------------------------------# # 计算交并比 - # num_true_box, 9 + # self.calculate_iou(gt_box, anchor_shapes) = [num_true_box, 9]每一个真实框和9个先验框的重合情况 + # best_ns: + # [每个真实框最大的重合度max_iou, 每一个真实框最重合的先验框的序号] #-------------------------------------------------------# - anch_ious = jaccard(gt_box, anchor_shapes) + best_ns = torch.argmax(self.calculate_iou(gt_box, anchor_shapes), dim=-1) - #-------------------------------------------------------# - # 计算重合度最大的先验框是哪个 - # num_true_box, - #-------------------------------------------------------# - best_ns = torch.argmax(anch_ious,dim=-1) - for i, best_n in enumerate(best_ns): - if best_n not in anchor_index: + for t, best_n in enumerate(best_ns): + if best_n not in self.anchors_mask[l]: continue - #-------------------------------------------------------------# - # 取出各类坐标: - # gi和gj代表的是真实框对应的特征点的x轴y轴坐标 - # gx和gy代表真实框的x轴和y轴坐标 - # gw和gh代表真实框的宽和高 - #-------------------------------------------------------------# - gi = gis[i].long() - gj = gjs[i].long() - gx = gxs[i] - gy = gys[i] - gw = gws[i] - gh = ghs[i] - if (gj < in_h) and (gi < in_w): - best_n = best_n - subtract_index - #----------------------------------------# - # noobj_mask代表无目标的特征点 - #----------------------------------------# - noobj_mask[b, best_n, gj, gi] = 0 - 
#----------------------------------------# - # mask代表有目标的特征点 - #----------------------------------------# - mask[b, best_n, gj, gi] = 1 - #----------------------------------------# - # tx、ty代表中心的真实值 - #----------------------------------------# - tx[b, best_n, gj, gi] = gx - ty[b, best_n, gj, gi] = gy - #----------------------------------------# - # tw、th代表宽高的真实值 - #----------------------------------------# - tw[b, best_n, gj, gi] = gw - th[b, best_n, gj, gi] = gh - #----------------------------------------# - # 用于获得xywh的比例 - # 大目标loss权重小,小目标loss权重大 - #----------------------------------------# - box_loss_scale_x[b, best_n, gj, gi] = target[b][i, 2] - box_loss_scale_y[b, best_n, gj, gi] = target[b][i, 3] - #----------------------------------------# - # tconf代表物体置信度 - #----------------------------------------# - tconf[b, best_n, gj, gi] = 1 - #----------------------------------------# - # tcls代表种类置信度 - #----------------------------------------# - tcls[b, best_n, gj, gi, target[b][i, 4].long()] = 1 - else: - print('Step {0} out of bound'.format(b)) - print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(gj, in_h, gi, in_w)) - continue - t_box[...,0] = tx - t_box[...,1] = ty - t_box[...,2] = tw - t_box[...,3] = th - return mask, noobj_mask, t_box, tconf, tcls, box_loss_scale_x, box_loss_scale_y - - - def get_ignore(self,prediction,target,scaled_anchors,in_w, in_h,noobj_mask): + #----------------------------------------# + # 判断这个先验框是当前特征点的哪一个先验框 + #----------------------------------------# + k = self.anchors_mask[l].index(best_n) + #----------------------------------------# + # 获得真实框属于哪个网格点 + #----------------------------------------# + i = torch.floor(batch_target[t, 0]).long() + j = torch.floor(batch_target[t, 1]).long() + #----------------------------------------# + # 取出真实框的种类 + #----------------------------------------# + c = batch_target[t, 4].long() + + #----------------------------------------# + # noobj_mask代表无目标的特征点 + #----------------------------------------# + noobj_mask[b, k, j, i] = 0 + #----------------------------------------# + # tx、ty代表中心调整参数的真实值 + #----------------------------------------# + y_true[b, k, j, i, 0] = batch_target[t, 0] + y_true[b, k, j, i, 1] = batch_target[t, 1] + y_true[b, k, j, i, 2] = batch_target[t, 2] + y_true[b, k, j, i, 3] = batch_target[t, 3] + y_true[b, k, j, i, 4] = 1 + y_true[b, k, j, i, c + 5] = 1 + #----------------------------------------# + # 用于获得xywh的比例 + # 大目标loss权重小,小目标loss权重大 + #----------------------------------------# + box_loss_scale[b, k, j, i] = batch_target[t, 2] * batch_target[t, 3] / in_w / in_h + return y_true, noobj_mask, box_loss_scale + + def get_ignore(self, l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask): #-----------------------------------------------------# # 计算一共有多少张图片 #-----------------------------------------------------# - bs = len(target) - #-------------------------------------------------------# - # 获得当前特征层先验框所属的编号,方便后面对先验框筛选 - #-------------------------------------------------------# - anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)] - scaled_anchors = np.array(scaled_anchors)[anchor_index] - - # 先验框的中心位置的调整参数 - x = torch.sigmoid(prediction[..., 0]) - y = torch.sigmoid(prediction[..., 1]) - # 先验框的宽高调整参数 - w = prediction[..., 2] # Width - h = prediction[..., 3] # Height + bs = len(targets) FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor - LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor - - # 生成网格,先验框中心,网格左上角 + LongTensor = 
torch.cuda.LongTensor if x.is_cuda else torch.LongTensor + #-----------------------------------------------------# + # 生成网格,先验框中心,网格左上角 + #-----------------------------------------------------# grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_h, 1).repeat( - int(bs*self.num_anchors/3), 1, 1).view(x.shape).type(FloatTensor) + int(bs * len(self.anchors_mask[l])), 1, 1).view(x.shape).type(FloatTensor) grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_w, 1).t().repeat( - int(bs*self.num_anchors/3), 1, 1).view(y.shape).type(FloatTensor) + int(bs * len(self.anchors_mask[l])), 1, 1).view(y.shape).type(FloatTensor) # 生成先验框的宽高 - anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0])) - anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1])) + scaled_anchors_l = np.array(scaled_anchors)[self.anchors_mask[l]] + anchor_w = FloatTensor(scaled_anchors_l).index_select(1, LongTensor([0])) + anchor_h = FloatTensor(scaled_anchors_l).index_select(1, LongTensor([1])) anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape) anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape) - #-------------------------------------------------------# # 计算调整后的先验框中心与宽高 #-------------------------------------------------------# - pred_boxes = FloatTensor(prediction[..., :4].shape) - pred_boxes[..., 0] = x + grid_x - pred_boxes[..., 1] = y + grid_y - pred_boxes[..., 2] = torch.exp(w) * anchor_w - pred_boxes[..., 3] = torch.exp(h) * anchor_h - for i in range(bs): - pred_boxes_for_ignore = pred_boxes[i] + pred_boxes_x = torch.unsqueeze(x + grid_x, -1) + pred_boxes_y = torch.unsqueeze(y + grid_y, -1) + pred_boxes_w = torch.unsqueeze(torch.exp(w) * anchor_w, -1) + pred_boxes_h = torch.unsqueeze(torch.exp(h) * anchor_h, -1) + pred_boxes = torch.cat([pred_boxes_x, pred_boxes_y, pred_boxes_w, pred_boxes_h], dim = -1) + for b in range(bs): #-------------------------------------------------------# # 将预测结果转换一个形式 # pred_boxes_for_ignore num_anchors, 4 #-------------------------------------------------------# - pred_boxes_for_ignore = pred_boxes_for_ignore.view(-1, 4) + pred_boxes_for_ignore = pred_boxes[b].view(-1, 4) #-------------------------------------------------------# # 计算真实框,并把真实框转换成相对于特征层的大小 # gt_box num_true_box, 4 #-------------------------------------------------------# - if len(target[i]) > 0: - gx = target[i][:, 0:1] * in_w - gy = target[i][:, 1:2] * in_h - gw = target[i][:, 2:3] * in_w - gh = target[i][:, 3:4] * in_h - gt_box = torch.FloatTensor(torch.cat([gx, gy, gw, gh],-1)).type(FloatTensor) - + if len(targets[b]) > 0: + batch_target = torch.zeros_like(targets[b]) + #-------------------------------------------------------# + # 计算出正样本在特征层上的中心点 + #-------------------------------------------------------# + batch_target[:, [0,2]] = targets[b][:, [0,2]] * in_w + batch_target[:, [1,3]] = targets[b][:, [1,3]] * in_h + batch_target = batch_target[:, :4] #-------------------------------------------------------# # 计算交并比 # anch_ious num_true_box, num_anchors #-------------------------------------------------------# - anch_ious = jaccard(gt_box, pred_boxes_for_ignore) + anch_ious = self.calculate_iou(batch_target, pred_boxes_for_ignore) #-------------------------------------------------------# # 每个先验框对应真实框的最大重合度 # anch_ious_max num_anchors #-------------------------------------------------------# - anch_ious_max, _ = torch.max(anch_ious,dim=0) - anch_ious_max = anch_ious_max.view(pred_boxes[i].size()[:3]) - noobj_mask[i][anch_ious_max>self.ignore_threshold] = 
0 + anch_ious_max, _ = torch.max(anch_ious, dim = 0) + anch_ious_max = anch_ious_max.view(pred_boxes[b].size()[:3]) + noobj_mask[b][anch_ious_max > self.ignore_threshold] = 0 return noobj_mask, pred_boxes -def weights_init(net, init_type='normal', init_gain=0.02): +def weights_init(net, init_type='normal', init_gain = 0.02): def init_func(m): classname = m.__class__.__name__ if hasattr(m, 'weight') and classname.find('Conv') != -1: @@ -444,51 +413,3 @@ def weights_init(net, init_type='normal', init_gain=0.02): torch.nn.init.constant_(m.bias.data, 0.0) print('initialize network with %s type' % init_type) net.apply(init_func) - -class LossHistory(): - def __init__(self, log_dir): - import datetime - curr_time = datetime.datetime.now() - time_str = datetime.datetime.strftime(curr_time,'%Y_%m_%d_%H_%M_%S') - self.log_dir = log_dir - self.time_str = time_str - self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str)) - self.losses = [] - self.val_loss = [] - - os.makedirs(self.save_path) - - def append_loss(self, loss, val_loss): - self.losses.append(loss) - self.val_loss.append(val_loss) - with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f: - f.write(str(loss)) - f.write("\n") - with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f: - f.write(str(val_loss)) - f.write("\n") - self.loss_plot() - - def loss_plot(self): - iters = range(len(self.losses)) - - plt.figure() - plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss') - plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss') - try: - if len(self.losses) < 25: - num = 5 - else: - num = 15 - - plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') - plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') - except: - pass - - plt.grid(True) - plt.xlabel('Epoch') - plt.ylabel('Loss') - plt.legend(loc="upper right") - - plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png")) diff --git a/predict.py b/predict.py index 65a8aa1c6651a9c8dd98a7baf3b2701cbc52ed95..15bd461186d0ba77c4c66fcb8fe212ebc5e354cf 100644 --- a/predict.py +++ b/predict.py @@ -1,8 +1,7 @@ -#----------------------------------------------------# -# 对视频中的predict.py进行了修改, -# 将单张图片预测、摄像头检测和FPS测试功能 +#-----------------------------------------------------------------------# +# predict.py将单张图片预测、摄像头检测、FPS测试和目录遍历检测等功能 # 整合到了一个py文件中,通过指定mode进行模式的修改。 -#----------------------------------------------------# +#-----------------------------------------------------------------------# import time import cv2 @@ -13,33 +12,44 @@ from yolo import YOLO if __name__ == "__main__": yolo = YOLO() - #-------------------------------------------------------------------------# + #----------------------------------------------------------------------------------------------------------# # mode用于指定测试的模式: - # 'predict'表示单张图片预测 - # 'video'表示视频检测 - # 'fps'表示测试fps - #-------------------------------------------------------------------------# + # 'predict'表示单张图片预测,如果想对预测过程进行修改,如保存图片,截取对象等,可以先看下方详细的注释 + # 'video'表示视频检测,可调用摄像头或者视频进行检测,详情查看下方注释。 + # 'fps'表示测试fps,使用的图片是img里面的street.jpg,详情查看下方注释。 + # 'dir_predict'表示遍历文件夹进行检测并保存。默认遍历img文件夹,保存img_out文件夹,详情查看下方注释。 + #----------------------------------------------------------------------------------------------------------# mode = "predict" 
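Of the four modes listed above, 'predict' is the one the numbered notes further down describe tweaking in prose. As a minimal sketch of note 1 (save the annotated result instead of showing it), reusing the `yolo = YOLO()` instance from the top of this file — the input path here is only illustrative:

    # Hedged sketch of a single 'predict' pass with saving applied;
    # 'img/street.jpg' is an assumed example path, not part of the patch.
    from PIL import Image

    image   = Image.open('img/street.jpg')
    r_image = yolo.detect_image(image)     # draw boxes on a copy of the image
    r_image.save('img_out.jpg')            # per note 1: save instead of r_image.show()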
#-------------------------------------------------------------------------# # video_path用于指定视频的路径,当video_path=0时表示检测摄像头 # video_save_path表示视频保存的路径,当video_save_path=""时表示不保存 # video_fps用于保存的视频的fps # video_path、video_save_path和video_fps仅在mode='video'时有效 - # 保存视频时需要ctrl+c退出才会完成完整的保存步骤,不可直接结束程序。 + # 保存视频时需要ctrl+c退出或者运行到最后一帧才会完成完整的保存步骤。 #-------------------------------------------------------------------------# video_path = 0 video_save_path = "" video_fps = 25.0 + #-------------------------------------------------------------------------# + # test_interval用于指定测量fps的时候,图片检测的次数 + # 理论上test_interval越大,fps越准确。 + #-------------------------------------------------------------------------# + test_interval = 100 + #-------------------------------------------------------------------------# + # dir_origin_path指定了用于检测的图片的文件夹路径 + # dir_save_path指定了检测完图片的保存路径 + # dir_origin_path和dir_save_path仅在mode='dir_predict'时有效 + #-------------------------------------------------------------------------# + dir_origin_path = "img/" + dir_save_path = "img_out/" if mode == "predict": ''' - 1、该代码无法直接进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。 - 具体流程可以参考get_dr_txt.py,在get_dr_txt.py即实现了遍历还实现了目标信息的保存。 - 2、如果想要进行检测完的图片的保存,利用r_image.save("img.jpg")即可保存,直接在predict.py里进行修改即可。 - 3、如果想要获得预测框的坐标,可以进入yolo.detect_image函数,在绘图部分读取top,left,bottom,right这四个值。 - 4、如果想要利用预测框截取下目标,可以进入yolo.detect_image函数,在绘图部分利用获取到的top,left,bottom,right这四个值 + 1、如果想要进行检测完的图片的保存,利用r_image.save("img.jpg")即可保存,直接在predict.py里进行修改即可。 + 2、如果想要获得预测框的坐标,可以进入yolo.detect_image函数,在绘图部分读取top,left,bottom,right这四个值。 + 3、如果想要利用预测框截取下目标,可以进入yolo.detect_image函数,在绘图部分利用获取到的top,left,bottom,right这四个值 在原图上利用矩阵的方式进行截取。 - 5、如果想要在预测图上写额外的字,比如检测到的特定目标的数量,可以进入yolo.detect_image函数,在绘图部分对predicted_class进行判断, + 4、如果想要在预测图上写额外的字,比如检测到的特定目标的数量,可以进入yolo.detect_image函数,在绘图部分对predicted_class进行判断, 比如判断if predicted_class == 'car': 即可判断当前目标是否为车,然后记录数量即可。利用draw.text即可写字。 ''' while True: @@ -54,11 +64,11 @@ if __name__ == "__main__": r_image.show() elif mode == "video": - capture=cv2.VideoCapture(video_path) + capture = cv2.VideoCapture(video_path) if video_save_path!="": - fourcc = cv2.VideoWriter_fourcc(*'XVID') - size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) - out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size) + fourcc = cv2.VideoWriter_fourcc(*'XVID') + size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) + out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size) fps = 0.0 while(True): @@ -91,9 +101,23 @@ if __name__ == "__main__": cv2.destroyAllWindows() elif mode == "fps": - test_interval = 100 img = Image.open('img/street.jpg') tact_time = yolo.get_FPS(img, test_interval) print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1') + + elif mode == "dir_predict": + import os + from tqdm import tqdm + + img_names = os.listdir(dir_origin_path) + for img_name in tqdm(img_names): + if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')): + image_path = os.path.join(dir_origin_path, img_name) + image = Image.open(image_path) + r_image = yolo.detect_image(image) + if not os.path.exists(dir_save_path): + os.makedirs(dir_save_path) + r_image.save(os.path.join(dir_save_path, img_name)) + else: - raise AssertionError("Please specify the correct mode: 'predict', 'video' or 'fps'.") + raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.") diff --git 
a/summary.py b/summary.py new file mode 100644 index 0000000000000000000000000000000000000000..0a36d41ea6d12b36d403e87d371fb0132c7dde4e --- /dev/null +++ b/summary.py @@ -0,0 +1,13 @@ +#--------------------------------------------# +# 该部分代码用于看网络结构 +#--------------------------------------------# +import torch +from torchsummary import summary + +from nets.yolo import YoloBody + +if __name__ == "__main__": + # 需要使用device来指定网络在GPU还是CPU运行 + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + m = YoloBody([[6, 7, 8], [3, 4, 5], [0, 1, 2]], 80).to(device) + summary(m, input_size=(3, 416, 416)) diff --git a/train.py b/train.py index 56a70e7e4b58ac5141a9108edfe8340f47e26669..3563d416cbf6c8c90bd73fa09aa436a5ddc26bb2 100644 --- a/train.py +++ b/train.py @@ -6,326 +6,209 @@ import torch import torch.backends.cudnn as cudnn import torch.optim as optim from torch.utils.data import DataLoader -from tqdm import tqdm -from nets.yolo4 import YoloBody -from nets.yolo_training import LossHistory, YOLOLoss, weights_init +from nets.yolo import YoloBody +from nets.yolo_training import YOLOLoss, weights_init +from utils.callbacks import LossHistory from utils.dataloader import YoloDataset, yolo_dataset_collate +from utils.utils import get_anchors, get_classes +from utils.utils_fit import fit_one_epoch - -#---------------------------------------------------# -# 获得类和先验框 -#---------------------------------------------------# -def get_classes(classes_path): - '''loads the classes''' - with open(classes_path) as f: - class_names = f.readlines() - class_names = [c.strip() for c in class_names] - return class_names - -def get_anchors(anchors_path): - '''loads the anchors from a file''' - with open(anchors_path) as f: - anchors = f.readline() - anchors = [float(x) for x in anchors.split(',')] - return np.array(anchors).reshape([-1,3,2])[::-1,:,:] - -def get_lr(optimizer): - for param_group in optimizer.param_groups: - return param_group['lr'] - - -def fit_one_epoch(net,yolo_loss,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda): - if Tensorboard: - global train_tensorboard_step, val_tensorboard_step - total_loss = 0 - val_loss = 0 - - net.train() - print('Start Train') - with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: - for iteration, batch in enumerate(gen): - if iteration >= epoch_size: - break - images, targets = batch[0], batch[1] - with torch.no_grad(): - if cuda: - images = torch.from_numpy(images).type(torch.FloatTensor).cuda() - targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets] - else: - images = torch.from_numpy(images).type(torch.FloatTensor) - targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets] - #----------------------# - # 清零梯度 - #----------------------# - optimizer.zero_grad() - #----------------------# - # 前向传播 - #----------------------# - outputs = net(images) - losses = [] - num_pos_all = 0 - #----------------------# - # 计算损失 - #----------------------# - for i in range(3): - loss_item, num_pos = yolo_loss(outputs[i], targets) - losses.append(loss_item) - num_pos_all += num_pos - - loss = sum(losses) / num_pos_all - total_loss += loss.item() - - #----------------------# - # 反向传播 - #----------------------# - loss.backward() - optimizer.step() - - if Tensorboard: - # 将loss写入tensorboard,每一步都写 - writer.add_scalar('Train_loss', loss, train_tensorboard_step) - train_tensorboard_step += 1 - - pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1), - 'lr' : get_lr(optimizer)}) 
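The training loop being removed here normalizes the summed losses of the three detection heads by the total positive-sample count (`loss = sum(losses) / num_pos_all`). A toy check of that aggregation — the numbers are made up, only the arithmetic is the point:

    import torch

    # Assumed per-head losses for the 13x13 / 26x26 / 52x52 outputs.
    losses      = [torch.tensor(12.0), torch.tensor(7.5), torch.tensor(4.5)]
    num_pos_all = torch.tensor(8.0)    # positive anchors matched across all heads
    # Dividing by the positive count keeps the loss scale independent of how
    # many ground-truth boxes a batch happens to contain.
    loss = sum(losses) / torch.clamp(num_pos_all, min=1.0)
    print(loss)                        # tensor(3.) -> loss per positive sample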
- pbar.update(1) - - # 将loss写入tensorboard,下面注释的是每个世代保存一次 - # if Tensorboard: - # writer.add_scalar('Train_loss', total_loss/(iteration+1), epoch) - net.eval() - print('Start Validation') - with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: - for iteration, batch in enumerate(genval): - if iteration >= epoch_size_val: - break - images_val, targets_val = batch[0], batch[1] - - with torch.no_grad(): - if cuda: - images_val = torch.from_numpy(images_val).type(torch.FloatTensor).cuda() - targets_val = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets_val] - else: - images_val = torch.from_numpy(images_val).type(torch.FloatTensor) - targets_val = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets_val] - optimizer.zero_grad() - - outputs = net(images_val) - losses = [] - num_pos_all = 0 - for i in range(3): - loss_item, num_pos = yolo_loss(outputs[i], targets_val) - losses.append(loss_item) - num_pos_all += num_pos - loss = sum(losses) / num_pos_all - val_loss += loss.item() - - # 将loss写入tensorboard, 下面注释的是每一步都写 - # if Tensorboard: - # writer.add_scalar('Val_loss', loss, val_tensorboard_step) - # val_tensorboard_step += 1 - pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)}) - pbar.update(1) - - # 将loss写入tensorboard,每个世代保存一次 - if Tensorboard: - writer.add_scalar('Val_loss',val_loss / (epoch_size_val+1), epoch) - loss_history.append_loss(total_loss/(epoch_size+1), val_loss/(epoch_size_val+1)) - print('Finish Validation') - print('Epoch:'+ str(epoch+1) + '/' + str(Epoch)) - print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_loss/(epoch_size_val+1))) - print('Saving state, iter:', str(epoch+1)) - torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth'%((epoch+1),total_loss/(epoch_size+1),val_loss/(epoch_size_val+1))) - -#----------------------------------------------------# -# 检测精度mAP和pr曲线计算参考视频 -# https://www.bilibili.com/video/BV1zE411u7Vw -#----------------------------------------------------# if __name__ == "__main__": - #-------------------------------# - # 是否使用Tensorboard - #-------------------------------# - Tensorboard = False #-------------------------------# # 是否使用Cuda # 没有GPU可以设置成False #-------------------------------# Cuda = True + #--------------------------------------------------------# + # 训练前一定要修改classes_path,使其对应自己的数据集 + #--------------------------------------------------------# + classes_path = 'model_data/voc_classes.txt' + #---------------------------------------------------------------------# + # anchors_path代表先验框对应的txt文件,一般不修改。 + # anchors_mask用于帮助代码找到对应的先验框,一般不修改。 + #---------------------------------------------------------------------# + anchors_path = 'model_data/yolo_anchors.txt' + anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + #------------------------------------------------------------------------------------------------------# + # 权值文件请看README,百度网盘下载。数据的预训练权重对不同数据集是通用的,因为特征是通用的 + # 预训练权重对于99%的情况都必须要用,不用的话权值太过随机,特征提取效果不明显,网络训练的结果也不会好。 + # 训练自己的数据集时提示维度不匹配正常,预测的东西都不一样了自然维度不匹配 + # 如果想要断点续练就将model_path设置成logs文件夹下已经训练的权值文件。 + #------------------------------------------------------------------------------------------------------# + model_path = 'model_data/yolo4_weights.pth' #------------------------------------------------------# - # 是否对损失进行归一化,用于改变loss的大小 - # 用于决定计算最终loss是除上batch_size还是除上正样本数量 + # 输入的shape大小,一定要是32的倍数 #------------------------------------------------------# - normalize = False - 
#-------------------------------# - # 输入的shape大小 - # 显存比较小可以使用416x416 - # 显存比较大可以使用608x608 - #-------------------------------# - input_shape = (416,416) - #----------------------------------------------------# - # classes和anchor的路径,非常重要 - # 训练前一定要修改classes_path,使其对应自己的数据集 - #----------------------------------------------------# - anchors_path = 'model_data/yolo_anchors.txt' - classes_path = 'model_data/voc_classes.txt' + input_shape = [416, 416] #------------------------------------------------------# # Yolov4的tricks应用 # mosaic 马赛克数据增强 True or False # 实际测试时mosaic数据增强并不稳定,所以默认为False - # Cosine_scheduler 余弦退火学习率 True or False + # Cosine_lr 余弦退火学习率 True or False # label_smoothing 标签平滑 0.01以下一般 如0.01、0.005 #------------------------------------------------------# - mosaic = False - Cosine_lr = False - smoooth_label = 0 + mosaic = False + Cosine_lr = False + label_smoothing = 0 + + #----------------------------------------------------# + # 训练分为两个阶段,分别是冻结阶段和解冻阶段。 + # 显存不足与数据集大小无关,提示显存不足请调小batch_size。 + # 受到BatchNorm层影响,batch_size最小为2,不能为1。 + #----------------------------------------------------# + #----------------------------------------------------# + # 冻结阶段训练参数 + # 此时模型的主干被冻结了,特征提取网络不发生改变 + # 占用的显存较小,仅对网络进行微调 + #----------------------------------------------------# + Init_Epoch = 0 + Freeze_Epoch = 50 + Freeze_batch_size = 8 + Freeze_lr = 1e-3 + #----------------------------------------------------# + # 解冻阶段训练参数 + # 此时模型的主干不被冻结了,特征提取网络会发生改变 + # 占用的显存较大,网络所有的参数都会发生改变 + #----------------------------------------------------# + UnFreeze_Epoch = 100 + Unfreeze_batch_size = 4 + Unfreeze_lr = 1e-4 + #------------------------------------------------------# + # 是否进行冻结训练,默认先冻结主干训练后解冻训练。 + #------------------------------------------------------# + Freeze_Train = True + #------------------------------------------------------# + # 用于设置是否使用多线程读取数据 + # 开启后会加快数据读取速度,但是会占用更多内存 + # 内存较小的电脑可以设置为2或者0 + #------------------------------------------------------# + num_workers = 4 + #----------------------------------------------------# + # 获得图片路径和标签 + #----------------------------------------------------# + train_annotation_path = '2007_train.txt' + val_annotation_path = '2007_val.txt' #----------------------------------------------------# # 获取classes和anchor #----------------------------------------------------# - class_names = get_classes(classes_path) - anchors = get_anchors(anchors_path) - num_classes = len(class_names) + class_names, num_classes = get_classes(classes_path) + anchors, num_anchors = get_anchors(anchors_path) #------------------------------------------------------# # 创建yolo模型 - # 训练前一定要修改classes_path和对应的txt文件 #------------------------------------------------------# - model = YoloBody(len(anchors[0]), num_classes) + model = YoloBody(anchors_mask, num_classes) weights_init(model) - #------------------------------------------------------# # 权值文件请看README,百度网盘下载 #------------------------------------------------------# - model_path = "model_data/yolo4_weights.pth" - print('Loading weights into state dict...') - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - model_dict = model.state_dict() - pretrained_dict = torch.load(model_path, map_location=device) - pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)} + print('Load weights {}.'.format(model_path)) + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + model_dict = model.state_dict() + pretrained_dict = torch.load(model_path, map_location = device) + 
pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)} model_dict.update(pretrained_dict) model.load_state_dict(model_dict) - print('Finished!') - - net = model.train() + model_train = model.train() if Cuda: - net = torch.nn.DataParallel(model) + model_train = torch.nn.DataParallel(model) cudnn.benchmark = True - net = net.cuda() + model_train = model_train.cuda() - yolo_loss = YOLOLoss(np.reshape(anchors,[-1,2]), num_classes, (input_shape[1], input_shape[0]), smoooth_label, Cuda, normalize) + yolo_loss = YOLOLoss(anchors, num_classes, input_shape, Cuda, anchors_mask, label_smoothing) loss_history = LossHistory("logs/") - #----------------------------------------------------# - # 获得图片路径和标签 - #----------------------------------------------------# - annotation_path = '2007_train.txt' - #----------------------------------------------------------------------# - # 验证集的划分在train.py代码里面进行 - # 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。 - # 当前划分方式下,验证集和训练集的比例为1:9 - #----------------------------------------------------------------------# - val_split = 0.1 - with open(annotation_path) as f: - lines = f.readlines() - np.random.seed(10101) - np.random.shuffle(lines) - np.random.seed(None) - num_val = int(len(lines)*val_split) - num_train = len(lines) - num_val - - if Tensorboard: - from tensorboardX import SummaryWriter - writer = SummaryWriter(log_dir='logs',flush_secs=60) - if Cuda: - graph_inputs = torch.randn(1,3,input_shape[0],input_shape[1]).type(torch.FloatTensor).cuda() - else: - graph_inputs = torch.randn(1,3,input_shape[0],input_shape[1]).type(torch.FloatTensor) - writer.add_graph(model, graph_inputs) - train_tensorboard_step = 1 - val_tensorboard_step = 1 + #---------------------------# + # 读取数据集对应的txt + #---------------------------# + with open(train_annotation_path) as f: + train_lines = f.readlines() + with open(val_annotation_path) as f: + val_lines = f.readlines() + num_train = len(train_lines) + num_val = len(val_lines) #------------------------------------------------------# # 主干特征提取网络特征通用,冻结训练可以加快训练速度 # 也可以在训练初期防止权值被破坏。 # Init_Epoch为起始世代 # Freeze_Epoch为冻结训练的世代 - # Epoch总训练世代 + # UnFreeze_Epoch总训练世代 # 提示OOM或者显存不足请调小Batch_size #------------------------------------------------------# if True: - lr = 1e-3 - Batch_size = 4 - Init_Epoch = 0 - Freeze_Epoch = 50 + batch_size = Freeze_batch_size + lr = Freeze_lr + start_epoch = Init_Epoch + end_epoch = Freeze_Epoch - #----------------------------------------------------------------------------# - # 我在实际测试时,发现optimizer的weight_decay起到了反作用, - # 所以去除掉了weight_decay,大家也可以开起来试试,一般是weight_decay=5e-4 - #----------------------------------------------------------------------------# - optimizer = optim.Adam(net.parameters(),lr) + optimizer = optim.Adam(model_train.parameters(), lr, weight_decay = 5e-4) if Cosine_lr: lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5) else: lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92) - train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True) - val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False) - gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True, - drop_last=True, collate_fn=yolo_dataset_collate) - gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True, - drop_last=True, collate_fn=yolo_dataset_collate) 
- - epoch_size = num_train // Batch_size - epoch_size_val = num_val // Batch_size + train_dataset = YoloDataset(train_lines, input_shape, num_classes, mosaic=mosaic, train = True) + val_dataset = YoloDataset(val_lines, input_shape, num_classes, mosaic=False, train = False) + gen = DataLoader(train_dataset, shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, + drop_last=True, collate_fn=yolo_dataset_collate) + gen_val = DataLoader(val_dataset , shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, + drop_last=True, collate_fn=yolo_dataset_collate) + + epoch_step = num_train // batch_size + epoch_step_val = num_val // batch_size - if epoch_size == 0 or epoch_size_val == 0: + if epoch_step == 0 or epoch_step_val == 0: raise ValueError("数据集过小,无法进行训练,请扩充数据集。") + #------------------------------------# # 冻结一定部分训练 #------------------------------------# - for param in model.backbone.parameters(): - param.requires_grad = False + if Freeze_Train: + for param in model.backbone.parameters(): + param.requires_grad = False - for epoch in range(Init_Epoch,Freeze_Epoch): - fit_one_epoch(net,yolo_loss,epoch,epoch_size,epoch_size_val,gen,gen_val,Freeze_Epoch,Cuda) + for epoch in range(start_epoch, end_epoch): + fit_one_epoch(model_train, model, yolo_loss, loss_history, optimizer, epoch, + epoch_step, epoch_step_val, gen, gen_val, end_epoch, Cuda) lr_scheduler.step() - + if True: - lr = 1e-4 - Batch_size = 2 - Freeze_Epoch = 50 - Unfreeze_Epoch = 100 - - #----------------------------------------------------------------------------# - # 我在实际测试时,发现optimizer的weight_decay起到了反作用, - # 所以去除掉了weight_decay,大家也可以开起来试试,一般是weight_decay=5e-4 - #----------------------------------------------------------------------------# - optimizer = optim.Adam(net.parameters(),lr) + batch_size = Unfreeze_batch_size + lr = Unfreeze_lr + start_epoch = Freeze_Epoch + end_epoch = UnFreeze_Epoch + + optimizer = optim.Adam(model_train.parameters(), lr, weight_decay = 5e-4) if Cosine_lr: lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5) else: lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92) - train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), mosaic=mosaic, is_train=True) - val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), mosaic=False, is_train=False) - gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True, - drop_last=True, collate_fn=yolo_dataset_collate) - gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True, - drop_last=True, collate_fn=yolo_dataset_collate) - - epoch_size = num_train // Batch_size - epoch_size_val = num_val // Batch_size + train_dataset = YoloDataset(train_lines, input_shape, num_classes, mosaic=mosaic, train = True) + val_dataset = YoloDataset(val_lines, input_shape, num_classes, mosaic=False, train = False) + gen = DataLoader(train_dataset, shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, + drop_last=True, collate_fn=yolo_dataset_collate) + gen_val = DataLoader(val_dataset , shuffle = True, batch_size = batch_size, num_workers = num_workers, pin_memory=True, + drop_last=True, collate_fn=yolo_dataset_collate) + + epoch_step = num_train // batch_size + epoch_step_val = num_val // batch_size - if epoch_size == 0 or epoch_size_val == 0: + if epoch_step == 0 or epoch_step_val == 0: raise 
ValueError("数据集过小,无法进行训练,请扩充数据集。") + #------------------------------------# - # 解冻后训练 + # 冻结一定部分训练 #------------------------------------# - for param in model.backbone.parameters(): - param.requires_grad = True - - for epoch in range(Freeze_Epoch,Unfreeze_Epoch): - fit_one_epoch(net,yolo_loss,epoch,epoch_size,epoch_size_val,gen,gen_val,Unfreeze_Epoch,Cuda) - lr_scheduler.step() + if Freeze_Train: + for param in model.backbone.parameters(): + param.requires_grad = False + + for epoch in range(start_epoch, end_epoch): + fit_one_epoch(model_train, model, yolo_loss, loss_history, optimizer, epoch, + epoch_step, epoch_step_val, gen, gen_val, end_epoch, Cuda) + lr_scheduler.step() \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4287ca8617970fa8fc025b75cb319c7032706910 --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1 @@ +# \ No newline at end of file diff --git a/utils/callbacks.py b/utils/callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..0103c42b01c60563d323e0020b5478240443b4d3 --- /dev/null +++ b/utils/callbacks.py @@ -0,0 +1,56 @@ +import os + +import scipy.signal +from matplotlib import pyplot as plt + + +class LossHistory(): + def __init__(self, log_dir): + import datetime + curr_time = datetime.datetime.now() + time_str = datetime.datetime.strftime(curr_time,'%Y_%m_%d_%H_%M_%S') + self.log_dir = log_dir + self.time_str = time_str + self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str)) + self.losses = [] + self.val_loss = [] + + os.makedirs(self.save_path) + + def append_loss(self, loss, val_loss): + self.losses.append(loss) + self.val_loss.append(val_loss) + with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f: + f.write(str(loss)) + f.write("\n") + with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f: + f.write(str(val_loss)) + f.write("\n") + self.loss_plot() + + def loss_plot(self): + iters = range(len(self.losses)) + + plt.figure() + plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss') + plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss') + try: + if len(self.losses) < 25: + num = 5 + else: + num = 15 + + plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss') + plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss') + except: + pass + + plt.grid(True) + plt.xlabel('Epoch') + plt.ylabel('Loss') + plt.legend(loc="upper right") + + plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png")) + + plt.cla() + plt.close("all") diff --git a/utils/dataloader.py b/utils/dataloader.py index 6ea068d20d9022dcaf9ce415d428b08bdbc691f5..71bb94f178a45d543300f53b10875da9f97f84e3 100644 --- a/utils/dataloader.py +++ b/utils/dataloader.py @@ -1,35 +1,71 @@ +from random import sample, shuffle + import cv2 import numpy as np from PIL import Image from torch.utils.data.dataset import Dataset -from utils.utils import merge_bboxes +from utils.utils import cvtColor, preprocess_input class YoloDataset(Dataset): - def __init__(self, train_lines, image_size, mosaic=True, is_train=True): + def __init__(self, annotation_lines, input_shape, num_classes, mosaic, train): super(YoloDataset, self).__init__() - - self.train_lines = train_lines - self.train_batches = 
len(train_lines) - self.image_size = image_size - self.mosaic = mosaic - self.flag = True - self.is_train = is_train + self.annotation_lines = annotation_lines + self.input_shape = input_shape + self.num_classes = num_classes + self.length = len(self.annotation_lines) + self.mosaic = mosaic + self.train = train def __len__(self): - return self.train_batches + return self.length + + def __getitem__(self, index): + index = index % self.length + #---------------------------------------------------# + # 训练时进行数据的随机增强 + # 验证时不进行数据的随机增强 + #---------------------------------------------------# + if self.mosaic: + if self.rand() < 0.5: + lines = sample(self.annotation_lines, 3) + lines.append(self.annotation_lines[index]) + shuffle(lines) + image, box = self.get_random_data_with_Mosaic(lines, self.input_shape) + else: + image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train) + else: + image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train) + image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1)) + box = np.array(box, dtype=np.float32) + if len(box) != 0: + box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1] + box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0] + + box[:, 2:4] = box[:, 2:4] - box[:, 0:2] + box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2 + return image, box def rand(self, a=0, b=1): - return np.random.rand() * (b - a) + a + return np.random.rand()*(b-a) + a def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True): - """实时数据增强的随机预处理""" - line = annotation_line.split() - image = Image.open(line[0]) - iw, ih = image.size - h, w = input_shape - box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]]) + line = annotation_line.split() + #------------------------------# + # 读取图像并转换成RGB图像 + #------------------------------# + image = Image.open(line[0]) + image = cvtColor(image) + #------------------------------# + # 获得图像的高宽与目标高宽 + #------------------------------# + iw, ih = image.size + h, w = input_shape + #------------------------------# + # 获得预测框 + #------------------------------# + box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]]) if not random: scale = min(w/iw, h/ih) @@ -38,56 +74,64 @@ class YoloDataset(Dataset): dx = (w-nw)//2 dy = (h-nh)//2 - image = image.resize((nw,nh), Image.BICUBIC) - new_image = Image.new('RGB', (w,h), (128,128,128)) + #---------------------------------# + # 将图像多余的部分加上灰条 + #---------------------------------# + image = image.resize((nw,nh), Image.BICUBIC) + new_image = Image.new('RGB', (w,h), (128,128,128)) new_image.paste(image, (dx, dy)) - image_data = np.array(new_image, np.float32) + image_data = np.array(new_image, np.float32) - # 调整目标框坐标 - box_data = np.zeros((len(box), 5)) - if len(box) > 0: + #---------------------------------# + # 对真实框进行调整 + #---------------------------------# + if len(box)>0: np.random.shuffle(box) - box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx - box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy - box[:, 0:2][box[:, 0:2] < 0] = 0 - box[:, 2][box[:, 2] > w] = w - box[:, 3][box[:, 3] > h] = h + box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx + box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy + box[:, 0:2][box[:, 0:2]<0] = 0 + box[:, 2][box[:, 2]>w] = w + box[:, 3][box[:, 3]>h] = h box_w = box[:, 2] - box[:, 0] box_h = box[:, 3] - box[:, 1] - box = box[np.logical_and(box_w > 1, box_h > 1)] # 保留有效框 - box_data = np.zeros((len(box), 5)) - 
box_data[:len(box)] = box - - return image_data, box_data - - # 调整图片大小 - new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter) + box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box + + return image_data, box + + #------------------------------------------# + # 对图像进行缩放并且进行长和宽的扭曲 + #------------------------------------------# + new_ar = w/h * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) scale = self.rand(.25, 2) if new_ar < 1: - nh = int(scale * h) - nw = int(nh * new_ar) + nh = int(scale*h) + nw = int(nh*new_ar) else: - nw = int(scale * w) - nh = int(nw / new_ar) - image = image.resize((nw, nh), Image.BICUBIC) - - # 放置图片 - dx = int(self.rand(0, w - nw)) - dy = int(self.rand(0, h - nh)) - new_image = Image.new('RGB', (w, h), - (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))) + nw = int(scale*w) + nh = int(nw/new_ar) + image = image.resize((nw,nh), Image.BICUBIC) + + #------------------------------------------# + # 将图像多余的部分加上灰条 + #------------------------------------------# + dx = int(self.rand(0, w-nw)) + dy = int(self.rand(0, h-nh)) + new_image = Image.new('RGB', (w,h), (128,128,128)) new_image.paste(image, (dx, dy)) image = new_image - # 是否翻转图片 - flip = self.rand() < .5 - if flip: - image = image.transpose(Image.FLIP_LEFT_RIGHT) + #------------------------------------------# + # 翻转图像 + #------------------------------------------# + flip = self.rand()<.5 + if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) - # 色域变换 + #------------------------------------------# + # 色域扭曲 + #------------------------------------------# hue = self.rand(-hue, hue) - sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat) - val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val) + sat = self.rand(1, sat) if self.rand()<.5 else 1/self.rand(1, sat) + val = self.rand(1, val) if self.rand()<.5 else 1/self.rand(1, val) x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV) x[..., 0] += hue*360 x[..., 0][x[..., 0]>1] -= 1 @@ -99,112 +143,134 @@ class YoloDataset(Dataset): x[x<0] = 0 image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255 - # 调整目标框坐标 - box_data = np.zeros((len(box), 5)) - if len(box) > 0: + #---------------------------------# + # 对真实框进行调整 + #---------------------------------# + if len(box)>0: np.random.shuffle(box) - box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx - box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy - if flip: - box[:, [0, 2]] = w - box[:, [2, 0]] - box[:, 0:2][box[:, 0:2] < 0] = 0 - box[:, 2][box[:, 2] > w] = w - box[:, 3][box[:, 3] > h] = h + box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx + box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy + if flip: box[:, [0,2]] = w - box[:, [2,0]] + box[:, 0:2][box[:, 0:2]<0] = 0 + box[:, 2][box[:, 2]>w] = w + box[:, 3][box[:, 3]>h] = h box_w = box[:, 2] - box[:, 0] box_h = box[:, 3] - box[:, 1] - box = box[np.logical_and(box_w > 1, box_h > 1)] # 保留有效框 - box_data = np.zeros((len(box), 5)) - box_data[:len(box)] = box - - return image_data, box_data - - def get_random_data_with_Mosaic(self, annotation_line, input_shape, hue=.1, sat=1.5, val=1.5): + box = box[np.logical_and(box_w>1, box_h>1)] + + return image_data, box + + def merge_bboxes(self, bboxes, cutx, cuty): + merge_bbox = [] + for i in range(len(bboxes)): + for box in bboxes[i]: + tmp_box = [] + x1, y1, x2, y2 = box[0], box[1], box[2], box[3] + + if i == 0: + if y1 > cuty or x1 > cutx: + continue + if y2 >= cuty and y1 <= cuty: + y2 = cuty + if x2 >= cutx and x1 <= cutx: + 
x2 = cutx + + if i == 1: + if y2 < cuty or x1 > cutx: + continue + if y2 >= cuty and y1 <= cuty: + y1 = cuty + if x2 >= cutx and x1 <= cutx: + x2 = cutx + + if i == 2: + if y2 < cuty or x2 < cutx: + continue + if y2 >= cuty and y1 <= cuty: + y1 = cuty + if x2 >= cutx and x1 <= cutx: + x1 = cutx + + if i == 3: + if y1 > cuty or x2 < cutx: + continue + if y2 >= cuty and y1 <= cuty: + y2 = cuty + if x2 >= cutx and x1 <= cutx: + x1 = cutx + tmp_box.append(x1) + tmp_box.append(y1) + tmp_box.append(x2) + tmp_box.append(y2) + tmp_box.append(box[-1]) + merge_bbox.append(tmp_box) + return merge_bbox + + def get_random_data_with_Mosaic(self, annotation_line, input_shape, max_boxes=100, hue=.1, sat=1.5, val=1.5): h, w = input_shape - min_offset_x = 0.3 - min_offset_y = 0.3 - scale_low = 1 - min(min_offset_x, min_offset_y) - scale_high = scale_low + 0.2 - - image_datas = [] - box_datas = [] - index = 0 - - place_x = [0, 0, int(w * min_offset_x), int(w * min_offset_x)] - place_y = [0, int(h * min_offset_y), int(h * min_offset_y), 0] + min_offset_x = self.rand(0.25, 0.75) + min_offset_y = self.rand(0.25, 0.75) + + nws = [ int(w * self.rand(0.4, 1)), int(w * self.rand(0.4, 1)), int(w * self.rand(0.4, 1)), int(w * self.rand(0.4, 1))] + nhs = [ int(h * self.rand(0.4, 1)), int(h * self.rand(0.4, 1)), int(h * self.rand(0.4, 1)), int(h * self.rand(0.4, 1))] + + place_x = [int(w*min_offset_x) - nws[0], int(w*min_offset_x) - nws[1], int(w*min_offset_x), int(w*min_offset_x)] + place_y = [int(h*min_offset_y) - nhs[0], int(h*min_offset_y), int(h*min_offset_y), int(h*min_offset_y) - nhs[3]] + + image_datas = [] + box_datas = [] + index = 0 for line in annotation_line: # 每一行进行分割 line_content = line.split() # 打开图片 image = Image.open(line_content[0]) - image = image.convert("RGB") + image = cvtColor(image) + # 图片的大小 iw, ih = image.size # 保存框的位置 - box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]]) - + box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]]) + # 是否翻转图片 - flip = self.rand() < .5 - if flip and len(box) > 0: + flip = self.rand()<.5 + if flip and len(box)>0: image = image.transpose(Image.FLIP_LEFT_RIGHT) - box[:, [0, 2]] = iw - box[:, [2, 0]] - - # 对输入进来的图片进行缩放 - new_ar = w / h - scale = self.rand(scale_low, scale_high) - if new_ar < 1: - nh = int(scale * h) - nw = int(nh * new_ar) - else: - nw = int(scale * w) - nh = int(nw / new_ar) - image = image.resize((nw, nh), Image.BICUBIC) - - # 进行色域变换 - hue = self.rand(-hue, hue) - sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat) - val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val) - x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV) - x[..., 0] += hue*360 - x[..., 0][x[..., 0]>1] -= 1 - x[..., 0][x[..., 0]<0] += 1 - x[..., 1] *= sat - x[..., 2] *= val - x[x[:,:, 0]>360, 0] = 360 - x[:, :, 1:][x[:, :, 1:]>1] = 1 - x[x<0] = 0 - image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) # numpy array, 0 to 1 - - image = Image.fromarray((image * 255).astype(np.uint8)) + box[:, [0,2]] = iw - box[:, [2,0]] + + nw = nws[index] + nh = nhs[index] + image = image.resize((nw,nh), Image.BICUBIC) + # 将图片进行放置,分别对应四张分割图片的位置 dx = place_x[index] dy = place_y[index] - new_image = Image.new('RGB', (w, h), - (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255))) + new_image = Image.new('RGB', (w,h), (128,128,128)) new_image.paste(image, (dx, dy)) image_data = np.array(new_image) index = index + 1 box_data = [] # 对box进行重新处理 - if len(box) > 0: + if 
len(box)>0: np.random.shuffle(box) - box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx - box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy - box[:, 0:2][box[:, 0:2] < 0] = 0 - box[:, 2][box[:, 2] > w] = w - box[:, 3][box[:, 3] > h] = h + box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx + box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy + box[:, 0:2][box[:, 0:2]<0] = 0 + box[:, 2][box[:, 2]>w] = w + box[:, 3][box[:, 3]>h] = h box_w = box[:, 2] - box[:, 0] box_h = box[:, 3] - box[:, 1] - box = box[np.logical_and(box_w > 1, box_h > 1)] - box_data = np.zeros((len(box), 5)) + box = box[np.logical_and(box_w>1, box_h>1)] + box_data = np.zeros((len(box),5)) box_data[:len(box)] = box - + image_datas.append(image_data) box_datas.append(box_data) # 将图片分割,放在一起 - cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x))) - cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y))) + cutx = int(w * min_offset_x) + cuty = int(h * min_offset_y) new_image = np.zeros([h, w, 3]) new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :] @@ -212,47 +278,26 @@ class YoloDataset(Dataset): new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :] new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :] + # 进行色域变换 + hue = self.rand(-hue, hue) + sat = self.rand(1, sat) if self.rand()<.5 else 1/self.rand(1, sat) + val = self.rand(1, val) if self.rand()<.5 else 1/self.rand(1, val) + x = cv2.cvtColor(np.array(new_image/255,np.float32), cv2.COLOR_RGB2HSV) + x[..., 0] += hue*360 + x[..., 0][x[..., 0]>1] -= 1 + x[..., 0][x[..., 0]<0] += 1 + x[..., 1] *= sat + x[..., 2] *= val + x[x[:, :, 0]>360, 0] = 360 + x[:, :, 1:][x[:, :, 1:]>1] = 1 + x[x<0] = 0 + new_image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255 + # 对框进行进一步的处理 - new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty)) + new_boxes = self.merge_bboxes(box_datas, cutx, cuty) return new_image, new_boxes - def __getitem__(self, index): - lines = self.train_lines - n = self.train_batches - index = index % n - if self.mosaic: - if self.flag and (index + 4) < n: - img, y = self.get_random_data_with_Mosaic(lines[index:index + 4], self.image_size[0:2]) - else: - img, y = self.get_random_data(lines[index], self.image_size[0:2], random=self.is_train) - self.flag = bool(1-self.flag) - else: - img, y = self.get_random_data(lines[index], self.image_size[0:2], random=self.is_train) - - if len(y) != 0: - # 从坐标转换成0~1的百分比 - boxes = np.array(y[:, :4], dtype=np.float32) - boxes[:, 0] = boxes[:, 0] / self.image_size[1] - boxes[:, 1] = boxes[:, 1] / self.image_size[0] - boxes[:, 2] = boxes[:, 2] / self.image_size[1] - boxes[:, 3] = boxes[:, 3] / self.image_size[0] - - boxes = np.maximum(np.minimum(boxes, 1), 0) - boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - - boxes[:, 0] = boxes[:, 0] + boxes[:, 2] / 2 - boxes[:, 1] = boxes[:, 1] + boxes[:, 3] / 2 - y = np.concatenate([boxes, y[:, -1:]], axis=-1) - - img = np.array(img, dtype=np.float32) - - tmp_inp = np.transpose(img / 255.0, (2, 0, 1)) - tmp_targets = np.array(y, dtype=np.float32) - return tmp_inp, tmp_targets - - # DataLoader中collate_fn使用 def yolo_dataset_collate(batch): images = [] @@ -261,5 +306,4 @@ def yolo_dataset_collate(batch): images.append(img) bboxes.append(box) images = np.array(images) - return images, bboxes - + return images, bboxes \ No newline at end of file diff --git a/utils/utils.py b/utils/utils.py index f842779534926ef58544c63a5a554b7157362e71..4c122c21174e44e9287393d2fda95500ba271783 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -1,374 +1,62 @@ 
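Before the utils.py hunk: the `__getitem__` conversion above turns absolute x1,y1,x2,y2 corners into the normalized cx,cy,w,h format the loss expects. A quick numeric check of exactly those four lines, on an assumed 416x416 input:

    import numpy as np

    input_shape = [416, 416]                                          # h, w
    box = np.array([[104., 52., 312., 260., 0.]], dtype=np.float32)   # x1 y1 x2 y2 cls
    box[:, [0, 2]] = box[:, [0, 2]] / input_shape[1]  # normalize x by width
    box[:, [1, 3]] = box[:, [1, 3]] / input_shape[0]  # normalize y by height
    box[:, 2:4]   = box[:, 2:4] - box[:, 0:2]         # corners -> width, height
    box[:, 0:2]   = box[:, 0:2] + box[:, 2:4] / 2     # top-left -> center
    print(box)   # [[0.5 0.375 0.5 0.5 0.]] -> cx, cy, w, h, class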
-from __future__ import division - import numpy as np -import torch -import torch.nn as nn from PIL import Image -from torchvision.ops import nms - - -class DecodeBox(nn.Module): - def __init__(self, anchors, num_classes, img_size): - super(DecodeBox, self).__init__() - #-----------------------------------------------------------# - # 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401] - # 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146] - # 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28] - #-----------------------------------------------------------# - self.anchors = anchors - self.num_anchors = len(anchors) - self.num_classes = num_classes - self.bbox_attrs = 5 + num_classes - self.img_size = img_size - - def forward(self, input): - #-----------------------------------------------# - # 输入的input一共有三个,他们的shape分别是 - # batch_size, 255, 13, 13 - # batch_size, 255, 26, 26 - # batch_size, 255, 52, 52 - #-----------------------------------------------# - batch_size = input.size(0) - input_height = input.size(2) - input_width = input.size(3) - - #-----------------------------------------------# - # 输入为416x416时 - # stride_h = stride_w = 32、16、8 - #-----------------------------------------------# - stride_h = self.img_size[1] / input_height - stride_w = self.img_size[0] / input_width - #-------------------------------------------------# - # 此时获得的scaled_anchors大小是相对于特征层的 - #-------------------------------------------------# - scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors] - - #-----------------------------------------------# - # 输入的input一共有三个,他们的shape分别是 - # batch_size, 3, 13, 13, 85 - # batch_size, 3, 26, 26, 85 - # batch_size, 3, 52, 52, 85 - #-----------------------------------------------# - prediction = input.view(batch_size, self.num_anchors, - self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous() - - # 先验框的中心位置的调整参数 - x = torch.sigmoid(prediction[..., 0]) - y = torch.sigmoid(prediction[..., 1]) - # 先验框的宽高调整参数 - w = prediction[..., 2] - h = prediction[..., 3] - # 获得置信度,是否有物体 - conf = torch.sigmoid(prediction[..., 4]) - # 种类置信度 - pred_cls = torch.sigmoid(prediction[..., 5:]) - - FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor - LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor - - #----------------------------------------------------------# - # 生成网格,先验框中心,网格左上角 - # batch_size,3,13,13 - #----------------------------------------------------------# - grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat( - batch_size * self.num_anchors, 1, 1).view(x.shape).type(FloatTensor) - grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat( - batch_size * self.num_anchors, 1, 1).view(y.shape).type(FloatTensor) - - #----------------------------------------------------------# - # 按照网格格式生成先验框的宽高 - # batch_size,3,13,13 - #----------------------------------------------------------# - anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0])) - anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1])) - anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape) - anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape) - - #----------------------------------------------------------# - # 利用预测结果对先验框进行调整 - # 首先调整先验框的中心,从先验框中心向右下角偏移 - # 再调整先验框的宽高。 - 
#----------------------------------------------------------# - pred_boxes = FloatTensor(prediction[..., :4].shape) - pred_boxes[..., 0] = x.data + grid_x - pred_boxes[..., 1] = y.data + grid_y - pred_boxes[..., 2] = torch.exp(w.data) * anchor_w - pred_boxes[..., 3] = torch.exp(h.data) * anchor_h - - # fig = plt.figure() - # ax = fig.add_subplot(121) - # if input_height==13: - # plt.ylim(0,13) - # plt.xlim(0,13) - # elif input_height==26: - # plt.ylim(0,26) - # plt.xlim(0,26) - # elif input_height==52: - # plt.ylim(0,52) - # plt.xlim(0,52) - # plt.scatter(grid_x.cpu(),grid_y.cpu()) - - # anchor_left = grid_x - anchor_w/2 - # anchor_top = grid_y - anchor_h/2 - - # rect1 = plt.Rectangle([anchor_left[0,0,5,5],anchor_top[0,0,5,5]],anchor_w[0,0,5,5],anchor_h[0,0,5,5],color="r",fill=False) - # rect2 = plt.Rectangle([anchor_left[0,1,5,5],anchor_top[0,1,5,5]],anchor_w[0,1,5,5],anchor_h[0,1,5,5],color="r",fill=False) - # rect3 = plt.Rectangle([anchor_left[0,2,5,5],anchor_top[0,2,5,5]],anchor_w[0,2,5,5],anchor_h[0,2,5,5],color="r",fill=False) - - # ax.add_patch(rect1) - # ax.add_patch(rect2) - # ax.add_patch(rect3) - - # ax = fig.add_subplot(122) - # if input_height==13: - # plt.ylim(0,13) - # plt.xlim(0,13) - # elif input_height==26: - # plt.ylim(0,26) - # plt.xlim(0,26) - # elif input_height==52: - # plt.ylim(0,52) - # plt.xlim(0,52) - # plt.scatter(grid_x.cpu(),grid_y.cpu()) - # plt.scatter(pred_boxes[0,:,5,5,0].cpu(),pred_boxes[0,:,5,5,1].cpu(),c='r') - - # pre_left = pred_boxes[...,0] - pred_boxes[...,2]/2 - # pre_top = pred_boxes[...,1] - pred_boxes[...,3]/2 - - # rect1 = plt.Rectangle([pre_left[0,0,5,5],pre_top[0,0,5,5]],pred_boxes[0,0,5,5,2],pred_boxes[0,0,5,5,3],color="r",fill=False) - # rect2 = plt.Rectangle([pre_left[0,1,5,5],pre_top[0,1,5,5]],pred_boxes[0,1,5,5,2],pred_boxes[0,1,5,5,3],color="r",fill=False) - # rect3 = plt.Rectangle([pre_left[0,2,5,5],pre_top[0,2,5,5]],pred_boxes[0,2,5,5,2],pred_boxes[0,2,5,5,3],color="r",fill=False) - - # ax.add_patch(rect1) - # ax.add_patch(rect2) - # ax.add_patch(rect3) - - # plt.show() - - #----------------------------------------------------------# - # 将输出结果调整成相对于输入图像大小 - #----------------------------------------------------------# - _scale = torch.Tensor([stride_w, stride_h] * 2).type(FloatTensor) - output = torch.cat((pred_boxes.view(batch_size, -1, 4) * _scale, - conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1) - return output.data - -def letterbox_image(image, size): - iw, ih = image.size - w, h = size - scale = min(w/iw, h/ih) - nw = int(iw*scale) - nh = int(ih*scale) - - image = image.resize((nw,nh), Image.BICUBIC) - new_image = Image.new('RGB', size, (128,128,128)) - new_image.paste(image, ((w-nw)//2, (h-nh)//2)) - return new_image - -def yolo_correct_boxes(top, left, bottom, right, input_shape, image_shape): - new_shape = image_shape*np.min(input_shape/image_shape) - - offset = (input_shape-new_shape)/2./input_shape - scale = input_shape/new_shape - - box_yx = np.concatenate(((top+bottom)/2,(left+right)/2),axis=-1)/input_shape - box_hw = np.concatenate((bottom-top,right-left),axis=-1)/input_shape - - box_yx = (box_yx - offset) * scale - box_hw *= scale - box_mins = box_yx - (box_hw / 2.) - box_maxes = box_yx + (box_hw / 2.) 
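#---------------------------------------------------------------------------#
#   A worked example of the (centre, size) -> (min, max) step just above; the
#   box values are assumptions chosen for illustration:
#---------------------------------------------------------------------------#
import numpy as np
box_yx = np.array([[0.5, 0.5]])     # normalised box centre, (y, x) order
box_hw = np.array([[0.4, 0.2]])     # normalised box size, (h, w) order
box_mins = box_yx - (box_hw / 2.)   # -> [[0.3, 0.4]]  (top, left)
box_maxes = box_yx + (box_hw / 2.)  # -> [[0.7, 0.6]]  (bottom, right)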
- boxes = np.concatenate([ - box_mins[:, 0:1], - box_mins[:, 1:2], - box_maxes[:, 0:1], - box_maxes[:, 1:2] - ],axis=-1) - boxes *= np.concatenate([image_shape, image_shape],axis=-1) - return boxes - -def bbox_iou(box1, box2, x1y1x2y2=True): - """ - 计算IOU - """ - if not x1y1x2y2: - b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 - b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 - b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 - b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 +#---------------------------------------------------------# +# 将图像转换成RGB图像,防止灰度图在预测时报错。 +# 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB +#---------------------------------------------------------# +def cvtColor(image): + if len(np.shape(image)) == 3 and np.shape(image)[-1] == 3: + return image else: - b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] - - inter_rect_x1 = torch.max(b1_x1, b2_x1) - inter_rect_y1 = torch.max(b1_y1, b2_y1) - inter_rect_x2 = torch.min(b1_x2, b2_x2) - inter_rect_y2 = torch.min(b1_y2, b2_y2) - - inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * \ - torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0) - - b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) - b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) - - iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) - - return iou - - -def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4): - #----------------------------------------------------------# - # 将预测结果的格式转换成左上角右下角的格式。 - # prediction [batch_size, num_anchors, 85] - #----------------------------------------------------------# - box_corner = prediction.new(prediction.shape) - box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 - box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 - box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 - box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 - prediction[:, :, :4] = box_corner[:, :, :4] - - output = [None for _ in range(len(prediction))] - for image_i, image_pred in enumerate(prediction): - #----------------------------------------------------------# - # 对种类预测部分取max。 - # class_conf [num_anchors, 1] 种类置信度 - # class_pred [num_anchors, 1] 种类 - #----------------------------------------------------------# - class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True) - - #----------------------------------------------------------# - # 利用置信度进行第一轮筛选 - #----------------------------------------------------------# - conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze() - - #----------------------------------------------------------# - # 根据置信度进行预测结果的筛选 - #----------------------------------------------------------# - image_pred = image_pred[conf_mask] - class_conf = class_conf[conf_mask] - class_pred = class_pred[conf_mask] - if not image_pred.size(0): - continue - #-------------------------------------------------------------------------# - # detections [num_anchors, 7] - # 7的内容为:x1, y1, x2, y2, obj_conf, class_conf, class_pred - #-------------------------------------------------------------------------# - detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1) - - #------------------------------------------# - # 获得预测结果中包含的所有种类 - #------------------------------------------# - 
unique_labels = detections[:, -1].cpu().unique() - - if prediction.is_cuda: - unique_labels = unique_labels.cuda() - detections = detections.cuda() - - for c in unique_labels: - #------------------------------------------# - # 获得某一类得分筛选后全部的预测结果 - #------------------------------------------# - detections_class = detections[detections[:, -1] == c] - - #------------------------------------------# - # 使用官方自带的非极大抑制会速度更快一些! - #------------------------------------------# - keep = nms( - detections_class[:, :4], - detections_class[:, 4] * detections_class[:, 5], - nms_thres - ) - max_detections = detections_class[keep] - - # # 按照存在物体的置信度排序 - # _, conf_sort_index = torch.sort(detections_class[:, 4]*detections_class[:, 5], descending=True) - # detections_class = detections_class[conf_sort_index] - # # 进行非极大抑制 - # max_detections = [] - # while detections_class.size(0): - # # 取出这一类置信度最高的,一步一步往下判断,判断重合程度是否大于nms_thres,如果是则去除掉 - # max_detections.append(detections_class[0].unsqueeze(0)) - # if len(detections_class) == 1: - # break - # ious = bbox_iou(max_detections[-1], detections_class[1:]) - # detections_class = detections_class[1:][ious < nms_thres] - # # 堆叠 - # max_detections = torch.cat(max_detections).data - - # Add max detections to outputs - output[image_i] = max_detections if output[image_i] is None else torch.cat( - (output[image_i], max_detections)) - - return output - - -def merge_bboxes(bboxes, cutx, cuty): - merge_bbox = [] - for i in range(len(bboxes)): - for box in bboxes[i]: - tmp_box = [] - x1,y1,x2,y2 = box[0], box[1], box[2], box[3] - - if i == 0: - if y1 > cuty or x1 > cutx: - continue - if y2 >= cuty and y1 <= cuty: - y2 = cuty - if y2-y1 < 5: - continue - if x2 >= cutx and x1 <= cutx: - x2 = cutx - if x2-x1 < 5: - continue - - if i == 1: - if y2 < cuty or x1 > cutx: - continue - - if y2 >= cuty and y1 <= cuty: - y1 = cuty - if y2-y1 < 5: - continue - - if x2 >= cutx and x1 <= cutx: - x2 = cutx - if x2-x1 < 5: - continue - - if i == 2: - if y2 < cuty or x2 < cutx: - continue - - if y2 >= cuty and y1 <= cuty: - y1 = cuty - if y2-y1 < 5: - continue - - if x2 >= cutx and x1 <= cutx: - x1 = cutx - if x2-x1 < 5: - continue - - if i == 3: - if y1 > cuty or x2 < cutx: - continue - - if y2 >= cuty and y1 <= cuty: - y2 = cuty - if y2-y1 < 5: - continue - - if x2 >= cutx and x1 <= cutx: - x1 = cutx - if x2-x1 < 5: - continue + image = image.convert('RGB') + return image + +#---------------------------------------------------# +# 对输入图像进行resize +#---------------------------------------------------# +def resize_image(image, size, letterbox_image): + iw, ih = image.size + w, h = size + if letterbox_image: + scale = min(w/iw, h/ih) + nw = int(iw*scale) + nh = int(ih*scale) + + image = image.resize((nw,nh), Image.BICUBIC) + new_image = Image.new('RGB', size, (128,128,128)) + new_image.paste(image, ((w-nw)//2, (h-nh)//2)) + else: + new_image = image.resize((w, h), Image.BICUBIC) + return new_image - tmp_box.append(x1) - tmp_box.append(y1) - tmp_box.append(x2) - tmp_box.append(y2) - tmp_box.append(box[-1]) - merge_bbox.append(tmp_box) - return merge_bbox +#---------------------------------------------------# +# 获得类 +#---------------------------------------------------# +def get_classes(classes_path): + with open(classes_path, encoding='utf-8') as f: + class_names = f.readlines() + class_names = [c.strip() for c in class_names] + return class_names, len(class_names) + +#---------------------------------------------------# +# 获得先验框 +#---------------------------------------------------# +def 
get_anchors(anchors_path): + '''loads the anchors from a file''' + with open(anchors_path, encoding='utf-8') as f: + anchors = f.readline() + anchors = [float(x) for x in anchors.split(',')] + anchors = np.array(anchors).reshape(-1, 2) + return anchors, len(anchors) + +#---------------------------------------------------# +# 获得学习率 +#---------------------------------------------------# +def get_lr(optimizer): + for param_group in optimizer.param_groups: + return param_group['lr'] + +def preprocess_input(image): + image /= 255.0 + return image \ No newline at end of file diff --git a/utils/utils_bbox.py b/utils/utils_bbox.py new file mode 100644 index 0000000000000000000000000000000000000000..f5549c848dc324df1a7bfbeeb68526b9585ea586 --- /dev/null +++ b/utils/utils_bbox.py @@ -0,0 +1,227 @@ +import torch +import torch.nn as nn +from torchvision.ops import nms +import numpy as np + +class DecodeBox(): + def __init__(self, anchors, num_classes, input_shape, anchors_mask = [[6,7,8], [3,4,5], [0,1,2]]): + super(DecodeBox, self).__init__() + self.anchors = anchors + self.num_classes = num_classes + self.bbox_attrs = 5 + num_classes + self.input_shape = input_shape + #-----------------------------------------------------------# + # 13x13的特征层对应的anchor是[142, 110],[192, 243],[459, 401] + # 26x26的特征层对应的anchor是[36, 75],[76, 55],[72, 146] + # 52x52的特征层对应的anchor是[12, 16],[19, 36],[40, 28] + #-----------------------------------------------------------# + self.anchors_mask = anchors_mask + + def decode_box(self, inputs): + outputs = [] + for i, input in enumerate(inputs): + #-----------------------------------------------# + # 输入的input一共有三个,他们的shape分别是 + # batch_size, 255, 13, 13 + # batch_size, 255, 26, 26 + # batch_size, 255, 52, 52 + #-----------------------------------------------# + batch_size = input.size(0) + input_height = input.size(2) + input_width = input.size(3) + + #-----------------------------------------------# + # 输入为416x416时 + # stride_h = stride_w = 32、16、8 + #-----------------------------------------------# + stride_h = self.input_shape[0] / input_height + stride_w = self.input_shape[1] / input_width + #-------------------------------------------------# + # 此时获得的scaled_anchors大小是相对于特征层的 + #-------------------------------------------------# + scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors[self.anchors_mask[i]]] + + #-----------------------------------------------# + # 输入的input一共有三个,他们的shape分别是 + # batch_size, 3, 13, 13, 85 + # batch_size, 3, 26, 26, 85 + # batch_size, 3, 52, 52, 85 + #-----------------------------------------------# + prediction = input.view(batch_size, len(self.anchors_mask[i]), + self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous() + + #-----------------------------------------------# + # 先验框的中心位置的调整参数 + #-----------------------------------------------# + x = torch.sigmoid(prediction[..., 0]) + y = torch.sigmoid(prediction[..., 1]) + #-----------------------------------------------# + # 先验框的宽高调整参数 + #-----------------------------------------------# + w = prediction[..., 2] + h = prediction[..., 3] + #-----------------------------------------------# + # 获得置信度,是否有物体 + #-----------------------------------------------# + conf = torch.sigmoid(prediction[..., 4]) + #-----------------------------------------------# + # 种类置信度 + #-----------------------------------------------# + pred_cls = torch.sigmoid(prediction[..., 5:]) + + FloatTensor = torch.cuda.FloatTensor if x.is_cuda else 
torch.FloatTensor + LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor + + #----------------------------------------------------------# + # 生成网格,先验框中心,网格左上角 + # batch_size,3,13,13 + #----------------------------------------------------------# + grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat( + batch_size * len(self.anchors_mask[i]), 1, 1).view(x.shape).type(FloatTensor) + grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat( + batch_size * len(self.anchors_mask[i]), 1, 1).view(y.shape).type(FloatTensor) + + #----------------------------------------------------------# + # 按照网格格式生成先验框的宽高 + # batch_size,3,13,13 + #----------------------------------------------------------# + anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0])) + anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1])) + anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape) + anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape) + + #----------------------------------------------------------# + # 利用预测结果对先验框进行调整 + # 首先调整先验框的中心,从先验框中心向右下角偏移 + # 再调整先验框的宽高。 + #----------------------------------------------------------# + pred_boxes = FloatTensor(prediction[..., :4].shape) + pred_boxes[..., 0] = x.data + grid_x + pred_boxes[..., 1] = y.data + grid_y + pred_boxes[..., 2] = torch.exp(w.data) * anchor_w + pred_boxes[..., 3] = torch.exp(h.data) * anchor_h + + #----------------------------------------------------------# + # 将输出结果归一化成小数的形式 + #----------------------------------------------------------# + _scale = torch.Tensor([input_width, input_height, input_width, input_height]).type(FloatTensor) + output = torch.cat((pred_boxes.view(batch_size, -1, 4) / _scale, + conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1) + outputs.append(output.data) + return outputs + + def yolo_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image): + #-----------------------------------------------------------------# + # 把y轴放前面是因为方便预测框和图像的宽高进行相乘 + #-----------------------------------------------------------------# + box_yx = box_xy[..., ::-1] + box_hw = box_wh[..., ::-1] + input_shape = np.array(input_shape) + image_shape = np.array(image_shape) + + if letterbox_image: + #-----------------------------------------------------------------# + # 这里求出来的offset是图像有效区域相对于图像左上角的偏移情况 + # new_shape指的是宽高缩放情况 + #-----------------------------------------------------------------# + new_shape = np.round(image_shape * np.min(input_shape/image_shape)) + offset = (input_shape - new_shape)/2./input_shape + scale = input_shape/new_shape + + box_yx = (box_yx - offset) * scale + box_hw *= scale + + box_mins = box_yx - (box_hw / 2.) + box_maxes = box_yx + (box_hw / 2.) 
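#---------------------------------------------------------------------------#
#   A worked example of the letterbox un-mapping above; the shapes are
#   assumptions (416x416 network input, 480x640 original image):
#---------------------------------------------------------------------------#
import numpy as np
input_shape = np.array([416, 416])
image_shape = np.array([480, 640])
new_shape = np.round(image_shape * np.min(input_shape / image_shape))   # [312. 416.]
offset = (input_shape - new_shape) / 2. / input_shape                   # [0.125 0.   ]
scale = input_shape / new_shape                                         # [1.333 1.   ]
# A box centre at y = 0.5 of the letterboxed input maps back to
# (0.5 - 0.125) * 1.333 = 0.5 of the original image, as expected.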
+ boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1) + boxes *= np.concatenate([image_shape, image_shape], axis=-1) + return boxes + + def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, nms_thres=0.4): + #----------------------------------------------------------# + # 将预测结果的格式转换成左上角右下角的格式。 + # prediction [batch_size, num_anchors, 85] + #----------------------------------------------------------# + box_corner = prediction.new(prediction.shape) + box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 + box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 + box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 + box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 + prediction[:, :, :4] = box_corner[:, :, :4] + + output = [None for _ in range(len(prediction))] + for i, image_pred in enumerate(prediction): + #----------------------------------------------------------# + # 对种类预测部分取max。 + # class_conf [num_anchors, 1] 种类置信度 + # class_pred [num_anchors, 1] 种类 + #----------------------------------------------------------# + class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True) + + #----------------------------------------------------------# + # 利用置信度进行第一轮筛选 + #----------------------------------------------------------# + conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze() + + #----------------------------------------------------------# + # 根据置信度进行预测结果的筛选 + #----------------------------------------------------------# + image_pred = image_pred[conf_mask] + class_conf = class_conf[conf_mask] + class_pred = class_pred[conf_mask] + if not image_pred.size(0): + continue + #-------------------------------------------------------------------------# + # detections [num_anchors, 7] + # 7的内容为:x1, y1, x2, y2, obj_conf, class_conf, class_pred + #-------------------------------------------------------------------------# + detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1) + + #------------------------------------------# + # 获得预测结果中包含的所有种类 + #------------------------------------------# + unique_labels = detections[:, -1].cpu().unique() + + if prediction.is_cuda: + unique_labels = unique_labels.cuda() + detections = detections.cuda() + + for c in unique_labels: + #------------------------------------------# + # 获得某一类得分筛选后全部的预测结果 + #------------------------------------------# + detections_class = detections[detections[:, -1] == c] + + #------------------------------------------# + # 使用官方自带的非极大抑制会速度更快一些! 
+ #------------------------------------------# + keep = nms( + detections_class[:, :4], + detections_class[:, 4] * detections_class[:, 5], + nms_thres + ) + max_detections = detections_class[keep] + + # # 按照存在物体的置信度排序 + # _, conf_sort_index = torch.sort(detections_class[:, 4]*detections_class[:, 5], descending=True) + # detections_class = detections_class[conf_sort_index] + # # 进行非极大抑制 + # max_detections = [] + # while detections_class.size(0): + # # 取出这一类置信度最高的,一步一步往下判断,判断重合程度是否大于nms_thres,如果是则去除掉 + # max_detections.append(detections_class[0].unsqueeze(0)) + # if len(detections_class) == 1: + # break + # ious = bbox_iou(max_detections[-1], detections_class[1:]) + # detections_class = detections_class[1:][ious < nms_thres] + # # 堆叠 + # max_detections = torch.cat(max_detections).data + + # Add max detections to outputs + output[i] = max_detections if output[i] is None else torch.cat((output[i], max_detections)) + + if output[i] is not None: + output[i] = output[i].cpu().numpy() + box_xy, box_wh = (output[i][:, 0:2] + output[i][:, 2:4])/2, output[i][:, 2:4] - output[i][:, 0:2] + output[i][:, :4] = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image) + return output diff --git a/utils/utils_fit.py b/utils/utils_fit.py new file mode 100644 index 0000000000000000000000000000000000000000..53dcc334d53824522f73b5894bc9cfa18e318c30 --- /dev/null +++ b/utils/utils_fit.py @@ -0,0 +1,102 @@ +import torch +from tqdm import tqdm + +from utils.utils import get_lr + +def fit_one_epoch(model_train, model, yolo_loss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val, Epoch, cuda): + loss = 0 + val_loss = 0 + + model_train.train() + print('Start Train') + with tqdm(total=epoch_step,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: + for iteration, batch in enumerate(gen): + if iteration >= epoch_step: + break + + images, targets = batch[0], batch[1] + with torch.no_grad(): + if cuda: + images = torch.from_numpy(images).type(torch.FloatTensor).cuda() + targets = [torch.from_numpy(ann).type(torch.FloatTensor).cuda() for ann in targets] + else: + images = torch.from_numpy(images).type(torch.FloatTensor) + targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets] + #----------------------# + # 清零梯度 + #----------------------# + optimizer.zero_grad() + #----------------------# + # 前向传播 + #----------------------# + outputs = model_train(images) + + loss_value_all = 0 + num_pos_all = 0 + #----------------------# + # 计算损失 + #----------------------# + for l in range(len(outputs)): + loss_item, num_pos = yolo_loss(l, outputs[l], targets) + loss_value_all += loss_item + num_pos_all += num_pos + loss_value = loss_value_all / num_pos_all + + #----------------------# + # 反向传播 + #----------------------# + loss_value.backward() + optimizer.step() + + loss += loss_value.item() + + pbar.set_postfix(**{'loss' : loss / (iteration + 1), + 'lr' : get_lr(optimizer)}) + pbar.update(1) + + print('Finish Train') + + model_train.eval() + print('Start Validation') + with tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar: + for iteration, batch in enumerate(gen_val): + if iteration >= epoch_step_val: + break + images, targets = batch[0], batch[1] + with torch.no_grad(): + if cuda: + images = torch.from_numpy(images).type(torch.FloatTensor).cuda() + targets = [torch.from_numpy(ann).type(torch.FloatTensor).cuda() for ann in targets] + else: + images = 
torch.from_numpy(images).type(torch.FloatTensor) + targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets] + #----------------------# + # 清零梯度 + #----------------------# + optimizer.zero_grad() + #----------------------# + # 前向传播 + #----------------------# + outputs = model_train(images) + + loss_value_all = 0 + num_pos_all = 0 + #----------------------# + # 计算损失 + #----------------------# + for l in range(len(outputs)): + loss_item, num_pos = yolo_loss(l, outputs[l], targets) + loss_value_all += loss_item + num_pos_all += num_pos + loss_value = loss_value_all / num_pos_all + + val_loss += loss_value.item() + pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)}) + pbar.update(1) + + print('Finish Validation') + + loss_history.append_loss(loss / epoch_step, val_loss / epoch_step_val) + print('Epoch:'+ str(epoch+1) + '/' + str(Epoch)) + print('Total Loss: %.3f || Val Loss: %.3f ' % (loss / epoch_step, val_loss / epoch_step_val)) + torch.save(model.state_dict(), 'logs/ep%03d-loss%.3f-val_loss%.3f.pth' % (epoch + 1, loss / epoch_step, val_loss / epoch_step_val)) diff --git a/utils/utils_map.py b/utils/utils_map.py new file mode 100644 index 0000000000000000000000000000000000000000..45aba747fdcd742b7098b31029dd8df9384be699 --- /dev/null +++ b/utils/utils_map.py @@ -0,0 +1,897 @@ +import glob +import json +import math +import operator +import os +import shutil +import sys + +import cv2 +import matplotlib.pyplot as plt +import numpy as np + +''' + 0,0 ------> x (width) + | + | (Left,Top) + | *_________ + | | | + | | + y |_________| + (height) * + (Right,Bottom) +''' + +def log_average_miss_rate(precision, fp_cumsum, num_images): + """ + log-average miss rate: + Calculated by averaging miss rates at 9 evenly spaced FPPI points + between 10e-2 and 10e0, in log-space. + + output: + lamr | log-average miss rate + mr | miss rate + fppi | false positives per image + + references: + [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the + State of the Art." Pattern Analysis and Machine Intelligence, IEEE + Transactions on 34.4 (2012): 743 - 761. + """ + + if precision.size == 0: + lamr = 0 + mr = 1 + fppi = 0 + return lamr, mr, fppi + + fppi = fp_cumsum / float(num_images) + mr = (1 - precision) + + fppi_tmp = np.insert(fppi, 0, -1.0) + mr_tmp = np.insert(mr, 0, 1.0) + + ref = np.logspace(-2.0, 0.0, num = 9) + for i, ref_i in enumerate(ref): + j = np.where(fppi_tmp <= ref_i)[-1][-1] + ref[i] = mr_tmp[j] + + lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref)))) + + return lamr, mr, fppi + +""" + throw error and exit +""" +def error(msg): + print(msg) + sys.exit(0) + +""" + check if the number is a float between 0.0 and 1.0 +""" +def is_float_between_0_and_1(value): + try: + val = float(value) + if val > 0.0 and val < 1.0: + return True + else: + return False + except ValueError: + return False + +""" + Calculate the AP given the recall and precision array + 1st) We compute a version of the measured precision/recall curve with + precision monotonically decreasing + 2nd) We compute the AP as the area under this curve by numerical integration. 
+""" +def voc_ap(rec, prec): + """ + --- Official matlab code VOC2012--- + mrec=[0 ; rec ; 1]; + mpre=[0 ; prec ; 0]; + for i=numel(mpre)-1:-1:1 + mpre(i)=max(mpre(i),mpre(i+1)); + end + i=find(mrec(2:end)~=mrec(1:end-1))+1; + ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); + """ + rec.insert(0, 0.0) # insert 0.0 at begining of list + rec.append(1.0) # insert 1.0 at end of list + mrec = rec[:] + prec.insert(0, 0.0) # insert 0.0 at begining of list + prec.append(0.0) # insert 0.0 at end of list + mpre = prec[:] + """ + This part makes the precision monotonically decreasing + (goes from the end to the beginning) + matlab: for i=numel(mpre)-1:-1:1 + mpre(i)=max(mpre(i),mpre(i+1)); + """ + for i in range(len(mpre)-2, -1, -1): + mpre[i] = max(mpre[i], mpre[i+1]) + """ + This part creates a list of indexes where the recall changes + matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1; + """ + i_list = [] + for i in range(1, len(mrec)): + if mrec[i] != mrec[i-1]: + i_list.append(i) # if it was matlab would be i + 1 + """ + The Average Precision (AP) is the area under the curve + (numerical integration) + matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); + """ + ap = 0.0 + for i in i_list: + ap += ((mrec[i]-mrec[i-1])*mpre[i]) + return ap, mrec, mpre + + +""" + Convert the lines of a file to a list +""" +def file_lines_to_list(path): + # open txt file lines to a list + with open(path) as f: + content = f.readlines() + # remove whitespace characters like `\n` at the end of each line + content = [x.strip() for x in content] + return content + +""" + Draws text in image +""" +def draw_text_in_image(img, text, pos, color, line_width): + font = cv2.FONT_HERSHEY_PLAIN + fontScale = 1 + lineType = 1 + bottomLeftCornerOfText = pos + cv2.putText(img, text, + bottomLeftCornerOfText, + font, + fontScale, + color, + lineType) + text_width, _ = cv2.getTextSize(text, font, fontScale, lineType)[0] + return img, (line_width + text_width) + +""" + Plot - adjust axes +""" +def adjust_axes(r, t, fig, axes): + # get text width for re-scaling + bb = t.get_window_extent(renderer=r) + text_width_inches = bb.width / fig.dpi + # get axis width in inches + current_fig_width = fig.get_figwidth() + new_fig_width = current_fig_width + text_width_inches + propotion = new_fig_width / current_fig_width + # get axis limit + x_lim = axes.get_xlim() + axes.set_xlim([x_lim[0], x_lim[1]*propotion]) + +""" + Draw plot using Matplotlib +""" +def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar): + # sort the dictionary by decreasing value, into a list of tuples + sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1)) + # unpacking the list of tuples into two lists + sorted_keys, sorted_values = zip(*sorted_dic_by_value) + # + if true_p_bar != "": + """ + Special case to draw in: + - green -> TP: True Positives (object detected and matches ground-truth) + - red -> FP: False Positives (object detected but does not match ground-truth) + - orange -> FN: False Negatives (object not detected but present in the ground-truth) + """ + fp_sorted = [] + tp_sorted = [] + for key in sorted_keys: + fp_sorted.append(dictionary[key] - true_p_bar[key]) + tp_sorted.append(true_p_bar[key]) + plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive') + plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted) + # add legend + plt.legend(loc='lower right') + """ + Write number on side of bar + """ 
+ fig = plt.gcf() # gcf - get current figure + axes = plt.gca() + r = fig.canvas.get_renderer() + for i, val in enumerate(sorted_values): + fp_val = fp_sorted[i] + tp_val = tp_sorted[i] + fp_str_val = " " + str(fp_val) + tp_str_val = fp_str_val + " " + str(tp_val) + # trick to paint multicolor with offset: + # first paint everything and then repaint the first number + t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold') + plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold') + if i == (len(sorted_values)-1): # largest bar + adjust_axes(r, t, fig, axes) + else: + plt.barh(range(n_classes), sorted_values, color=plot_color) + """ + Write number on side of bar + """ + fig = plt.gcf() # gcf - get current figure + axes = plt.gca() + r = fig.canvas.get_renderer() + for i, val in enumerate(sorted_values): + str_val = " " + str(val) # add a space before + if val < 1.0: + str_val = " {0:.2f}".format(val) + t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold') + # re-set axes to show number inside the figure + if i == (len(sorted_values)-1): # largest bar + adjust_axes(r, t, fig, axes) + # set window title + fig.canvas.set_window_title(window_title) + # write classes in y axis + tick_font_size = 12 + plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size) + """ + Re-scale height accordingly + """ + init_height = fig.get_figheight() + # comput the matrix height in points and inches + dpi = fig.dpi + height_pt = n_classes * (tick_font_size * 1.4) # 1.4 (some spacing) + height_in = height_pt / dpi + # compute the required figure height + top_margin = 0.15 # in percentage of the figure height + bottom_margin = 0.05 # in percentage of the figure height + figure_height = height_in / (1 - top_margin - bottom_margin) + # set new height + if figure_height > init_height: + fig.set_figheight(figure_height) + + # set plot title + plt.title(plot_title, fontsize=14) + # set axis titles + # plt.xlabel('classes') + plt.xlabel(x_label, fontsize='large') + # adjust size of window + fig.tight_layout() + # save the plot + fig.savefig(output_path) + # show image + if to_show: + plt.show() + # close the plot + plt.close() + +def get_map(MINOVERLAP, draw_plot, path = './map_out'): + GT_PATH = os.path.join(path, 'ground-truth') + DR_PATH = os.path.join(path, 'detection-results') + IMG_PATH = os.path.join(path, 'images-optional') + TEMP_FILES_PATH = os.path.join(path, '.temp_files') + RESULTS_FILES_PATH = os.path.join(path, 'results') + + show_animation = True + if os.path.exists(IMG_PATH): + for dirpath, dirnames, files in os.walk(IMG_PATH): + if not files: + show_animation = False + else: + show_animation = False + + if not os.path.exists(TEMP_FILES_PATH): + os.makedirs(TEMP_FILES_PATH) + + if os.path.exists(RESULTS_FILES_PATH): + shutil.rmtree(RESULTS_FILES_PATH) + if draw_plot: + os.makedirs(os.path.join(RESULTS_FILES_PATH, "AP")) + os.makedirs(os.path.join(RESULTS_FILES_PATH, "F1")) + os.makedirs(os.path.join(RESULTS_FILES_PATH, "Recall")) + os.makedirs(os.path.join(RESULTS_FILES_PATH, "Precision")) + if show_animation: + os.makedirs(os.path.join(RESULTS_FILES_PATH, "images", "detections_one_by_one")) + + ground_truth_files_list = glob.glob(GT_PATH + '/*.txt') + if len(ground_truth_files_list) == 0: + error("Error: No ground-truth files found!") + ground_truth_files_list.sort() + gt_counter_per_class = {} + counter_images_per_class = {} + + for txt_file in ground_truth_files_list: + file_id = txt_file.split(".txt", 1)[0] + 
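#---------------------------------------------------------------------------#
#   The split/basename pair around this point reduces a ground-truth path to
#   its bare image id; a tiny example with an assumed path:
#---------------------------------------------------------------------------#
import os
example_id = 'map_out/ground-truth/000017.txt'.split(".txt", 1)[0]
example_id = os.path.basename(os.path.normpath(example_id))   # -> '000017'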
file_id = os.path.basename(os.path.normpath(file_id)) + temp_path = os.path.join(DR_PATH, (file_id + ".txt")) + if not os.path.exists(temp_path): + error_msg = "Error. File not found: {}\n".format(temp_path) + error(error_msg) + lines_list = file_lines_to_list(txt_file) + bounding_boxes = [] + is_difficult = False + already_seen_classes = [] + for line in lines_list: + try: + if "difficult" in line: + class_name, left, top, right, bottom, _difficult = line.split() + is_difficult = True + else: + class_name, left, top, right, bottom = line.split() + except: + if "difficult" in line: + line_split = line.split() + _difficult = line_split[-1] + bottom = line_split[-2] + right = line_split[-3] + top = line_split[-4] + left = line_split[-5] + class_name = "" + for name in line_split[:-5]: + class_name += name + " " + class_name = class_name[:-1] + is_difficult = True + else: + line_split = line.split() + bottom = line_split[-1] + right = line_split[-2] + top = line_split[-3] + left = line_split[-4] + class_name = "" + for name in line_split[:-4]: + class_name += name + " " + class_name = class_name[:-1] + + bbox = left + " " + top + " " + right + " " + bottom + if is_difficult: + bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False, "difficult":True}) + is_difficult = False + else: + bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False}) + if class_name in gt_counter_per_class: + gt_counter_per_class[class_name] += 1 + else: + gt_counter_per_class[class_name] = 1 + + if class_name not in already_seen_classes: + if class_name in counter_images_per_class: + counter_images_per_class[class_name] += 1 + else: + counter_images_per_class[class_name] = 1 + already_seen_classes.append(class_name) + + with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile: + json.dump(bounding_boxes, outfile) + + gt_classes = list(gt_counter_per_class.keys()) + gt_classes = sorted(gt_classes) + n_classes = len(gt_classes) + + dr_files_list = glob.glob(DR_PATH + '/*.txt') + dr_files_list.sort() + for class_index, class_name in enumerate(gt_classes): + bounding_boxes = [] + for txt_file in dr_files_list: + file_id = txt_file.split(".txt",1)[0] + file_id = os.path.basename(os.path.normpath(file_id)) + temp_path = os.path.join(GT_PATH, (file_id + ".txt")) + if class_index == 0: + if not os.path.exists(temp_path): + error_msg = "Error. 
File not found: {}\n".format(temp_path) + error(error_msg) + lines = file_lines_to_list(txt_file) + for line in lines: + try: + tmp_class_name, confidence, left, top, right, bottom = line.split() + except: + line_split = line.split() + bottom = line_split[-1] + right = line_split[-2] + top = line_split[-3] + left = line_split[-4] + confidence = line_split[-5] + tmp_class_name = "" + for name in line_split[:-5]: + tmp_class_name += name + " " + tmp_class_name = tmp_class_name[:-1] + + if tmp_class_name == class_name: + bbox = left + " " + top + " " + right + " " +bottom + bounding_boxes.append({"confidence":confidence, "file_id":file_id, "bbox":bbox}) + + bounding_boxes.sort(key=lambda x:float(x['confidence']), reverse=True) + with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile: + json.dump(bounding_boxes, outfile) + + sum_AP = 0.0 + ap_dictionary = {} + lamr_dictionary = {} + with open(RESULTS_FILES_PATH + "/results.txt", 'w') as results_file: + results_file.write("# AP and precision/recall per class\n") + count_true_positives = {} + + for class_index, class_name in enumerate(gt_classes): + count_true_positives[class_name] = 0 + dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json" + dr_data = json.load(open(dr_file)) + + nd = len(dr_data) + tp = [0] * nd + fp = [0] * nd + score = [0] * nd + score05_idx = 0 + for idx, detection in enumerate(dr_data): + file_id = detection["file_id"] + score[idx] = float(detection["confidence"]) + if score[idx] > 0.5: + score05_idx = idx + + if show_animation: + ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*") + if len(ground_truth_img) == 0: + error("Error. Image not found with id: " + file_id) + elif len(ground_truth_img) > 1: + error("Error. Multiple image with id: " + file_id) + else: + img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0]) + img_cumulative_path = RESULTS_FILES_PATH + "/images/" + ground_truth_img[0] + if os.path.isfile(img_cumulative_path): + img_cumulative = cv2.imread(img_cumulative_path) + else: + img_cumulative = img.copy() + bottom_border = 60 + BLACK = [0, 0, 0] + img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK) + + gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json" + ground_truth_data = json.load(open(gt_file)) + ovmax = -1 + gt_match = -1 + bb = [float(x) for x in detection["bbox"].split()] + for obj in ground_truth_data: + if obj["class_name"] == class_name: + bbgt = [ float(x) for x in obj["bbox"].split() ] + bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])] + iw = bi[2] - bi[0] + 1 + ih = bi[3] - bi[1] + 1 + if iw > 0 and ih > 0: + ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0] + + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih + ov = iw * ih / ua + if ov > ovmax: + ovmax = ov + gt_match = obj + + if show_animation: + status = "NO MATCH FOUND!" + + min_overlap = MINOVERLAP + if ovmax >= min_overlap: + if "difficult" not in gt_match: + if not bool(gt_match["used"]): + tp[idx] = 1 + gt_match["used"] = True + count_true_positives[class_name] += 1 + with open(gt_file, 'w') as f: + f.write(json.dumps(ground_truth_data)) + if show_animation: + status = "MATCH!" + else: + fp[idx] = 1 + if show_animation: + status = "REPEATED MATCH!" 
+ else: + fp[idx] = 1 + if ovmax > 0: + status = "INSUFFICIENT OVERLAP" + + """ + Draw image to show animation + """ + if show_animation: + height, widht = img.shape[:2] + white = (255,255,255) + light_blue = (255,200,100) + green = (0,255,0) + light_red = (30,30,255) + margin = 10 + # 1nd line + v_pos = int(height - margin - (bottom_border / 2.0)) + text = "Image: " + ground_truth_img[0] + " " + img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) + text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " " + img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, line_width) + if ovmax != -1: + color = light_red + if status == "INSUFFICIENT OVERLAP": + text = "IoU: {0:.2f}% ".format(ovmax*100) + "< {0:.2f}% ".format(min_overlap*100) + else: + text = "IoU: {0:.2f}% ".format(ovmax*100) + ">= {0:.2f}% ".format(min_overlap*100) + color = green + img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) + # 2nd line + v_pos += int(bottom_border / 2.0) + rank_pos = str(idx+1) + text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format(float(detection["confidence"])*100) + img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) + color = light_red + if status == "MATCH!": + color = green + text = "Result: " + status + " " + img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) + + font = cv2.FONT_HERSHEY_SIMPLEX + if ovmax > 0: + bbgt = [ int(round(float(x))) for x in gt_match["bbox"].split() ] + cv2.rectangle(img,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) + cv2.rectangle(img_cumulative,(bbgt[0],bbgt[1]),(bbgt[2],bbgt[3]),light_blue,2) + cv2.putText(img_cumulative, class_name, (bbgt[0],bbgt[1] - 5), font, 0.6, light_blue, 1, cv2.LINE_AA) + bb = [int(i) for i in bb] + cv2.rectangle(img,(bb[0],bb[1]),(bb[2],bb[3]),color,2) + cv2.rectangle(img_cumulative,(bb[0],bb[1]),(bb[2],bb[3]),color,2) + cv2.putText(img_cumulative, class_name, (bb[0],bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA) + + cv2.imshow("Animation", img) + cv2.waitKey(20) + output_img_path = RESULTS_FILES_PATH + "/images/detections_one_by_one/" + class_name + "_detection" + str(idx) + ".jpg" + cv2.imwrite(output_img_path, img) + cv2.imwrite(img_cumulative_path, img_cumulative) + + cumsum = 0 + for idx, val in enumerate(fp): + fp[idx] += cumsum + cumsum += val + + cumsum = 0 + for idx, val in enumerate(tp): + tp[idx] += cumsum + cumsum += val + + rec = tp[:] + for idx, val in enumerate(tp): + rec[idx] = float(tp[idx]) / np.maximum(gt_counter_per_class[class_name], 1) + + prec = tp[:] + for idx, val in enumerate(tp): + prec[idx] = float(tp[idx]) / np.maximum((fp[idx] + tp[idx]), 1) + + ap, mrec, mprec = voc_ap(rec[:], prec[:]) + F1 = np.array(rec)*np.array(prec)*2 / np.where((np.array(prec)+np.array(rec))==0, 1, (np.array(prec)+np.array(rec))) + + sum_AP += ap + text = "{0:.2f}%".format(ap*100) + " = " + class_name + " AP " #class_name + " AP = {0:.2f}%".format(ap*100) + + if len(prec)>0: + F1_text = "{0:.2f}".format(F1[score05_idx]) + " = " + class_name + " F1 " + Recall_text = "{0:.2f}%".format(rec[score05_idx]*100) + " = " + class_name + " Recall " + Precision_text = "{0:.2f}%".format(prec[score05_idx]*100) + " = " + class_name + " Precision " + else: + F1_text = "0.00" + " = " + class_name + " F1 " + Recall_text = "0.00%" + " = " + class_name + " Recall " + Precision_text = "0.00%" + " = " + class_name + " Precision " + + rounded_prec = [ 
'%.2f' % elem for elem in prec ] + rounded_rec = [ '%.2f' % elem for elem in rec ] + results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n") + if len(prec)>0: + print(text + "\t||\tscore_threhold=0.5 : " + "F1=" + "{0:.2f}".format(F1[score05_idx])\ + + " ; Recall=" + "{0:.2f}%".format(rec[score05_idx]*100) + " ; Precision=" + "{0:.2f}%".format(prec[score05_idx]*100)) + else: + print(text + "\t||\tscore_threhold=0.5 : F1=0.00% ; Recall=0.00% ; Precision=0.00%") + ap_dictionary[class_name] = ap + + n_images = counter_images_per_class[class_name] + lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images) + lamr_dictionary[class_name] = lamr + + if draw_plot: + plt.plot(rec, prec, '-o') + area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]] + area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]] + plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r') + + fig = plt.gcf() + fig.canvas.set_window_title('AP ' + class_name) + + plt.title('class: ' + text) + plt.xlabel('Recall') + plt.ylabel('Precision') + axes = plt.gca() + axes.set_xlim([0.0,1.0]) + axes.set_ylim([0.0,1.05]) + fig.savefig(RESULTS_FILES_PATH + "/AP/" + class_name + ".png") + plt.cla() + + plt.plot(score, F1, "-", color='orangered') + plt.title('class: ' + F1_text + "\nscore_threhold=0.5") + plt.xlabel('Score_Threhold') + plt.ylabel('F1') + axes = plt.gca() + axes.set_xlim([0.0,1.0]) + axes.set_ylim([0.0,1.05]) + fig.savefig(RESULTS_FILES_PATH + "/F1/" + class_name + ".png") + plt.cla() + + plt.plot(score, rec, "-H", color='gold') + plt.title('class: ' + Recall_text + "\nscore_threhold=0.5") + plt.xlabel('Score_Threhold') + plt.ylabel('Recall') + axes = plt.gca() + axes.set_xlim([0.0,1.0]) + axes.set_ylim([0.0,1.05]) + fig.savefig(RESULTS_FILES_PATH + "/Recall/" + class_name + ".png") + plt.cla() + + plt.plot(score, prec, "-s", color='palevioletred') + plt.title('class: ' + Precision_text + "\nscore_threhold=0.5") + plt.xlabel('Score_Threhold') + plt.ylabel('Precision') + axes = plt.gca() + axes.set_xlim([0.0,1.0]) + axes.set_ylim([0.0,1.05]) + fig.savefig(RESULTS_FILES_PATH + "/Precision/" + class_name + ".png") + plt.cla() + + if show_animation: + cv2.destroyAllWindows() + + results_file.write("\n# mAP of all classes\n") + mAP = sum_AP / n_classes + text = "mAP = {0:.2f}%".format(mAP*100) + results_file.write(text + "\n") + print(text) + + shutil.rmtree(TEMP_FILES_PATH) + + """ + Count total of detection-results + """ + det_counter_per_class = {} + for txt_file in dr_files_list: + lines_list = file_lines_to_list(txt_file) + for line in lines_list: + class_name = line.split()[0] + if class_name in det_counter_per_class: + det_counter_per_class[class_name] += 1 + else: + det_counter_per_class[class_name] = 1 + dr_classes = list(det_counter_per_class.keys()) + + """ + Write number of ground-truth objects per class to results.txt + """ + with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file: + results_file.write("\n# Number of ground-truth objects per class\n") + for class_name in sorted(gt_counter_per_class): + results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n") + + """ + Finish counting true positives + """ + for class_name in dr_classes: + if class_name not in gt_classes: + count_true_positives[class_name] = 0 + + """ + Write number of detected objects per class to results.txt + """ + with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file: + results_file.write("\n# 
Number of detected objects per class\n") + for class_name in sorted(dr_classes): + n_det = det_counter_per_class[class_name] + text = class_name + ": " + str(n_det) + text += " (tp:" + str(count_true_positives[class_name]) + "" + text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n" + results_file.write(text) + + """ + Plot the total number of occurences of each class in the ground-truth + """ + if draw_plot: + window_title = "ground-truth-info" + plot_title = "ground-truth\n" + plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)" + x_label = "Number of objects per class" + output_path = RESULTS_FILES_PATH + "/ground-truth-info.png" + to_show = False + plot_color = 'forestgreen' + draw_plot_func( + gt_counter_per_class, + n_classes, + window_title, + plot_title, + x_label, + output_path, + to_show, + plot_color, + '', + ) + + """ + Plot the total number of occurences of each class in the "detection-results" folder + """ + if draw_plot: + window_title = "detection-results-info" + # Plot title + plot_title = "detection-results\n" + plot_title += "(" + str(len(dr_files_list)) + " files and " + count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values())) + plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)" + # end Plot title + x_label = "Number of objects per class" + output_path = RESULTS_FILES_PATH + "/detection-results-info.png" + to_show = False + plot_color = 'forestgreen' + true_p_bar = count_true_positives + draw_plot_func( + det_counter_per_class, + len(det_counter_per_class), + window_title, + plot_title, + x_label, + output_path, + to_show, + plot_color, + true_p_bar + ) + + """ + Draw log-average miss rate plot (Show lamr of all classes in decreasing order) + """ + if draw_plot: + window_title = "lamr" + plot_title = "log-average miss rate" + x_label = "log-average miss rate" + output_path = RESULTS_FILES_PATH + "/lamr.png" + to_show = False + plot_color = 'royalblue' + draw_plot_func( + lamr_dictionary, + n_classes, + window_title, + plot_title, + x_label, + output_path, + to_show, + plot_color, + "" + ) + + """ + Draw mAP plot (Show AP's of all classes in decreasing order) + """ + if draw_plot: + window_title = "mAP" + plot_title = "mAP = {0:.2f}%".format(mAP*100) + x_label = "Average Precision" + output_path = RESULTS_FILES_PATH + "/mAP.png" + to_show = True + plot_color = 'royalblue' + draw_plot_func( + ap_dictionary, + n_classes, + window_title, + plot_title, + x_label, + output_path, + to_show, + plot_color, + "" + ) + +def preprocess_gt(gt_path, class_names): + image_ids = os.listdir(gt_path) + results = {} + + images = [] + bboxes = [] + for i, image_id in enumerate(image_ids): + lines_list = file_lines_to_list(os.path.join(gt_path, image_id)) + boxes_per_image = [] + image = {} + image_id = os.path.splitext(image_id)[0] + image['file_name'] = image_id + '.jpg' + image['width'] = 1 + image['height'] = 1 + image['id'] = i + + for line in lines_list: + difficult = 0 + if "difficult" in line: + line_split = line.split() + left, top, right, bottom, _difficult = line_split[-5:] + class_name = "" + for name in line_split[:-5]: + class_name += name + " " + class_name = class_name[:-1] + difficult = 1 + else: + line_split = line.split() + left, top, right, bottom = line_split[-4:] + class_name = "" + for name in line_split[:-4]: + class_name += name + " " + class_name = class_name[:-1] + + left, top, right, bottom = float(left), float(top), 
float(right), float(bottom) + cls_id = class_names.index(class_name) + 1 + bbox = [left, top, right - left, bottom - top, difficult, int(image_id), cls_id, (right - left) * (bottom - top) - 10.0] + boxes_per_image.append(bbox) + images.append(image) + bboxes.extend(boxes_per_image) + results['images'] = images + + categories = [] + for i, cls in enumerate(class_names): + category = {} + category['supercategory'] = cls + category['name'] = cls + category['id'] = i + categories.append(category) + results['categories'] = categories + + annotations = [] + for i, box in enumerate(bboxes): + annotation = {} + annotation['area'] = box[-1] + annotation['category_id'] = box[-2] + annotation['image_id'] = box[-3] + annotation['iscrowd'] = box[-4] + annotation['bbox'] = box[:4] + annotation['id'] = i + annotations.append(annotation) + results['annotations'] = annotations + return results + +def preprocess_dr(dr_path, class_names): + image_ids = os.listdir(dr_path) + results = [] + for image_id in image_ids: + lines_list = file_lines_to_list(os.path.join(dr_path, image_id)) + image_id = os.path.splitext(image_id)[0] + for line in lines_list: + line_split = line.split() + confidence, left, top, right, bottom = line_split[-5:] + class_name = "" + for name in line_split[:-5]: + class_name += name + " " + class_name = class_name[:-1] + left, top, right, bottom = float(left), float(top), float(right), float(bottom) + result = {} + result["image_id"] = int(image_id) + result["category_id"] = class_names.index(class_name) + 1 + result["bbox"] = [left, top, right - left, bottom - top] + result["score"] = float(confidence) + results.append(result) + return results + +def get_coco_map(class_names, path): + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + + GT_PATH = os.path.join(path, 'ground-truth') + DR_PATH = os.path.join(path, 'detection-results') + COCO_PATH = os.path.join(path, 'coco_eval') + + if not os.path.exists(COCO_PATH): + os.makedirs(COCO_PATH) + + GT_JSON_PATH = os.path.join(COCO_PATH, 'instances_gt.json') + DR_JSON_PATH = os.path.join(COCO_PATH, 'instances_dr.json') + + with open(GT_JSON_PATH, "w") as f: + results_gt = preprocess_gt(GT_PATH, class_names) + json.dump(results_gt, f, indent=4) + + with open(DR_JSON_PATH, "w") as f: + results_dr = preprocess_dr(DR_PATH, class_names) + json.dump(results_dr, f, indent=4) + + cocoGt = COCO(GT_JSON_PATH) + cocoDt = cocoGt.loadRes(DR_JSON_PATH) + cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() diff --git a/voc_annotation.py b/voc_annotation.py index 324ef31547a18b6e129d87eac1aee92825b6df72..25efc8177c9f6ed848e9e73407960f48b90d4967 100644 --- a/voc_annotation.py +++ b/voc_annotation.py @@ -1,19 +1,42 @@ -#---------------------------------------------# -# 运行前一定要修改classes +import os +import random +import xml.etree.ElementTree as ET + +from utils.utils import get_classes + +#--------------------------------------------------------------------------------------------------------------------------------# +# annotation_mode用于指定该文件运行时计算的内容 +# annotation_mode为0代表整个标签处理过程,包括获得VOCdevkit/VOC2007/ImageSets里面的txt以及训练用的2007_train.txt、2007_val.txt +# annotation_mode为1代表获得VOCdevkit/VOC2007/ImageSets里面的txt +# annotation_mode为2代表获得训练用的2007_train.txt、2007_val.txt +#--------------------------------------------------------------------------------------------------------------------------------# +annotation_mode = 0 
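#-------------------------------------------------------------------#
#   As a sketch (variable names assumed, not repo code): annotation_mode
#   gates the two stages below like so, using the value set just above.
#-------------------------------------------------------------------#
run_imagesets   = annotation_mode in (0, 1)   # stage 1: ImageSets/Main txt files
run_train_lists = annotation_mode in (0, 2)   # stage 2: 2007_train.txt / 2007_val.txt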
+#-------------------------------------------------------------------# +# 必须要修改,用于生成2007_train.txt、2007_val.txt的目标信息 +# 与训练和预测所用的classes_path一致即可 # 如果生成的2007_train.txt里面没有目标信息 # 那么就是因为classes没有设定正确 -#---------------------------------------------# -import xml.etree.ElementTree as ET -from os import getcwd +# 仅在annotation_mode为0和2的时候有效 +#-------------------------------------------------------------------# +classes_path = 'model_data/voc_classes.txt' +#--------------------------------------------------------------------------------------------------------------------------------# +# trainval_percent用于指定(训练集+验证集)与测试集的比例,默认情况下 (训练集+验证集):测试集 = 9:1 +# train_percent用于指定(训练集+验证集)中训练集与验证集的比例,默认情况下 训练集:验证集 = 9:1 +# 仅在annotation_mode为0和1的时候有效 +#--------------------------------------------------------------------------------------------------------------------------------# +trainval_percent = 0.9 +train_percent = 0.9 +#-------------------------------------------------------# +# 指向VOC数据集所在的文件夹 +# 默认指向根目录下的VOC数据集 +#-------------------------------------------------------# +VOCdevkit_path = 'VOCdevkit' -sets=[('2007', 'train'), ('2007', 'val'), ('2007', 'test')] -#-----------------------------------------------------# -# 这里设定的classes顺序要和model_data里的txt一样 -#-----------------------------------------------------# -classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] +VOCdevkit_sets = [('2007', 'train'), ('2007', 'val')] +classes, _ = get_classes(classes_path) def convert_annotation(year, image_id, list_file): - in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id), encoding='utf-8') + in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml'%(year, image_id)), encoding='utf-8') tree=ET.parse(in_file) root = tree.getroot() @@ -28,14 +51,59 @@ def convert_annotation(year, image_id, list_file): xmlbox = obj.find('bndbox') b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text))) list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id)) + +if __name__ == "__main__": + random.seed(0) + if annotation_mode == 0 or annotation_mode == 1: + print("Generate txt in ImageSets.") + xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2007/Annotations') + saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main') + temp_xml = os.listdir(xmlfilepath) + total_xml = [] + for xml in temp_xml: + if xml.endswith(".xml"): + total_xml.append(xml) + + num = len(total_xml) + list = range(num) + tv = int(num*trainval_percent) + tr = int(tv*train_percent) + trainval= random.sample(list,tv) + train = random.sample(trainval,tr) + + print("train and val size",tv) + print("train size",tr) + ftrainval = open(os.path.join(saveBasePath,'trainval.txt'), 'w') + ftest = open(os.path.join(saveBasePath,'test.txt'), 'w') + ftrain = open(os.path.join(saveBasePath,'train.txt'), 'w') + fval = open(os.path.join(saveBasePath,'val.txt'), 'w') + + for i in list: + name=total_xml[i][:-4]+'\n' + if i in trainval: + ftrainval.write(name) + if i in train: + ftrain.write(name) + else: + fval.write(name) + else: + ftest.write(name) + + ftrainval.close() + ftrain.close() + fval.close() + ftest.close() + print("Generate txt in ImageSets done.") -wd = getcwd() + if annotation_mode == 0 or annotation_mode == 2: + print("Generate 2007_train.txt and 2007_val.txt for 
diff --git a/yolo.py b/yolo.py
index 4b513dde39339b201d0af4d2a308c3756a43e49e..e29d2dd81f5531e829e6cb831b88a6022bd4572f 100644
--- a/yolo.py
+++ b/yolo.py
@@ -1,6 +1,3 @@
-#-------------------------------------#
-#   Create the YOLO class
-#-------------------------------------#
 import colorsys
 import os
 import time
@@ -8,33 +5,53 @@ import time
 import numpy as np
 import torch
 import torch.nn as nn
-from PIL import Image, ImageDraw, ImageFont
+from PIL import ImageDraw, ImageFont
 
-from nets.yolo4 import YoloBody
-from utils.utils import (DecodeBox, letterbox_image, non_max_suppression,
-                         yolo_correct_boxes)
+from nets.yolo import YoloBody
+from utils.utils import (cvtColor, get_anchors, get_classes, preprocess_input,
+                         resize_image)
+from utils.utils_bbox import DecodeBox
 
-
-#--------------------------------------------#
-#   To predict with your own trained model you need to modify two
-#   parameters: both model_path and classes_path must be changed!
-#   If a shape mismatch occurs, be sure to check the model_path
-#   and classes_path parameters used during training.
-#--------------------------------------------#
+'''
+Comments you must read before training on your own dataset!
+'''
 class YOLO(object):
     _defaults = {
+        #--------------------------------------------------------------------------#
+        #   To predict with your own trained model you must modify model_path and classes_path!
+        #   model_path points to the weight file under the logs folder; classes_path points to the txt under model_data.
+        #   If a shape mismatch occurs, also check that the model_path and classes_path used during training were set accordingly.
+        #--------------------------------------------------------------------------#
         "model_path"        : 'model_data/yolo4_weights.pth',
-        "anchors_path"      : 'model_data/yolo_anchors.txt',
         "classes_path"      : 'model_data/coco_classes.txt',
-        "model_image_size"  : (416, 416, 3),
+        #---------------------------------------------------------------------#
+        #   anchors_path is the txt file of the anchor boxes; it rarely needs changing.
+        #   anchors_mask helps the code find the corresponding anchors; it rarely needs changing.
+        #---------------------------------------------------------------------#
+        "anchors_path"      : 'model_data/yolo_anchors.txt',
+        "anchors_mask"      : [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
+        #---------------------------------------------------------------------#
+        #   The input image size; must be a multiple of 32.
+        #---------------------------------------------------------------------#
+        "input_shape"       : [416, 416],
+        #---------------------------------------------------------------------#
+        #   Only prediction boxes whose score exceeds this confidence are kept.
+        #---------------------------------------------------------------------#
         "confidence"        : 0.5,
-        "iou"               : 0.3,
-        "cuda"              : True,
+        #---------------------------------------------------------------------#
+        #   The IoU threshold used by non-maximum suppression.
+        #---------------------------------------------------------------------#
+        "nms_iou"           : 0.3,
         #---------------------------------------------------------------------#
         #   Controls whether letterbox_image is used for a distortion-free
         #   resize of the input image. After repeated tests, resizing directly
         #   with letterbox_image disabled gave better results.
         #---------------------------------------------------------------------#
         "letterbox_image"   : False,
+        #-------------------------------#
+        #   Whether to use CUDA.
+        #   Set this to False if you have no GPU.
+        #-------------------------------#
+        "cuda"              : True,
     }
 
     @classmethod
@@ -49,165 +66,106 @@ class YOLO(object):
     #---------------------------------------------------#
     def __init__(self, **kwargs):
         self.__dict__.update(self._defaults)
-        self.class_names = self._get_class()
-        self.anchors = self._get_anchors()
-        self.generate()
+        for name, value in kwargs.items():
+            setattr(self, name, value)
+
+        #---------------------------------------------------#
+        #   Get the classes and the anchor boxes.
+        #---------------------------------------------------#
+        self.class_names, self.num_classes  = get_classes(self.classes_path)
+        self.anchors, self.num_anchors      = get_anchors(self.anchors_path)
+        self.bbox_util                      = DecodeBox(self.anchors, self.num_classes, (self.input_shape[0], self.input_shape[1]), self.anchors_mask)
 
-    #---------------------------------------------------#
-    #   Get all the classes
-    #---------------------------------------------------#
-    def _get_class(self):
-        classes_path = os.path.expanduser(self.classes_path)
-        with open(classes_path) as f:
-            class_names = f.readlines()
-        class_names = [c.strip() for c in class_names]
-        return class_names
-
-    #---------------------------------------------------#
-    #   Get all the anchor boxes
-    #---------------------------------------------------#
-    def _get_anchors(self):
-        anchors_path = os.path.expanduser(self.anchors_path)
-        with open(anchors_path) as f:
-            anchors = f.readline()
-        anchors = [float(x) for x in anchors.split(',')]
-        return np.array(anchors).reshape([-1, 3, 2])[::-1,:,:]
+        #---------------------------------------------------#
+        #   Assign a different color to each class for drawing boxes.
+        #---------------------------------------------------#
+        hsv_tuples  = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
+        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
+        self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
+        self.generate()
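Because __init__ now folds keyword arguments over _defaults with setattr, thresholds can be overridden per instance without editing the class. A minimal usage sketch, assuming the weights named in model_path exist on disk (construction triggers generate() and loads them):

# Hypothetical usage: per-instance overrides leave the class defaults untouched.
yolo = YOLO(confidence=0.3, nms_iou=0.45, cuda=False)
print(yolo.confidence, YOLO._defaults["confidence"])   # 0.3 0.5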
 
     #---------------------------------------------------#
     #   Build the model
     #---------------------------------------------------#
     def generate(self):
         #---------------------------------------------------#
-        #   Build the yolov4 model
+        #   Build the YOLO model and load its weights.
         #---------------------------------------------------#
-        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()
+        self.net    = YoloBody(self.anchors_mask, self.num_classes)
+        device      = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.net.load_state_dict(torch.load(self.model_path, map_location=device))
+        self.net    = self.net.eval()
+        print('{} model, anchors, and classes loaded.'.format(self.model_path))
 
-        #---------------------------------------------------#
-        #   Load the yolov4 model weights
-        #---------------------------------------------------#
-        print('Loading weights into state dict...')
-        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        state_dict = torch.load(self.model_path, map_location=device)
-        self.net.load_state_dict(state_dict)
-        print('Finished!')
-
         if self.cuda:
             self.net = nn.DataParallel(self.net)
             self.net = self.net.cuda()
 
-        #---------------------------------------------------#
-        #   Build the decoding tools for the three feature layers.
-        #---------------------------------------------------#
-        self.yolo_decodes = []
-        for i in range(3):
-            self.yolo_decodes.append(DecodeBox(self.anchors[i], len(self.class_names), (self.model_image_size[1], self.model_image_size[0])))
-
-        print('{} model, anchors, and classes loaded.'.format(self.model_path))
-        # Assign a different color to each class for drawing boxes.
-        hsv_tuples = [(x / len(self.class_names), 1., 1.)
-                      for x in range(len(self.class_names))]
-        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
-        self.colors = list(
-            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
-                self.colors))
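A back-of-the-envelope check (not part of the diff) of why a wrong classes_path surfaces as a shape mismatch when load_state_dict runs above: each YOLO detection head predicts 3 anchors times (num_classes + 5) channels, so the head width is tied to the class count.

# Expected final-conv channel count per head for two common class counts.
num_anchors_per_head = 3
for num_classes in (80, 20):     # COCO vs. VOC class counts
    print(num_classes, '->', num_anchors_per_head * (num_classes + 5))
# 80 -> 255
# 20 -> 75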
     #---------------------------------------------------#
     #   Detect an image
     #---------------------------------------------------#
     def detect_image(self, image):
+        #---------------------------------------------------#
+        #   Compute the height and width of the input image.
+        #---------------------------------------------------#
+        image_shape = np.array(np.shape(image)[0:2])
         #---------------------------------------------------------#
         #   Convert the image to RGB here to prevent grayscale images from raising errors during prediction.
+        #   The code only supports prediction on RGB images; all other image types are converted to RGB.
         #---------------------------------------------------------#
-        image = image.convert('RGB')
-
-        image_shape = np.array(np.shape(image)[0:2])
+        image       = cvtColor(image)
         #---------------------------------------------------------#
         #   Add gray bars to the image for a distortion-free resize.
         #   Alternatively, resize directly for detection.
         #---------------------------------------------------------#
-        if self.letterbox_image:
-            crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
-        else:
-            crop_img = image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
-        photo = np.array(crop_img,dtype = np.float32) / 255.0
-        photo = np.transpose(photo, (2, 0, 1))
+        image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
         #---------------------------------------------------------#
         #   Add the batch_size dimension.
         #---------------------------------------------------------#
-        images = [photo]
+        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
 
         with torch.no_grad():
-            images = torch.from_numpy(np.asarray(images))
+            images = torch.from_numpy(image_data)
             if self.cuda:
                 images = images.cuda()
-
             #---------------------------------------------------------#
             #   Feed the image into the network for prediction!
             #---------------------------------------------------------#
             outputs = self.net(images)
-            output_list = []
-            for i in range(3):
-                output_list.append(self.yolo_decodes[i](outputs[i]))
-
+            outputs = self.bbox_util.decode_box(outputs)
             #---------------------------------------------------------#
             #   Stack the prediction boxes, then run non-maximum suppression.
             #---------------------------------------------------------#
-            output = torch.cat(output_list, 1)
-            batch_detections = non_max_suppression(output, len(self.class_names),
-                                                    conf_thres=self.confidence,
-                                                    nms_thres=self.iou)
-
-            #---------------------------------------------------------#
-            #   If no objects were detected, return the original image.
-            #---------------------------------------------------------#
-            try:
-                batch_detections = batch_detections[0].cpu().numpy()
-            except:
+            results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape,
+                        image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou)
+
+            if results[0] is None:
                 return image
-
-            #---------------------------------------------------------#
-            #   Filter the prediction boxes by score.
-            #---------------------------------------------------------#
-            top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
-            top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
-            top_label = np.array(batch_detections[top_index,-1],np.int32)
-            top_bboxes = np.array(batch_detections[top_index,:4])
-            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
-            #-----------------------------------------------------------------#
-            #   Before the image is fed into the network, letterbox_image adds
-            #   gray bars around it, so the resulting top_bboxes are relative
-            #   to the padded image and must be corrected to remove the bars.
-            #-----------------------------------------------------------------#
-            if self.letterbox_image:
-                boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
-            else:
-                top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
-                top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
-                top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
-                top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
-                boxes = np.concatenate([top_ymin,top_xmin,top_ymax,top_xmax], axis=-1)
-
-            font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
-
-            thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0], 1)
-
-            for i, c in enumerate(top_label):
-                predicted_class = self.class_names[c]
-                score = top_conf[i]
+            top_label   = np.array(results[0][:, 6], dtype = 'int32')
+            top_conf    = results[0][:, 4] * results[0][:, 5]
+            top_boxes   = results[0][:, :4]
+        #---------------------------------------------------------#
+        #   Set the font and the border thickness.
+        #---------------------------------------------------------#
+        font        = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
+        thickness   = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))
+
+        #---------------------------------------------------------#
+        #   Draw the results on the image.
+        #---------------------------------------------------------#
+        for i, c in list(enumerate(top_label)):
+            predicted_class = self.class_names[int(c)]
+            box             = top_boxes[i]
+            score           = top_conf[i]
 
-            top, left, bottom, right = boxes[i]
-            top = top - 5
-            left = left - 5
-            bottom = bottom + 5
-            right = right + 5
+            top, left, bottom, right = box
 
-            top = max(0, np.floor(top + 0.5).astype('int32'))
-            left = max(0, np.floor(left + 0.5).astype('int32'))
-            bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
-            right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))
+            top     = max(0, np.floor(top).astype('int32'))
+            left    = max(0, np.floor(left).astype('int32'))
+            bottom  = min(image.size[1], np.floor(bottom).astype('int32'))
+            right   = min(image.size[0], np.floor(right).astype('int32'))
 
-            # Draw the boxes
             label = '{} {:.2f}'.format(predicted_class, score)
             draw = ImageDraw.Draw(image)
             label_size = draw.textsize(label, font)
@@ -220,100 +178,113 @@ class YOLO(object):
                 text_origin = np.array([left, top + 1])
 
             for i in range(thickness):
-                draw.rectangle(
-                    [left + i, top + i, right - i, bottom - i],
-                    outline=self.colors[self.class_names.index(predicted_class)])
-            draw.rectangle(
-                [tuple(text_origin), tuple(text_origin + label_size)],
-                fill=self.colors[self.class_names.index(predicted_class)])
+                draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
+            draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
             draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
             del draw
+
         return image
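A minimal driver for detect_image, in the spirit of a predict script; the image path is a placeholder and the weights named in _defaults are assumed to be present:

# Hypothetical usage sketch for detect_image.
from PIL import Image

yolo    = YOLO()
image   = Image.open('img/street.jpg')   # placeholder path
r_image = yolo.detect_image(image)       # returns a PIL image with boxes and labels drawn
r_image.show()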
 
     def get_FPS(self, image, test_interval):
-        # Adjust the image so it matches the input requirements.
         image_shape = np.array(np.shape(image)[0:2])
-
+        #---------------------------------------------------------#
+        #   Convert the image to RGB here to prevent grayscale images from raising errors during prediction.
+        #   The code only supports prediction on RGB images; all other image types are converted to RGB.
+        #---------------------------------------------------------#
+        image       = cvtColor(image)
         #---------------------------------------------------------#
         #   Add gray bars to the image for a distortion-free resize.
         #   Alternatively, resize directly for detection.
         #---------------------------------------------------------#
-        if self.letterbox_image:
-            crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
-        else:
-            crop_img = image.convert('RGB')
-            crop_img = crop_img.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
-        photo = np.array(crop_img,dtype = np.float32) / 255.0
-        photo = np.transpose(photo, (2, 0, 1))
+        image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
         #---------------------------------------------------------#
         #   Add the batch_size dimension.
         #---------------------------------------------------------#
-        images = [photo]
+        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
 
         with torch.no_grad():
-            images = torch.from_numpy(np.asarray(images))
+            images = torch.from_numpy(image_data)
             if self.cuda:
                 images = images.cuda()
+            #---------------------------------------------------------#
+            #   Feed the image into the network for prediction!
+            #---------------------------------------------------------#
             outputs = self.net(images)
-            output_list = []
-            for i in range(3):
-                output_list.append(self.yolo_decodes[i](outputs[i]))
-            output = torch.cat(output_list, 1)
-            batch_detections = non_max_suppression(output, len(self.class_names),
-                                                    conf_thres=self.confidence,
-                                                    nms_thres=self.iou)
-            try:
-                batch_detections = batch_detections[0].cpu().numpy()
-                top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
-                top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
-                top_label = np.array(batch_detections[top_index,-1],np.int32)
-                top_bboxes = np.array(batch_detections[top_index,:4])
-                top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
-
-                if self.letterbox_image:
-                    boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
-                else:
-                    top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
-                    top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
-                    top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
-                    top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
-                    boxes = np.concatenate([top_ymin,top_xmin,top_ymax,top_xmax], axis=-1)
-
-            except:
-                pass
-
+            outputs = self.bbox_util.decode_box(outputs)
+            #---------------------------------------------------------#
+            #   Stack the prediction boxes, then run non-maximum suppression.
+            #---------------------------------------------------------#
+            results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape,
+                        image_shape, self.letterbox_image, conf_thres=self.confidence, nms_thres=self.nms_iou)
+
         t1 = time.time()
         for _ in range(test_interval):
            with torch.no_grad():
+                #---------------------------------------------------------#
+                #   Feed the image into the network for prediction!
+                #---------------------------------------------------------#
                 outputs = self.net(images)
-                output_list = []
-                for i in range(3):
-                    output_list.append(self.yolo_decodes[i](outputs[i]))
-                output = torch.cat(output_list, 1)
-                batch_detections = non_max_suppression(output, len(self.class_names),
-                                                        conf_thres=self.confidence,
-                                                        nms_thres=self.iou)
-                try:
-                    batch_detections = batch_detections[0].cpu().numpy()
-                    top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
-                    top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
-                    top_label = np.array(batch_detections[top_index,-1],np.int32)
-                    top_bboxes = np.array(batch_detections[top_index,:4])
-                    top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
-
-                    if self.letterbox_image:
-                        boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
-                    else:
-                        top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
-                        top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
-                        top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
-                        top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
-                        boxes = np.concatenate([top_ymin,top_xmin,top_ymax,top_xmax], axis=-1)
-
-                except:
-                    pass
-
+                outputs = self.bbox_util.decode_box(outputs)
+                #---------------------------------------------------------#
+                #   Stack the prediction boxes, then run non-maximum suppression.
+                #---------------------------------------------------------#
+                results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape,
+                            image_shape, self.letterbox_image, conf_thres=self.confidence, nms_thres=self.nms_iou)
+
         t2 = time.time()
         tact_time = (t2 - t1) / test_interval
         return tact_time
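get_FPS runs one warm-up pass, then times test_interval forward passes and returns the mean seconds per image, so FPS is its reciprocal. A usage sketch with a placeholder image path:

# Hypothetical timing run; assumes weights and the image exist locally.
from PIL import Image

yolo = YOLO()
img  = Image.open('img/street.jpg')               # placeholder path
tact_time = yolo.get_FPS(img, test_interval=100)  # mean seconds per image
print('%.4f s per image, %.2f FPS' % (tact_time, 1.0 / tact_time))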
+
+    def get_map_txt(self, image_id, image, class_names, map_out_path):
+        f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w")
+        image_shape = np.array(np.shape(image)[0:2])
+        #---------------------------------------------------------#
+        #   Convert the image to RGB here to prevent grayscale images from raising errors during prediction.
+        #   The code only supports prediction on RGB images; all other image types are converted to RGB.
+        #---------------------------------------------------------#
+        image       = cvtColor(image)
+        #---------------------------------------------------------#
+        #   Add gray bars to the image for a distortion-free resize.
+        #   Alternatively, resize directly for detection.
+        #---------------------------------------------------------#
+        image_data  = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
+        #---------------------------------------------------------#
+        #   Add the batch_size dimension.
+        #---------------------------------------------------------#
+        image_data  = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)
+
+        with torch.no_grad():
+            images = torch.from_numpy(image_data)
+            if self.cuda:
+                images = images.cuda()
+            #---------------------------------------------------------#
+            #   Feed the image into the network for prediction!
+            #---------------------------------------------------------#
+            outputs = self.net(images)
+            outputs = self.bbox_util.decode_box(outputs)
+            #---------------------------------------------------------#
+            #   Stack the prediction boxes, then run non-maximum suppression.
+            #---------------------------------------------------------#
+            results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape,
+                        image_shape, self.letterbox_image, conf_thres = self.confidence, nms_thres = self.nms_iou)
+
+        if results[0] is None:
+            return
+
+        top_label   = np.array(results[0][:, 6], dtype = 'int32')
+        top_conf    = results[0][:, 4] * results[0][:, 5]
+        top_boxes   = results[0][:, :4]
+
+        for i, c in list(enumerate(top_label)):
+            predicted_class = self.class_names[int(c)]
+            box             = top_boxes[i]
+            score           = str(top_conf[i])
+
+            top, left, bottom, right = box
+            if predicted_class not in class_names:
+                continue
+
+            f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))
+
+        f.close()
+        return
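Each line that get_map_txt writes follows the "class score left top right bottom" layout that the mAP computation later reads from the detection-results folder. A small parsing sketch (the values below are illustrative only, not real output):

# One line as written by f.write above: "<class> <score> <left> <top> <right> <bottom>"
line = "car 0.9876 125 48 310 205"   # illustrative values
cls, score, left, top, right, bottom = line.split()
print(cls, float(score), int(left), int(top), int(right), int(bottom))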