修复val

915b2ccf · _白鹭先生_ · 7f94270d · 915b2ccf · 915b2ccf · 915b2ccf
隐藏空白更改
内联并排

Showing 4 changed files with 85 additions and 24 deletions

get_map.py get_map.py +20 -9

train.py train.py +3 -3

utils/dataloader.py utils/dataloader.py +41 -1

yolo.py yolo.py +21 -11

未找到文件。
--- a/get_map.py
+++ b/get_map.py
@@ -3,9 +3,10 @@ import xml.etree.ElementTree as ET

 from PIL import Image
 from tqdm import tqdm
-
+import numpy as np
 from utils.utils import get_classes
 from utils.utils_map import get_coco_map, get_map
+from utils.utils_rbox import poly2hbb
 from yolo import YOLO

 if __name__ == "__main__":
@@ -24,12 +25,12 @@ if __name__ == "__main__":
    #   map_mode为3代表仅仅计算VOC_map。
    #   map_mode为4代表利用COCO工具箱计算当前数据集的0.50:0.95map。需要获得预测结果、获得真实框后并安装pycocotools才行
    #-------------------------------------------------------------------------------------------------------------------#
-    map_mode        = 0
+    map_mode        = 3
    #--------------------------------------------------------------------------------------#
    #   此处的classes_path用于指定需要测量VOC_map的类别
    #   一般情况下与训练和预测所用的classes_path一致即可
    #--------------------------------------------------------------------------------------#
-    classes_path    = 'model_data/voc_classes.txt'
+    classes_path    = 'model_data/ssdd_classes.txt'
    #--------------------------------------------------------------------------------------#
    #   MINOVERLAP用于指定想要获得的mAP0.x，mAP0.x的意义是什么请同学们百度一下。
    #   比如计算mAP0.75，可以设定MINOVERLAP = 0.75。
@@ -115,12 +116,22 @@ if __name__ == "__main__":
                    obj_name = obj.find('name').text
                    if obj_name not in class_names:
                        continue
-                    bndbox  = obj.find('bndbox')
-                    left    = bndbox.find('xmin').text
-                    top     = bndbox.find('ymin').text
-                    right   = bndbox.find('xmax').text
-                    bottom  = bndbox.find('ymax').text
-
+                    bndbox  = obj.find('rotated_bndbox')
+                    x1      = bndbox.find('x1').text
+                    y1      = bndbox.find('y1').text
+                    x2      = bndbox.find('x2').text
+                    y2      = bndbox.find('y2').text
+                    x3      = bndbox.find('x3').text
+                    y3      = bndbox.find('y3').text
+                    x4      = bndbox.find('x4').text
+                    y4      = bndbox.find('y4').text
+                    poly    = np.array([[x1, y1, x2, y2, x3, y3, x4, y4]], dtype=np.int32)
+                    hbb     = poly2hbb(poly)
+                    xc, yc, w, h = hbb[0]
+                    left   = xc - w/2
+                    top    = yc - h/2
+                    right  = xc + w/2
+                    bottom = yc + h/2
                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:

--- a/train.py
+++ b/train.py
@@ -195,7 +195,7 @@ if __name__ == "__main__":
    #   Init_lr         模型的最大学习率
    #   Min_lr          模型的最小学习率，默认为最大学习率的0.01
    #------------------------------------------------------------------#
-    Init_lr             = 1e-2
+    Init_lr             = 1e-3
    Min_lr              = Init_lr * 0.01
    #------------------------------------------------------------------#
    #   optimizer_type  使用到的优化器种类，可选的有adam、sgd
@@ -205,9 +205,9 @@ if __name__ == "__main__":
    #   weight_decay    权值衰减，可防止过拟合
    #                   adam会导致weight_decay错误，使用adam时建议设置为0。
    #------------------------------------------------------------------#
-    optimizer_type      = "sgd"
+    optimizer_type      = "adam"
    momentum            = 0.937
-    weight_decay        = 5e-4
+    weight_decay        = 0
    #------------------------------------------------------------------#
    #   lr_decay_type   使用到的学习率下降方式，可选的有step、cos
    #------------------------------------------------------------------#

--- a/utils/dataloader.py
+++ b/utils/dataloader.py
@@ -101,8 +101,48 @@ class YoloDataset(Dataset):
        #   图像调整
        #---------------------------------#
        image       = image.resize((w,h), Image.BICUBIC)
-        image_data  = np.array(image, np.float32)
+        image_data  = np.array(image, np.uint8)
+        #------------------------------------------#
+        #   翻转图像
+        #------------------------------------------#
+        flip = self.rand()<.5
+        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
+        
+        image_data      = np.array(image, np.uint8)
+        #---------------------------------#
+        #   对图像进行色域变换
+        #   计算色域变换的参数
+        #---------------------------------#
+        r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
+        #---------------------------------#
+        #   将图像转到HSV上
+        #---------------------------------#
+        hue, sat, val   = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
+        dtype           = image_data.dtype
+        #---------------------------------#
+        #   应用变换
+        #---------------------------------#
+        x       = np.arange(0, 256, dtype=r.dtype)
+        lut_hue = ((x * r[0]) % 180).astype(dtype)
+        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

+        image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
+        image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
+        #---------------------------------#
+        #   对真实框进行调整
+        #---------------------------------#
+        if len(rbox)>0:
+            np.random.shuffle(rbox)
+            if flip: 
+                rbox[:, 0] = 1 - rbox[:, 0]
+                rbox[:, 4] *= -1
+        # 查看旋转框是否正确
+        # draw = ImageDraw.Draw(image)
+        # polys = rbox2poly(rbox[..., :5])*w
+        # for poly in polys:
+        #     draw.polygon(xy=list(poly))
+        # image.show()
        return image_data, rbox
    
    def merge_bboxes(self, bboxes, cutx, cuty):

--- a/yolo.py
+++ b/yolo.py
@@ -11,7 +11,7 @@ from nets.yolo import YoloBody
 from utils.utils import (cvtColor, get_anchors, get_classes, preprocess_input,
                         resize_image, show_config)
 from utils.utils_bbox import DecodeBox
-from utils.utils_rbox import rbox2poly
+from utils.utils_rbox import rbox2poly, poly2hbb
 '''
 训练自己的数据集必看注释！
 '''
@@ -25,7 +25,7 @@ class YOLO(object):
        #   验证集损失较低不代表mAP较高，仅代表该权值在验证集上泛化性能较好。
        #   如果出现shape不匹配，同时要注意训练时的model_path和classes_path参数的修改
        #--------------------------------------------------------------------------#
-        "model_path"        : 'logs/best_epoch_weights.pth',
+        "model_path"        : 'model_data/ep010-loss0.039-val_loss0.032.pth',
        "classes_path"      : 'model_data/ssdd_classes.txt',
        #---------------------------------------------------------------------#
        #   anchors_path代表先验框对应的txt文件，一般不修改。
@@ -46,7 +46,7 @@ class YOLO(object):
        #---------------------------------------------------------------------#
        #   只有得分大于置信度的预测框会被保留下来
        #---------------------------------------------------------------------#
-        "confidence"        : 0.05,
+        "confidence"        : 0.3,
        #---------------------------------------------------------------------#
        #   非极大抑制所用到的nms_iou大小
        #---------------------------------------------------------------------#
@@ -55,7 +55,7 @@ class YOLO(object):
        #   该变量用于控制是否使用letterbox_image对输入图像进行不失真的resize，
        #   在多次测试后，发现关闭letterbox_image直接resize的效果更好
        #---------------------------------------------------------------------#
-        "letterbox_image"   : True,
+        "letterbox_image"   : False,
        #-------------------------------#
        #   是否使用Cuda
        #   没有GPU可以设置成False
@@ -198,7 +198,7 @@ class YOLO(object):
            text_origin = np.array([poly[0], poly[1]], np.int32)

            draw.polygon(xy=polygon_list, outline=self.colors[c])
-            draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
+            draw.text(text_origin, str(label,'UTF-8'), fill=self.colors[c], font=font)
            del draw

        return image
@@ -378,16 +378,26 @@ class YOLO(object):
            if results[0] is None: 
                return 

-            top_label   = np.array(results[0][:, 6], dtype = 'int32')
-            top_conf    = results[0][:, 4] * results[0][:, 5]
-            top_boxes   = results[0][:, :4]
-
+            top_label   = np.array(results[0][:, 7], dtype = 'int32')
+            top_conf    = results[0][:, 5] * results[0][:, 6]
+            top_rboxes  = results[0][:, :5]
+            top_polys   = rbox2poly(top_rboxes)
+            #---------------------------------------------------------#
+            #   将归一化的预测结果变为真实的预测框
+            #---------------------------------------------------------#
+            top_polys[..., [0, 2, 4, 6]] *= image_shape[1]
+            top_polys[..., [1, 3, 5, 7]] *= image_shape[0]
+            top_hbbs    = poly2hbb(top_polys)
        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
-            box             = top_boxes[i]
+            hbb             = top_hbbs[i]
            score           = str(top_conf[i])

-            top, left, bottom, right = box
+            xc, yc, w, h = hbb
+            left   = xc - w/2
+            top    = yc - h/2
+            right  = xc + w/2
+            bottom = yc + h/2
            if predicted_class not in class_names:
                continue