新增了图像随机缩放

e7dcbb0a · _白鹭先生_ · 3a3529f2 · e7dcbb0a · e7dcbb0a · e7dcbb0a
隐藏空白更改
内联并排

Showing 4 changed files with 91 additions and 41 deletions

train.py train.py +1 -1

utils/callbacks.py utils/callbacks.py +2 -5

utils/dataloader.py utils/dataloader.py +83 -24

yolo.py yolo.py +5 -11

未找到文件。
--- a/train.py
+++ b/train.py
@@ -41,7 +41,7 @@ if __name__ == "__main__":
    #   Cuda    是否使用Cuda
    #           没有GPU可以设置成False
    #---------------------------------#
-    Cuda            = True
+    Cuda            = False
    #---------------------------------------------------------------------#
    #   distributed     用于指定是否使用单机多卡分布式运行
    #                   终端指令仅支持Ubuntu。CUDA_VISIBLE_DEVICES用于在Ubuntu下指定显卡。

--- a/utils/callbacks.py
+++ b/utils/callbacks.py
@@ -148,12 +148,9 @@ class EvalCallback():
            top_label   = np.array(results[0][:, 7], dtype = 'int32')
            top_conf    = results[0][:, 5] * results[0][:, 6]
            top_rboxes  = results[0][:, :5]
+            top_rboxes[:, [0, 2]]  *= image_shape[1]
+            top_rboxes[:, [1, 3]]  *= image_shape[0]
            top_polys   = rbox2poly(top_rboxes)
-            #---------------------------------------------------------#
-            #   将归一化的预测结果变为真实的预测框
-            #---------------------------------------------------------#
-            top_polys[..., [0, 2, 4, 6]] *= image_shape[1]
-            top_polys[..., [1, 3, 5, 7]] *= image_shape[0]
            top_hbbs    = poly2hbb(top_polys)
        top_100     = np.argsort(top_conf)[::-1][:self.max_boxes]
        top_hbbs    = top_hbbs[top_100]

--- a/utils/dataloader.py
+++ b/utils/dataloader.py
@@ -45,12 +45,12 @@ class YoloDataset(Dataset):
            lines = sample(self.annotation_lines, 3)
            lines.append(self.annotation_lines[index])
            shuffle(lines)
-            image, box  = self.get_random_data_with_Mosaic(lines, self.input_shape)
+            image, rbox  = self.get_random_data_with_Mosaic(lines, self.input_shape)
            
            if self.mixup and self.rand() < self.mixup_prob:
                lines           = sample(self.annotation_lines, 1)
-                image_2, box_2  = self.get_random_data(lines[0], self.input_shape, random = self.train)
-                image, box      = self.get_random_data_with_MixUp(image, box, image_2, box_2)
+                image_2, rbox_2  = self.get_random_data(lines[0], self.input_shape, random = self.train)
+                image, rbox      = self.get_random_data_with_MixUp(image, rbox, image_2, rbox_2)
        else:
            image, rbox      = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)

@@ -97,11 +97,55 @@ class YoloDataset(Dataset):
        rbox    = np.zeros((box.shape[0], 6))
        rbox[..., :5] = poly2rbox(box[..., :8], (ih, iw), use_pi=True)
        rbox[..., 5]  = box[..., 8]
-        #---------------------------------#
-        #   图像调整
-        #---------------------------------#
-        image       = image.resize((w,h), Image.BICUBIC)
-        image_data  = np.array(image, np.uint8)
+
+        if not random:
+            scale = min(w/iw, h/ih)
+            nw = int(iw*scale)
+            nh = int(ih*scale)
+            dx = (w-nw)//2
+            dy = (h-nh)//2
+
+            #---------------------------------#
+            #   将图像多余的部分加上灰条
+            #---------------------------------#
+            image       = image.resize((nw,nh), Image.BICUBIC)
+            new_image   = Image.new('RGB', (w,h), (128,128,128))
+            new_image.paste(image, (dx, dy))
+            image_data  = np.array(new_image, np.float32)
+
+            #---------------------------------#
+            #   对真实框进行调整
+            #---------------------------------#
+            if len(rbox)>0:
+                np.random.shuffle(rbox)
+                rbox[:, 0] = rbox[:, 0]*nw/w + dx/w
+                rbox[:, 1] = rbox[:, 1]*nh/h + dy/h
+                rbox[:, 2] = rbox[:, 2]*nw/w
+                rbox[:, 3] = rbox[:, 3]*nh/h
+
+            return image_data, rbox
+
+        #------------------------------------------#
+        #   对图像进行缩放并且进行长和宽的扭曲
+        #------------------------------------------#
+        new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
+        scale = self.rand(.25, 2)
+        if new_ar < 1:
+            nh = int(scale*h)
+            nw = int(nh*new_ar)
+        else:
+            nw = int(scale*w)
+            nh = int(nw/new_ar)
+        image = image.resize((nw,nh), Image.BICUBIC)
+
+        #------------------------------------------#
+        #   将图像多余的部分加上灰条
+        #------------------------------------------#
+        dx = int(self.rand(0, w-nw))
+        dy = int(self.rand(0, h-nh))
+        new_image = Image.new('RGB', (w,h), (128,128,128))
+        new_image.paste(image, (dx, dy))
+        image = new_image
        #------------------------------------------#
        #   翻转图像
        #------------------------------------------#
@@ -134,15 +178,19 @@ class YoloDataset(Dataset):
        #---------------------------------#
        if len(rbox)>0:
            np.random.shuffle(rbox)
+            rbox[:, 0] = rbox[:, 0]*nw/w + dx/w
+            rbox[:, 1] = rbox[:, 1]*nh/h + dy/h
+            rbox[:, 2] = rbox[:, 2]*nw/w
+            rbox[:, 3] = rbox[:, 3]*nh/h
            if flip: 
                rbox[:, 0] = 1 - rbox[:, 0]
                rbox[:, 4] *= -1
        # 查看旋转框是否正确
-        # draw = ImageDraw.Draw(image)
-        # polys = rbox2poly(rbox[..., :5])*w
-        # for poly in polys:
-        #     draw.polygon(xy=list(poly))
-        # image.show()
+        draw = ImageDraw.Draw(image)
+        polys = rbox2poly(rbox[..., :5])*w
+        for poly in polys:
+            draw.polygon(xy=list(poly))
+        image.show()
        return image_data, rbox
    
    def merge_bboxes(self, bboxes, cutx, cuty):
@@ -218,15 +266,20 @@ class YoloDataset(Dataset):
            #   保存框的位置
            #---------------------------------#
            box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])
-            
+            #------------------------------#
+            #   将polygon转换为rbox
+            #------------------------------#
+            rbox    = np.zeros((box.shape[0], 6))
+            rbox[..., :5] = poly2rbox(box[..., :8], (ih, iw), use_pi=True)
+            rbox[..., 5]  = box[..., 8]
            #---------------------------------#
            #   是否翻转图片
            #---------------------------------#
            flip = self.rand()<.5
-            if flip and len(box)>0:
+            if flip and len(rbox)>0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
-                box[:, [0,2]] = iw - box[:, [2,0]]
-
+                rbox[:, 0] = 1 - rbox[:, 0]
+                rbox[:, 4] *= -1
            #------------------------------------------#
            #   对图像进行缩放并且进行长和宽的扭曲
            #------------------------------------------#
@@ -322,15 +375,21 @@ class YoloDataset(Dataset):

        return new_image, new_boxes

-    def get_random_data_with_MixUp(self, image_1, box_1, image_2, box_2):
+    def get_random_data_with_MixUp(self, image_1, rbox_1, image_2, rbox_2):
+        """Average two images 50/50; return the blend and the rboxes of both (either set may be empty)."""
        new_image = np.array(image_1, np.float32) * 0.5 + np.array(image_2, np.float32) * 0.5
-        if len(box_1) == 0:
-            new_boxes = box_2
-        elif len(box_2) == 0:
-            new_boxes = box_1
+        if len(rbox_1) == 0:
+            new_rboxes = rbox_2
+        elif len(rbox_2) == 0:
+            new_rboxes = rbox_1
        else:
-            new_boxes = np.concatenate([box_1, box_2], axis=0)
-        return new_image, new_boxes
+            new_rboxes = np.concatenate([rbox_1, rbox_2], axis=0)
+        # NOTE: new_image is a float32 ndarray, so ImageDraw.Draw(new_image) and
+        # new_image.show() raise; the box-check preview must not run during training.
+        # To inspect the rotated boxes manually, convert to a PIL image first:
+        #   canvas = Image.fromarray(new_image.astype(np.uint8))
+        #   ImageDraw.Draw(canvas).polygon(xy=list(poly))  # poly from rbox2poly(...) * canvas.width
+        return new_image, new_rboxes
    
    
 # DataLoader中collate_fn使用

--- a/yolo.py
+++ b/yolo.py
@@ -25,7 +25,7 @@ class YOLO(object):
        #   验证集损失较低不代表mAP较高，仅代表该权值在验证集上泛化性能较好。
        #   如果出现shape不匹配，同时要注意训练时的model_path和classes_path参数的修改
        #--------------------------------------------------------------------------#
-        "model_path"        : 'model_data/ep010-loss0.039-val_loss0.032.pth',
+        "model_path"        : 'model_data/ep100-loss0.022-val_loss0.034.pth',
        "classes_path"      : 'model_data/ssdd_classes.txt',
        #---------------------------------------------------------------------#
        #   anchors_path代表先验框对应的txt文件，一般不修改。
@@ -157,12 +157,9 @@ class YOLO(object):
            top_label   = np.array(results[0][:, 7], dtype = 'int32')
            top_conf    = results[0][:, 5] * results[0][:, 6]
            top_rboxes  = results[0][:, :5]
+            top_rboxes[:, [0, 2]]  *= image_shape[1]
+            top_rboxes[:, [1, 3]]  *= image_shape[0]
            top_polys   = rbox2poly(top_rboxes)
-            #---------------------------------------------------------#
-            #   将归一化的预测结果变为真实的预测框
-            #---------------------------------------------------------#
-            top_polys[..., [0, 2, 4, 6]] *= image_shape[1]
-            top_polys[..., [1, 3, 5, 7]] *= image_shape[0]
        #---------------------------------------------------------#
        #   设置字体与边框厚度
        #---------------------------------------------------------#
@@ -381,12 +378,9 @@ class YOLO(object):
            top_label   = np.array(results[0][:, 7], dtype = 'int32')
            top_conf    = results[0][:, 5] * results[0][:, 6]
            top_rboxes  = results[0][:, :5]
+            top_rboxes[:, [0, 2]]  *= image_shape[1]
+            top_rboxes[:, [1, 3]]  *= image_shape[0]
            top_polys   = rbox2poly(top_rboxes)
-            #---------------------------------------------------------#
-            #   将归一化的预测结果变为真实的预测框
-            #---------------------------------------------------------#
-            top_polys[..., [0, 2, 4, 6]] *= image_shape[1]
-            top_polys[..., [1, 3, 5, 7]] *= image_shape[0]
            top_hbbs    = poly2hbb(top_polys)
        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]