修改数据加载为obb

66cc7347 · Egrt · 6b5ec0ad · 66cc7347 · 66cc7347 · 66cc7347
6 changed files
--- a/2007_train.txt
+++ b/2007_train.txt
--- a/2007_val.txt
+++ b/2007_val.txt
--- a/nets/yolo_training.py
+++ b/nets/yolo_training.py
@@ -137,7 +137,7 @@ class YOLOLoss(nn.Module):
                #-------------------------------------------#
                xy      = prediction_pos[:, :2].sigmoid() * 2. - 0.5
                wh      = (prediction_pos[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
-                angle   = (prediction_pos[:, 4:5].sigmoid() - 0.5) * torch.pi
+                angle   = (prediction_pos[:, 4:5].sigmoid() - 0.5) * math.pi
                box_theta = torch.cat((xy, wh, angle), 1)
                #-------------------------------------------#
                #   对真实框进行处理，映射到特征层上
@@ -150,7 +150,7 @@ class YOLOLoss(nn.Module):
                #   计算预测框和真实框的回归损失
                #-------------------------------------------#
                kldloss                 = self.kldbbox(box_theta, selected_tbox_theta)
-                loss                    += kldloss.mean()
+                box_loss                += kldloss.mean()
                #-------------------------------------------#
                #   根据预测结果的iou获得置信度损失的gt
                #-------------------------------------------#
@@ -299,7 +299,7 @@ class YOLOLoss(nn.Module):
                grid    = torch.stack([gi, gj], dim=1).type_as(fg_pred)
                pxy     = (fg_pred[:, :2].sigmoid() * 2. - 0.5 + grid) * self.stride[i]
                pwh     = (fg_pred[:, 2:4].sigmoid() * 2) ** 2 * anch[i][idx] * self.stride[i]
-                pangle  = (fg_pred[:, 4:5].sigmoid() - 0.5) * torch.pi
+                pangle  = (fg_pred[:, 4:5].sigmoid() - 0.5) * math.pi
                pxywh   = torch.cat([pxy, pwh, pangle], dim=-1)
                pxyxys.append(pxywh)
            

--- a/train.py
+++ b/train.py
@@ -41,7 +41,7 @@ if __name__ == "__main__":
    #   Cuda    是否使用Cuda
    #           没有GPU可以设置成False
    #---------------------------------#
-    Cuda            = False
+    Cuda            = True
    #---------------------------------------------------------------------#
    #   distributed     用于指定是否使用单机多卡分布式运行
    #                   终端指令仅支持Ubuntu。CUDA_VISIBLE_DEVICES用于在Ubuntu下指定显卡。

--- a/utils/dataloader.py
+++ b/utils/dataloader.py
@@ -74,8 +74,8 @@ class YoloDataset(Dataset):
            #   序号为4的部分，为真实框的旋转角度
            #   序号为5的部分，为真实框的种类
            #---------------------------------------------------#
-            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
-            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
+            # box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
+            # box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
            
            #---------------------------------------------------#
            #   调整顺序，符合训练的格式
@@ -105,102 +105,8 @@ class YoloDataset(Dataset):
        #   获得预测框
        #------------------------------#
        box     = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
-
-        if not random:
-            scale = min(w/iw, h/ih)
-            nw = int(iw*scale)
-            nh = int(ih*scale)
-            dx = (w-nw)//2
-            dy = (h-nh)//2
-
-            #---------------------------------#
-            #   将图像多余的部分加上灰条
-            #---------------------------------#
-            image       = image.resize((nw,nh), Image.BICUBIC)
-            new_image   = Image.new('RGB', (w,h), (128,128,128))
-            new_image.paste(image, (dx, dy))
-            image_data  = np.array(new_image, np.float32)
-
-            #---------------------------------#
-            #   对真实框进行调整
-            #---------------------------------#
-            if len(box)>0:
-                np.random.shuffle(box)
-                box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
-                box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
-                box[:, 0:2][box[:, 0:2]<0] = 0
-                box[:, 2][box[:, 2]>w] = w
-                box[:, 3][box[:, 3]>h] = h
-                box_w = box[:, 2] - box[:, 0]
-                box_h = box[:, 3] - box[:, 1]
-                box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
-
-            return image_data, box
-                
-        #------------------------------------------#
-        #   对图像进行缩放并且进行长和宽的扭曲
-        #------------------------------------------#
-        new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
-        scale = self.rand(.25, 2)
-        if new_ar < 1:
-            nh = int(scale*h)
-            nw = int(nh*new_ar)
-        else:
-            nw = int(scale*w)
-            nh = int(nw/new_ar)
-        image = image.resize((nw,nh), Image.BICUBIC)
-
-        #------------------------------------------#
-        #   将图像多余的部分加上灰条
-        #------------------------------------------#
-        dx = int(self.rand(0, w-nw))
-        dy = int(self.rand(0, h-nh))
-        new_image = Image.new('RGB', (w,h), (128,128,128))
-        new_image.paste(image, (dx, dy))
-        image = new_image
-
-        #------------------------------------------#
-        #   翻转图像
-        #------------------------------------------#
-        flip = self.rand()<.5
-        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
-
-        image_data      = np.array(image, np.uint8)
-        #---------------------------------#
-        #   对图像进行色域变换
-        #   计算色域变换的参数
-        #---------------------------------#
-        r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
-        #---------------------------------#
-        #   将图像转到HSV上
-        #---------------------------------#
-        hue, sat, val   = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
-        dtype           = image_data.dtype
-        #---------------------------------#
-        #   应用变换
-        #---------------------------------#
-        x       = np.arange(0, 256, dtype=r.dtype)
-        lut_hue = ((x * r[0]) % 180).astype(dtype)
-        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
-        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
-
-        image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
-        image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
-
-        #---------------------------------#
-        #   对真实框进行调整
-        #---------------------------------#
-        if len(box)>0:
-            np.random.shuffle(box)
-            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
-            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
-            if flip: box[:, [0,2]] = w - box[:, [2,0]]
-            box[:, 0:2][box[:, 0:2]<0] = 0
-            box[:, 2][box[:, 2]>w] = w
-            box[:, 3][box[:, 3]>h] = h
-            box_w = box[:, 2] - box[:, 0]
-            box_h = box[:, 3] - box[:, 1]
-            box = box[np.logical_and(box_w>1, box_h>1)] 
+        image   = image.resize((w,h), Image.BICUBIC)
+        image_data  = np.array(image, np.float32)
        
        return image_data, box
    

--- a/voc_annotation.py
+++ b/voc_annotation.py
@@ -56,7 +56,7 @@ def convert_annotation(year, image_id, list_file):
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('rotated_bndbox')
-        b = (int(float(xmlbox.find('rotated_bbox_cx').text)), int(float(xmlbox.find('rotated_bbox_cy').text)), int(float(xmlbox.find('rotated_bbox_w').text)), int(float(xmlbox.find('rotated_bbox_h').text)), float(xmlbox.find('rotated_bbox_theta').text))
+        b = (int(float(xmlbox.find('rotated_bbox_cx').text)), int(float(xmlbox.find('rotated_bbox_cy').text)), int(float(xmlbox.find('rotated_bbox_w').text)), int(float(xmlbox.find('rotated_bbox_h').text)), int(float(xmlbox.find('rotated_bbox_theta').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
        
        nums[classes.index(cls)] = nums[classes.index(cls)] + 1