提交 66cc7347 编写于 作者: E Egrt

修改数据加载为obb

上级 6b5ec0ad
因为它太大了,无法显示 source diff。你可以改为查看 blob。
此差异已折叠。
......@@ -137,7 +137,7 @@ class YOLOLoss(nn.Module):
#-------------------------------------------#
xy = prediction_pos[:, :2].sigmoid() * 2. - 0.5
wh = (prediction_pos[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
angle = (prediction_pos[:, 4:5].sigmoid() - 0.5) * torch.pi
angle = (prediction_pos[:, 4:5].sigmoid() - 0.5) * math.pi
box_theta = torch.cat((xy, wh, angle), 1)
#-------------------------------------------#
# 对真实框进行处理,映射到特征层上
......@@ -150,7 +150,7 @@ class YOLOLoss(nn.Module):
# 计算预测框和真实框的回归损失
#-------------------------------------------#
kldloss = self.kldbbox(box_theta, selected_tbox_theta)
loss += kldloss.mean()
box_loss += kldloss.mean()
#-------------------------------------------#
# 根据预测结果的iou获得置信度损失的gt
#-------------------------------------------#
......@@ -299,7 +299,7 @@ class YOLOLoss(nn.Module):
grid = torch.stack([gi, gj], dim=1).type_as(fg_pred)
pxy = (fg_pred[:, :2].sigmoid() * 2. - 0.5 + grid) * self.stride[i]
pwh = (fg_pred[:, 2:4].sigmoid() * 2) ** 2 * anch[i][idx] * self.stride[i]
pangle = (fg_pred[:, 4:5].sigmoid() - 0.5) * torch.pi
pangle = (fg_pred[:, 4:5].sigmoid() - 0.5) * math.pi
pxywh = torch.cat([pxy, pwh, pangle], dim=-1)
pxyxys.append(pxywh)
......
......@@ -41,7 +41,7 @@ if __name__ == "__main__":
# Cuda 是否使用Cuda
# 没有GPU可以设置成False
#---------------------------------#
Cuda = False
Cuda = True
#---------------------------------------------------------------------#
# distributed 用于指定是否使用单机多卡分布式运行
# 终端指令仅支持Ubuntu。CUDA_VISIBLE_DEVICES用于在Ubuntu下指定显卡。
......
......@@ -74,8 +74,8 @@ class YoloDataset(Dataset):
# 序号为4的部分,为真实框的旋转角度
# 序号为5的部分,为真实框的种类
#---------------------------------------------------#
box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
# box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
# box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
#---------------------------------------------------#
# 调整顺序,符合训练的格式
......@@ -105,102 +105,8 @@ class YoloDataset(Dataset):
# Load the ground-truth boxes (parsed from the annotation line), not predictions
#------------------------------#
box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
if not random:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
dx = (w-nw)//2
dy = (h-nh)//2
#---------------------------------#
# 将图像多余的部分加上灰条
#---------------------------------#
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image, np.float32)
#---------------------------------#
# 对真实框进行调整
#---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
return image_data, box
#------------------------------------------#
# 对图像进行缩放并且进行长和宽的扭曲
#------------------------------------------#
new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
scale = self.rand(.25, 2)
if new_ar < 1:
nh = int(scale*h)
nw = int(nh*new_ar)
else:
nw = int(scale*w)
nh = int(nw/new_ar)
image = image.resize((nw,nh), Image.BICUBIC)
#------------------------------------------#
# 将图像多余的部分加上灰条
#------------------------------------------#
dx = int(self.rand(0, w-nw))
dy = int(self.rand(0, h-nh))
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image = new_image
#------------------------------------------#
# 翻转图像
#------------------------------------------#
flip = self.rand()<.5
if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
image_data = np.array(image, np.uint8)
#---------------------------------#
# 对图像进行色域变换
# 计算色域变换的参数
#---------------------------------#
r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
#---------------------------------#
# 将图像转到HSV上
#---------------------------------#
hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
dtype = image_data.dtype
#---------------------------------#
# 应用变换
#---------------------------------#
x = np.arange(0, 256, dtype=r.dtype)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
#---------------------------------#
# 对真实框进行调整
#---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
if flip: box[:, [0,2]] = w - box[:, [2,0]]
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)]
image = image.resize((w,h), Image.BICUBIC)
image_data = np.array(image, np.float32)
return image_data, box
......
......@@ -56,7 +56,7 @@ def convert_annotation(year, image_id, list_file):
continue
cls_id = classes.index(cls)
xmlbox = obj.find('rotated_bndbox')
b = (int(float(xmlbox.find('rotated_bbox_cx').text)), int(float(xmlbox.find('rotated_bbox_cy').text)), int(float(xmlbox.find('rotated_bbox_w').text)), int(float(xmlbox.find('rotated_bbox_h').text)), float(xmlbox.find('rotated_bbox_theta').text))
b = (int(float(xmlbox.find('rotated_bbox_cx').text)), int(float(xmlbox.find('rotated_bbox_cy').text)), int(float(xmlbox.find('rotated_bbox_w').text)), int(float(xmlbox.find('rotated_bbox_h').text)), int(float(xmlbox.find('rotated_bbox_theta').text)))
list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
nums[classes.index(cls)] = nums[classes.index(cls)] + 1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册