diff --git a/train.py b/train.py index d449b4bf42b9afb7009bc83dda7b9a231364df08..64973789e9908e078e22c2be573543f1ae94015a 100644 --- a/train.py +++ b/train.py @@ -41,7 +41,7 @@ if __name__ == "__main__": # Cuda 是否使用Cuda # 没有GPU可以设置成False #---------------------------------# - Cuda = True + Cuda = False #---------------------------------------------------------------------# # distributed 用于指定是否使用单机多卡分布式运行 # 终端指令仅支持Ubuntu。CUDA_VISIBLE_DEVICES用于在Ubuntu下指定显卡。 diff --git a/utils/callbacks.py b/utils/callbacks.py index 1c79015e1f8eed0d630af532ec789128cb3079ef..a378ac2c99c020164cf641c10dd3c0ec048d4e9d 100644 --- a/utils/callbacks.py +++ b/utils/callbacks.py @@ -148,12 +148,9 @@ class EvalCallback(): top_label = np.array(results[0][:, 7], dtype = 'int32') top_conf = results[0][:, 5] * results[0][:, 6] top_rboxes = results[0][:, :5] + top_rboxes[:, [0, 2]] *= image_shape[1] + top_rboxes[:, [1, 3]] *= image_shape[0] top_polys = rbox2poly(top_rboxes) - #---------------------------------------------------------# - # 将归一化的预测结果变为真实的预测框 - #---------------------------------------------------------# - top_polys[..., [0, 2, 4, 6]] *= image_shape[1] - top_polys[..., [1, 3, 5, 7]] *= image_shape[0] top_hbbs = poly2hbb(top_polys) top_100 = np.argsort(top_conf)[::-1][:self.max_boxes] top_hbbs = top_hbbs[top_100] diff --git a/utils/dataloader.py b/utils/dataloader.py index 9aae33ddbde9921053af597338b90bb2d3352cf9..13868cc6b885bf1fc43a25942413e1878c2b3c63 100644 --- a/utils/dataloader.py +++ b/utils/dataloader.py @@ -45,12 +45,12 @@ class YoloDataset(Dataset): lines = sample(self.annotation_lines, 3) lines.append(self.annotation_lines[index]) shuffle(lines) - image, box = self.get_random_data_with_Mosaic(lines, self.input_shape) + image, rbox = self.get_random_data_with_Mosaic(lines, self.input_shape) if self.mixup and self.rand() < self.mixup_prob: lines = sample(self.annotation_lines, 1) - image_2, box_2 = self.get_random_data(lines[0], self.input_shape, random = self.train) - image, box 
= self.get_random_data_with_MixUp(image, box, image_2, box_2) + image_2, rbox_2 = self.get_random_data(lines[0], self.input_shape, random = self.train) + image, rbox = self.get_random_data_with_MixUp(image, rbox, image_2, rbox_2) else: image, rbox = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train) @@ -97,11 +97,55 @@ class YoloDataset(Dataset): rbox = np.zeros((box.shape[0], 6)) rbox[..., :5] = poly2rbox(box[..., :8], (ih, iw), use_pi=True) rbox[..., 5] = box[..., 8] - #---------------------------------# - # 图像调整 - #---------------------------------# - image = image.resize((w,h), Image.BICUBIC) - image_data = np.array(image, np.uint8) + + if not random: + scale = min(w/iw, h/ih) + nw = int(iw*scale) + nh = int(ih*scale) + dx = (w-nw)//2 + dy = (h-nh)//2 + + #---------------------------------# + # 将图像多余的部分加上灰条 + #---------------------------------# + image = image.resize((nw,nh), Image.BICUBIC) + new_image = Image.new('RGB', (w,h), (128,128,128)) + new_image.paste(image, (dx, dy)) + image_data = np.array(new_image, np.float32) + + #---------------------------------# + # 对真实框进行调整 + #---------------------------------# + if len(rbox)>0: + np.random.shuffle(rbox) + rbox[:, 0] = rbox[:, 0]*nw/w + dx/w + rbox[:, 1] = rbox[:, 1]*nh/h + dy/h + rbox[:, 2] = rbox[:, 2]*nw/w + rbox[:, 3] = rbox[:, 3]*nh/h + + return image_data, rbox + + #------------------------------------------# + # 对图像进行缩放并且进行长和宽的扭曲 + #------------------------------------------# + new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter) + scale = self.rand(.25, 2) + if new_ar < 1: + nh = int(scale*h) + nw = int(nh*new_ar) + else: + nw = int(scale*w) + nh = int(nw/new_ar) + image = image.resize((nw,nh), Image.BICUBIC) + + #------------------------------------------# + # 将图像多余的部分加上灰条 + #------------------------------------------# + dx = int(self.rand(0, w-nw)) + dy = int(self.rand(0, h-nh)) + new_image = Image.new('RGB', (w,h), (128,128,128)) + 
def get_random_data_with_MixUp(self, image_1, rbox_1, image_2, rbox_2):
    """Blend two samples with MixUp and merge their rotated-box labels.

    The two images are averaged pixel-wise with equal weight (0.5 each) and
    the label arrays are stacked along the first axis.

    Args:
        image_1: First image, anything ``np.array`` can convert.
        rbox_1: Rotated-box labels of the first image, one row per box.
        image_2: Second image; must broadcast against ``image_1``.
        rbox_2: Rotated-box labels of the second image, one row per box.

    Returns:
        A ``(new_image, new_rboxes)`` tuple. ``new_image`` is a float32
        NumPy array. ``new_rboxes`` is ``rbox_2`` when ``rbox_1`` is empty,
        ``rbox_1`` when ``rbox_2`` is empty, otherwise their concatenation.

    NOTE(review): rows look like (cx, cy, w, h, angle, class) in normalised
    coordinates, judging by how the callers build ``rbox`` -- confirm.
    """
    new_image = np.array(image_1, np.float32) * 0.5 + np.array(image_2, np.float32) * 0.5

    # Concatenating with an empty label array is avoided on purpose: the
    # non-empty side is returned untouched.
    if len(rbox_1) == 0:
        new_rboxes = rbox_2
    elif len(rbox_2) == 0:
        new_rboxes = rbox_1
    else:
        new_rboxes = np.concatenate([rbox_1, rbox_2], axis=0)

    # No debug drawing here: ``new_image`` is a NumPy array, which
    # ``ImageDraw.Draw`` and ``.show()`` cannot handle, and opening a viewer
    # from inside the data loader would stall training.
    return new_image, new_rboxes
[0, 2]] *= image_shape[1] + top_rboxes[:, [1, 3]] *= image_shape[0] top_polys = rbox2poly(top_rboxes) - #---------------------------------------------------------# - # 将归一化的预测结果变为真实的预测框 - #---------------------------------------------------------# - top_polys[..., [0, 2, 4, 6]] *= image_shape[1] - top_polys[..., [1, 3, 5, 7]] *= image_shape[0] top_hbbs = poly2hbb(top_polys) for i, c in list(enumerate(top_label)): predicted_class = self.class_names[int(c)]