diff --git a/get_map.py b/get_map.py index ccda5eb56385dc6f0f3bc11f8cc6556ab470fb81..a46eb6dfcd28c58f5a7ecd5ed4b8b319b73c713a 100644 --- a/get_map.py +++ b/get_map.py @@ -3,9 +3,10 @@ import xml.etree.ElementTree as ET from PIL import Image from tqdm import tqdm - +import numpy as np from utils.utils import get_classes from utils.utils_map import get_coco_map, get_map +from utils.utils_rbox import poly2hbb from yolo import YOLO if __name__ == "__main__": @@ -24,12 +25,12 @@ if __name__ == "__main__": # map_mode为3代表仅仅计算VOC_map。 # map_mode为4代表利用COCO工具箱计算当前数据集的0.50:0.95map。需要获得预测结果、获得真实框后并安装pycocotools才行 #-------------------------------------------------------------------------------------------------------------------# - map_mode = 0 + map_mode = 3 #--------------------------------------------------------------------------------------# # 此处的classes_path用于指定需要测量VOC_map的类别 # 一般情况下与训练和预测所用的classes_path一致即可 #--------------------------------------------------------------------------------------# - classes_path = 'model_data/voc_classes.txt' + classes_path = 'model_data/ssdd_classes.txt' #--------------------------------------------------------------------------------------# # MINOVERLAP用于指定想要获得的mAP0.x,mAP0.x的意义是什么请同学们百度一下。 # 比如计算mAP0.75,可以设定MINOVERLAP = 0.75。 @@ -115,12 +116,22 @@ if __name__ == "__main__": obj_name = obj.find('name').text if obj_name not in class_names: continue - bndbox = obj.find('bndbox') - left = bndbox.find('xmin').text - top = bndbox.find('ymin').text - right = bndbox.find('xmax').text - bottom = bndbox.find('ymax').text - + bndbox = obj.find('rotated_bndbox') + x1 = bndbox.find('x1').text + y1 = bndbox.find('y1').text + x2 = bndbox.find('x2').text + y2 = bndbox.find('y2').text + x3 = bndbox.find('x3').text + y3 = bndbox.find('y3').text + x4 = bndbox.find('x4').text + y4 = bndbox.find('y4').text + poly = np.array([[x1, y1, x2, y2, x3, y3, x4, y4]], dtype=np.int32) + hbb = poly2hbb(poly) + xc, yc, w, h = hbb[0] + left = xc - w/2 + top = yc - h/2 + right = xc + w/2 + bottom = yc + h/2 if difficult_flag: new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom)) else: diff --git a/train.py b/train.py index 6f23256b206623edd73d69bd0c5630cdb4b158dd..64973789e9908e078e22c2be573543f1ae94015a 100644 --- a/train.py +++ b/train.py @@ -195,7 +195,7 @@ if __name__ == "__main__": # Init_lr 模型的最大学习率 # Min_lr 模型的最小学习率,默认为最大学习率的0.01 #------------------------------------------------------------------# - Init_lr = 1e-2 + Init_lr = 1e-3 Min_lr = Init_lr * 0.01 #------------------------------------------------------------------# # optimizer_type 使用到的优化器种类,可选的有adam、sgd @@ -205,9 +205,9 @@ if __name__ == "__main__": # weight_decay 权值衰减,可防止过拟合 # adam会导致weight_decay错误,使用adam时建议设置为0。 #------------------------------------------------------------------# - optimizer_type = "sgd" + optimizer_type = "adam" momentum = 0.937 - weight_decay = 5e-4 + weight_decay = 0 #------------------------------------------------------------------# # lr_decay_type 使用到的学习率下降方式,可选的有step、cos #------------------------------------------------------------------# diff --git a/utils/dataloader.py b/utils/dataloader.py index fe0c13b6247363b858aa187f018863c05d18de52..9aae33ddbde9921053af597338b90bb2d3352cf9 100644 --- a/utils/dataloader.py +++ b/utils/dataloader.py @@ -101,8 +101,48 @@ class YoloDataset(Dataset): # 图像调整 #---------------------------------# image = image.resize((w,h), Image.BICUBIC) - image_data = np.array(image, np.float32) + image_data = np.array(image, np.uint8) + #------------------------------------------# + # 翻转图像 + #------------------------------------------# + flip = self.rand()<.5 + if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) + + image_data = np.array(image, np.uint8) + #---------------------------------# + # 对图像进行色域变换 + # 计算色域变换的参数 + #---------------------------------# + r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 + #---------------------------------# + # 将图像转到HSV上 + #---------------------------------# + hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV)) + dtype = image_data.dtype + #---------------------------------# + # 应用变换 + #---------------------------------# + x = np.arange(0, 256, dtype=r.dtype) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = np.clip(x * r[2], 0, 255).astype(dtype) + image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) + image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB) + #---------------------------------# + # 对真实框进行调整 + #---------------------------------# + if len(rbox)>0: + np.random.shuffle(rbox) + if flip: + rbox[:, 0] = 1 - rbox[:, 0] + rbox[:, 4] *= -1 + # 查看旋转框是否正确 + # draw = ImageDraw.Draw(image) + # polys = rbox2poly(rbox[..., :5])*w + # for poly in polys: + # draw.polygon(xy=list(poly)) + # image.show() return image_data, rbox def merge_bboxes(self, bboxes, cutx, cuty): diff --git a/yolo.py b/yolo.py index dce3a4d9cd02e5637d4fcc3dcffb797e1aabc84c..fe5d4e5dc7dbd573d5651946deab48d3e90368c8 100644 --- a/yolo.py +++ b/yolo.py @@ -11,7 +11,7 @@ from nets.yolo import YoloBody from utils.utils import (cvtColor, get_anchors, get_classes, preprocess_input, resize_image, show_config) from utils.utils_bbox import DecodeBox -from utils.utils_rbox import rbox2poly +from utils.utils_rbox import rbox2poly, poly2hbb ''' 训练自己的数据集必看注释! ''' @@ -25,7 +25,7 @@ class YOLO(object): # 验证集损失较低不代表mAP较高,仅代表该权值在验证集上泛化性能较好。 # 如果出现shape不匹配,同时要注意训练时的model_path和classes_path参数的修改 #--------------------------------------------------------------------------# - "model_path" : 'logs/best_epoch_weights.pth', + "model_path" : 'model_data/ep010-loss0.039-val_loss0.032.pth', "classes_path" : 'model_data/ssdd_classes.txt', #---------------------------------------------------------------------# # anchors_path代表先验框对应的txt文件,一般不修改。 @@ -46,7 +46,7 @@ class YOLO(object): #---------------------------------------------------------------------# # 只有得分大于置信度的预测框会被保留下来 #---------------------------------------------------------------------# - "confidence" : 0.05, + "confidence" : 0.3, #---------------------------------------------------------------------# # 非极大抑制所用到的nms_iou大小 #---------------------------------------------------------------------# @@ -55,7 +55,7 @@ class YOLO(object): # 该变量用于控制是否使用letterbox_image对输入图像进行不失真的resize, # 在多次测试后,发现关闭letterbox_image直接resize的效果更好 #---------------------------------------------------------------------# - "letterbox_image" : True, + "letterbox_image" : False, #-------------------------------# # 是否使用Cuda # 没有GPU可以设置成False @@ -198,7 +198,7 @@ class YOLO(object): text_origin = np.array([poly[0], poly[1]], np.int32) draw.polygon(xy=polygon_list, outline=self.colors[c]) - draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) + draw.text(text_origin, str(label,'UTF-8'), fill=self.colors[c], font=font) del draw return image @@ -378,16 +378,26 @@ class YOLO(object): if results[0] is None: return - top_label = np.array(results[0][:, 6], dtype = 'int32') - top_conf = results[0][:, 4] * results[0][:, 5] - top_boxes = results[0][:, :4] - + top_label = np.array(results[0][:, 7], dtype = 'int32') + top_conf = results[0][:, 5] * results[0][:, 6] + top_rboxes = results[0][:, :5] + top_polys = rbox2poly(top_rboxes) + #---------------------------------------------------------# + # 将归一化的预测结果变为真实的预测框 + #---------------------------------------------------------# + top_polys[..., [0, 2, 4, 6]] *= image_shape[1] + top_polys[..., [1, 3, 5, 7]] *= image_shape[0] + top_hbbs = poly2hbb(top_polys) for i, c in list(enumerate(top_label)): predicted_class = self.class_names[int(c)] - box = top_boxes[i] + hbb = top_hbbs[i] score = str(top_conf[i]) - top, left, bottom, right = box + xc, yc, w, h = hbb + left = xc - w/2 + top = yc - h/2 + right = xc + w/2 + bottom = yc + h/2 if predicted_class not in class_names: continue