import math

import numpy as np
import torch

from utils.utils_rbox import *
from utils.nms_rotated import obb_nms


class DecodeBox:
    """Decode raw rotated-box YOLO head outputs and post-process them.

    The head is expected to emit, per anchor, ``6 + num_classes`` channels:
    ``x, y, w, h, angle, objectness`` followed by the class scores.
    """

    def __init__(self, anchors, num_classes, input_shape, anchors_mask=None):
        """Store the decoding configuration.

        Args:
            anchors: Anchor sizes as ``(width, height)`` pairs in input-image
                pixels. It is indexed with a list of indices
                (``anchors[anchors_mask[i]]``), so it must support fancy
                indexing (e.g. a NumPy array).
            num_classes: Number of object classes.
            input_shape: ``(height, width)`` of the network input.
            anchors_mask: For each feature level, the indices into ``anchors``
                used by that level. Defaults to
                ``[[6, 7, 8], [3, 4, 5], [0, 1, 2]]``.
        """
        super().__init__()
        # A fresh list is built per instance instead of using a mutable
        # default argument, which would be shared between all instances.
        if anchors_mask is None:
            anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

        self.anchors = anchors
        self.num_classes = num_classes
        # x, y, w, h, angle, objectness + one score per class.
        self.bbox_attrs = 6 + num_classes
        self.input_shape = input_shape
        # ---------------------------------------------------------------#
        #   Example anchor assignment from the original comments:
        #   13x13 feature map -> [142, 110], [192, 243], [459, 401]
        #   26x26 feature map -> [36, 75], [76, 55], [72, 146]
        #   52x52 feature map -> [12, 16], [19, 36], [40, 28]
        # ---------------------------------------------------------------#
        self.anchors_mask = anchors_mask

    def decode_box(self, inputs):
        """Turn raw head outputs into normalised rotated boxes.

        Args:
            inputs: Iterable of feature tensors, one per detection level, each
                of shape ``(batch, num_anchors * (6 + num_classes), H, W)``.
                The i-th tensor is decoded with ``self.anchors_mask[i]``.

        Returns:
            A list with one detached tensor per level, of shape
            ``(batch, num_anchors * H * W, 6 + num_classes)``. The last axis
            holds ``x, y, w, h`` (divided by the feature-map width/height),
            the angle in radians in ``(-pi/2, pi/2)``, the objectness score
            and the per-class scores.
        """
        outputs = []
        for level, feature in enumerate(inputs):
            mask = self.anchors_mask[level]
            num_anchors = len(mask)

            # -----------------------------------------------------------#
            #   E.g. for a 640x640 input with 80 classes the three levels
            #   are (batch, 3 * (5 + 1 + 80), 20, 20), (.., 40, 40) and
            #   (.., 80, 80).
            # -----------------------------------------------------------#
            batch_size = feature.size(0)
            input_height = feature.size(2)
            input_width = feature.size(3)

            # For a 640x640 input the strides are 32, 16 and 8.
            stride_h = self.input_shape[0] / input_height
            stride_w = self.input_shape[1] / input_width

            # Anchor sizes expressed in feature-map cells.
            scaled_anchors = [
                (float(anchor_width) / stride_w, float(anchor_height) / stride_h)
                for anchor_width, anchor_height in self.anchors[mask]
            ]

            # (batch, A * attrs, H, W) -> (batch, A, H, W, attrs)
            prediction = feature.view(
                batch_size, num_anchors, self.bbox_attrs, input_height, input_width
            ).permute(0, 1, 3, 4, 2).contiguous()

            # Centre offsets, size factors, angle, objectness, class scores.
            x = torch.sigmoid(prediction[..., 0])
            y = torch.sigmoid(prediction[..., 1])
            w = torch.sigmoid(prediction[..., 2])
            h = torch.sigmoid(prediction[..., 3])
            angle = torch.sigmoid(prediction[..., 4])
            conf = torch.sigmoid(prediction[..., 5])
            pred_cls = torch.sigmoid(prediction[..., 6:])

            device = prediction.device

            # -----------------------------------------------------------#
            #   Cell grid (top-left corner of every cell). The tensors are
            #   kept broadcastable against (batch, A, H, W) instead of being
            #   materialised with repeat(); float32 matches the original
            #   FloatTensor behaviour.
            # -----------------------------------------------------------#
            grid_x = torch.arange(
                input_width, dtype=torch.float32, device=device
            ).view(1, 1, 1, input_width)
            grid_y = torch.arange(
                input_height, dtype=torch.float32, device=device
            ).view(1, 1, input_height, 1)

            # Per-anchor width/height, broadcast over batch and grid.
            anchor_sizes = torch.tensor(
                scaled_anchors, dtype=torch.float32, device=device
            )
            anchor_w = anchor_sizes[:, 0].view(1, num_anchors, 1, 1)
            anchor_h = anchor_sizes[:, 1].view(1, num_anchors, 1, 1)

            # -----------------------------------------------------------#
            #   Adjust the anchors with the predictions:
            #   x, y: 0..1 -> 0..2 -> -0.5..1.5 cells around the grid corner
            #   w, h: 0..1 -> 0..2 -> 0..4 times the anchor size
            # -----------------------------------------------------------#
            box_x = x * 2. - 0.5 + grid_x
            box_y = y * 2. - 0.5 + grid_y
            box_w = (w * 2) ** 2 * anchor_w
            box_h = (h * 2) ** 2 * anchor_h
            pred_boxes = torch.stack((box_x, box_y, box_w, box_h), dim=-1).float()

            # Sigmoid output 0..1 mapped to an angle in (-pi/2, pi/2).
            pred_theta = (angle - 0.5) * math.pi

            # Normalise box coordinates by the feature-map size.
            _scale = torch.tensor(
                [input_width, input_height, input_width, input_height],
                dtype=torch.float32,
                device=device,
            )
            output = torch.cat(
                (
                    pred_boxes.reshape(batch_size, -1, 4) / _scale,
                    pred_theta.reshape(batch_size, -1, 1),
                    conf.reshape(batch_size, -1, 1),
                    pred_cls.reshape(batch_size, -1, self.num_classes),
                ),
                -1,
            )
            # Results are returned without autograd history.
            outputs.append(output.detach())
        return outputs

    def non_max_suppression(self, prediction, num_classes, input_shape, image_shape,
                            letterbox_image, conf_thres=0.5, nms_thres=0.4):
        """Filter decoded predictions by score and run per-class rotated NMS.

        Args:
            prediction: Tensor of shape ``(batch, num_boxes, 6 + num_classes)``
                laid out as produced by ``decode_box`` (levels concatenated
                along axis 1).
            num_classes: Number of class-score columns to read after column 6.
            input_shape: ``(height, width)`` of the network input.
            image_shape: ``(height, width)`` of the original image.
            letterbox_image: Whether the image was letterboxed (padded) before
                inference; forwarded to ``yolo_correct_boxes``.
            conf_thres: Minimum ``objectness * class score`` to keep a box.
            nms_thres: Threshold forwarded to ``obb_nms``.

        Returns:
            A list with one entry per image: ``None`` when nothing survives,
            otherwise a NumPy array of shape ``(n, 8)`` holding
            ``x, y, w, h, angle, obj_conf, class_conf, class_pred`` where the
            first five columns have been passed through
            ``yolo_correct_boxes``.
        """
        output = [None for _ in range(len(prediction))]
        for i, image_pred in enumerate(prediction):
            # Best class score and its index, each of shape (num_boxes, 1).
            class_conf, class_pred = torch.max(
                image_pred[:, 6:6 + num_classes], 1, keepdim=True
            )

            # First filtering round on objectness * class score. The mask is
            # already 1-D; squeezing it would collapse it to 0-dim when there
            # is exactly one candidate and break the indexing below.
            conf_mask = image_pred[:, 5] * class_conf[:, 0] >= conf_thres

            image_pred = image_pred[conf_mask]
            class_conf = class_conf[conf_mask]
            class_pred = class_pred[conf_mask]
            if not image_pred.size(0):
                continue

            # detections: (n, 8) =
            #   x, y, w, h, angle, obj_conf, class_conf, class_pred
            detections = torch.cat(
                (image_pred[:, :6], class_conf.float(), class_pred.float()), 1
            )

            # Classes present among the surviving boxes, moved back to the
            # device the detections live on.
            unique_labels = detections[:, -1].cpu().unique()
            if detections.is_cuda:
                unique_labels = unique_labels.cuda()

            for c in unique_labels:
                # All surviving boxes of this class.
                detections_class = detections[detections[:, -1] == c]

                # Rotated NMS on (x, y, w, h, angle) scored by
                # objectness * class score. The second return value of
                # obb_nms is used as the indices of the boxes to keep.
                _, keep = obb_nms(
                    detections_class[:, :5],
                    detections_class[:, 5] * detections_class[:, 6],
                    nms_thres
                )
                max_detections = detections_class[keep]

                # Accumulate the kept boxes for this image.
                output[i] = max_detections if output[i] is None else torch.cat(
                    (output[i], max_detections)
                )

            if output[i] is not None:
                output[i] = output[i].cpu().numpy()
                output[i][:, :5] = self.yolo_correct_boxes(
                    output[i], input_shape, image_shape, letterbox_image
                )
        return output

    def yolo_correct_boxes(self, output, input_shape, image_shape, letterbox_image):
        """Map normalised rotated boxes back to original-image pixels.

        Args:
            output: 2-D NumPy array whose first five columns are
                ``x, y, w, h, angle`` with ``x, y, w, h`` normalised to the
                network input. The array is not modified.
            input_shape: ``(height, width)`` of the network input.
            image_shape: ``(height, width)`` of the original image.
            letterbox_image: When true, undo the letterbox padding offset and
                scaling before converting to pixels.

        Returns:
            A new array of shape ``(n, 5)`` with ``x, y, w, h`` in pixels of
            the original image and the angle column unchanged.
        """
        # Copies, so the caller's array is never written through a view.
        box_xy = np.array(output[..., 0:2])
        box_wh = np.array(output[..., 2:4])
        angle = output[..., 4:5]

        input_shape = np.array(input_shape)
        image_shape = np.array(image_shape)

        if letterbox_image:
            # -----------------------------------------------------------#
            #   new_shape is the size of the resized image inside the
            #   letterboxed input; offset is the padding relative to the
            #   top-left corner. Both are in (y, x) order, hence the
            #   [::-1] when applying them to (x, y) / (w, h) columns.
            # -----------------------------------------------------------#
            new_shape = np.round(image_shape * np.min(input_shape / image_shape))
            offset = (input_shape - new_shape) / 2. / input_shape
            scale = input_shape / new_shape

            box_xy = (box_xy - offset[::-1]) * scale[::-1]
            box_wh = box_wh * scale[::-1]

        rboxes = np.concatenate([box_xy, box_wh, angle], axis=-1)
        # x and w scale with the image width, y and h with the image height.
        rboxes[:, [0, 2]] *= image_shape[1]
        rboxes[:, [1, 3]] *= image_shape[0]
        return rboxes


if __name__ == "__main__":
    import matplotlib.pyplot as plt
    import numpy as np

    # -------------------------------------------------------------------#
    #   Demo: decode one feature level and plot anchors vs. decoded boxes.
    # -------------------------------------------------------------------#
    def get_anchors_and_decode(input, input_shape, anchors, anchors_mask, num_classes):
        """Decode a single feature level and visualise it with matplotlib.

        Uses ``anchors_mask[2]`` and plots the three anchors and the three
        decoded boxes of grid cell (5, 5) on top of ``img/street.jpg``.
        """
        # input: (batch, 3 * (5 + 1 + num_classes), 20, 20)
        batch_size = input.size(0)
        input_height = input.size(2)
        input_width = input.size(3)

        # With input_shape = [640, 640] and a 20x20 map: 640 / 20 = 32.
        stride_h = input_shape[0] / input_height
        stride_w = input_shape[1] / input_width

        # Anchor sizes expressed in feature-map cells.
        scaled_anchors = [
            (anchor_width / stride_w, anchor_height / stride_h)
            for anchor_width, anchor_height in anchors[anchors_mask[2]]
        ]

        # (batch, 3 * attrs, 20, 20) -> (batch, 3, attrs, 20, 20)
        #                            -> (batch, 3, 20, 20, attrs)
        prediction = input.view(
            batch_size, len(anchors_mask[2]), num_classes + 6, input_height, input_width
        ).permute(0, 1, 3, 4, 2).contiguous()

        # Centre offsets and size factors.
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        w = torch.sigmoid(prediction[..., 2])
        h = torch.sigmoid(prediction[..., 3])
        # Objectness and class scores in 0..1 (computed for illustration,
        # not used by the plot).
        conf = torch.sigmoid(prediction[..., 5])
        pred_cls = torch.sigmoid(prediction[..., 6:])

        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor

        # ---------------------------------------------------------------#
        #   Cell grid (top-left corner of every cell), shape
        #   (batch, 3, 20, 20). grid_x holds the column index
        #   [0, 1, ..., 19] in every row; grid_y is the transposed version
        #   and holds the row index.
        # ---------------------------------------------------------------#
        grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat(
            batch_size * len(anchors_mask[2]), 1, 1).view(x.shape).type(FloatTensor)
        grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat(
            batch_size * len(anchors_mask[2]), 1, 1).view(y.shape).type(FloatTensor)

        # ---------------------------------------------------------------#
        #   Anchor width/height laid out on the grid:
        #   (batch, 3, 20 * 20) -> (batch, 3, 20, 20)
        # ---------------------------------------------------------------#
        anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
        anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
        anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape)
        anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape)

        # ---------------------------------------------------------------#
        #   Adjust the anchors with the predictions:
        #   x  0..1 -> 0..2 -> -0.5..1.5 + grid_x
        #   y  0..1 -> 0..2 -> -0.5..1.5 + grid_y
        #   w  0..1 -> 0..2 -> 0..4 * anchor_w
        #   h  0..1 -> 0..2 -> 0..4 * anchor_h
        # ---------------------------------------------------------------#
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data * 2. - 0.5 + grid_x
        pred_boxes[..., 1] = y.data * 2. - 0.5 + grid_y
        pred_boxes[..., 2] = (w.data * 2) ** 2 * anchor_w
        pred_boxes[..., 3] = (h.data * 2) ** 2 * anchor_h

        # Grid cell whose anchors / predictions are drawn.
        point_h = 5
        point_w = 5

        # Convert from cells to pixels; 32 is the stride of the 20x20 level
        # for a 640x640 input.
        box_xy = pred_boxes[..., 0:2].cpu().numpy() * 32
        box_wh = pred_boxes[..., 2:4].cpu().numpy() * 32
        grid_x = grid_x.cpu().numpy() * 32
        grid_y = grid_y.cpu().numpy() * 32
        anchor_w = anchor_w.cpu().numpy() * 32
        anchor_h = anchor_h.cpu().numpy() * 32

        # Left panel: the three anchors centred on the chosen grid point.
        fig = plt.figure()
        ax = fig.add_subplot(121)
        from PIL import Image
        img = Image.open("img/street.jpg").resize([640, 640])
        plt.imshow(img, alpha=0.5)
        plt.ylim(-30, 650)
        plt.xlim(-30, 650)
        plt.scatter(grid_x, grid_y)
        plt.scatter(point_h * 32, point_w * 32, c='black')
        plt.gca().invert_yaxis()

        anchor_left = grid_x - anchor_w / 2
        anchor_top = grid_y - anchor_h / 2

        for a in range(3):
            ax.add_patch(plt.Rectangle(
                [anchor_left[0, a, point_h, point_w], anchor_top[0, a, point_h, point_w]],
                anchor_w[0, a, point_h, point_w], anchor_h[0, a, point_h, point_w],
                color="r", fill=False))

        # Right panel: the decoded boxes for the same grid point.
        ax = fig.add_subplot(122)
        plt.imshow(img, alpha=0.5)
        plt.ylim(-30, 650)
        plt.xlim(-30, 650)
        plt.scatter(grid_x, grid_y)
        plt.scatter(point_h * 32, point_w * 32, c='black')
        plt.scatter(box_xy[0, :, point_h, point_w, 0], box_xy[0, :, point_h, point_w, 1], c='r')
        plt.gca().invert_yaxis()

        pre_left = box_xy[..., 0] - box_wh[..., 0] / 2
        pre_top = box_xy[..., 1] - box_wh[..., 1] / 2

        for a in range(3):
            ax.add_patch(plt.Rectangle(
                [pre_left[0, a, point_h, point_w], pre_top[0, a, point_h, point_w]],
                box_wh[0, a, point_h, point_w, 0], box_wh[0, a, point_h, point_w, 1],
                color="r", fill=False))

        plt.show()

    # Random feature map: 258 = 3 anchors * (6 + 80 classes) channels.
    feat = torch.from_numpy(np.random.normal(0.2, 0.5, [4, 258, 20, 20])).float()
    anchors = np.array([[116, 90], [156, 198], [373, 326], [30, 61], [62, 45], [59, 119], [10, 13], [16, 30], [33, 23]])
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    get_anchors_and_decode(feat, [640, 640], anchors, anchors_mask, 80)