import numpy as np

from utils.cyops import kitti_utils, roipool3d_utils, iou3d_utils

# When True, foreground sampling and the mixed hard/easy background sampling
# take deterministic indices instead of random draws (the single-kind
# fallback branches below still draw randomly).
CLOSE_RANDOM = False


def get_proposal_target_func(cfg, mode='TRAIN'):
    """Build the RCNN proposal-target function bound to ``cfg``.

    :param cfg: configuration object; this module reads ``cfg.RCNN.*``,
        ``cfg.AUG_DATA`` and ``cfg.AUG_ROT_RANGE`` from it.
    :param mode: accepted for interface compatibility; not read by the code
        in this file.
    :return: the ``generate_proposal_target`` closure.
    """

    def sample_rois_for_rcnn(roi_boxes3d, gt_boxes3d):
        """Sample a fixed number of fg/bg rois per image and pair each with a gt box.

        :param roi_boxes3d: (B, M, 7)
        :param gt_boxes3d: (B, N, 7) [x, y, z, h, w, l, ry]; an optional 8th
            ``cls`` column is accepted and dropped. Trailing rows whose
            entries sum to zero are treated as padding and removed.
        :return
            batch_rois: (B, ROI_PER_IMAGE, 7)
            batch_gt_of_rois: (B, ROI_PER_IMAGE, 7)
            batch_roi_iou: (B, ROI_PER_IMAGE)
        :raises ValueError: if an image has no non-zero gt row.
        :raises NotImplementedError: if an image has neither fg nor bg candidates.
        """
        batch_size = roi_boxes3d.shape[0]
        roi_per_image = cfg.RCNN.ROI_PER_IMAGE
        fg_rois_per_image = int(np.round(cfg.RCNN.FG_RATIO * roi_per_image))

        batch_rois = np.zeros((batch_size, roi_per_image, 7))
        batch_gt_of_rois = np.zeros((batch_size, roi_per_image, 7))
        batch_roi_iou = np.zeros((batch_size, roi_per_image))

        for idx in range(batch_size):
            cur_roi, cur_gt = roi_boxes3d[idx], gt_boxes3d[idx]

            # Strip trailing all-zero rows. The loop is bounded so that an
            # image without any valid gt box fails with a clear message
            # instead of wrapping around through negative indices.
            k = cur_gt.shape[0] - 1
            while k >= 0 and cur_gt[k].sum() == 0:
                k -= 1
            if k < 0:
                raise ValueError('sample_rois_for_rcnn: image %d has no non-zero gt box' % idx)
            # Only the 7 box parameters are used downstream (the output
            # buffers and aug_roi_by_noise both assume 7 columns).
            cur_gt = cur_gt[:k + 1, 0:7]

            iou3d = iou3d_utils.boxes_iou3d(cur_roi, cur_gt)  # (M, N)
            max_overlaps = np.max(iou3d, axis=1)
            gt_assignment = np.argmax(iou3d, axis=1)

            # sample fg, easy_bg, hard_bg
            fg_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH)
            fg_inds = np.where(max_overlaps >= fg_thresh)[0].reshape(-1)

            # TODO: this will mix the fg and bg when CLS_BG_THRESH_LO < iou < CLS_BG_THRESH
            easy_bg_inds = np.where(max_overlaps < cfg.RCNN.CLS_BG_THRESH_LO)[0].reshape(-1)
            hard_bg_inds = np.where((max_overlaps < cfg.RCNN.CLS_BG_THRESH) &
                                    (max_overlaps >= cfg.RCNN.CLS_BG_THRESH_LO))[0].reshape(-1)

            fg_num_rois = fg_inds.shape[0]
            bg_num_rois = hard_bg_inds.shape[0] + easy_bg_inds.shape[0]

            if fg_num_rois > 0 and bg_num_rois > 0:
                # sampling fg (without replacement, capped at the fg quota)
                fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)
                if CLOSE_RANDOM:
                    fg_inds = fg_inds[:fg_rois_per_this_image]
                else:
                    rand_num = np.random.permutation(fg_num_rois)
                    fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

                # sampling bg fills the remainder of the quota
                bg_rois_per_this_image = roi_per_image - fg_rois_per_this_image
                bg_inds = sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image)

            elif fg_num_rois > 0 and bg_num_rois == 0:
                # sampling fg with replacement; the indices must be integers,
                # NumPy rejects float arrays as indices.
                rand_num = np.floor(np.random.rand(roi_per_image) * fg_num_rois).astype(np.int64)
                fg_inds = fg_inds[rand_num]
                fg_rois_per_this_image = roi_per_image
                bg_rois_per_this_image = 0

            elif bg_num_rois > 0 and fg_num_rois == 0:
                # sampling bg
                bg_rois_per_this_image = roi_per_image
                bg_inds = sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image)
                fg_rois_per_this_image = 0

            else:
                # Every roi falls between CLS_BG_THRESH and the fg threshold
                # (or there are no rois at all).
                raise NotImplementedError(
                    'no foreground or background roi candidates for image %d' % idx)

            # augment the rois by noise
            roi_list, roi_iou_list, roi_gt_list = [], [], []
            if fg_rois_per_this_image > 0:
                fg_rois_src = cur_roi[fg_inds]
                gt_of_fg_rois = cur_gt[gt_assignment[fg_inds]]
                iou3d_src = max_overlaps[fg_inds]
                fg_rois, fg_iou3d = aug_roi_by_noise(
                    fg_rois_src, gt_of_fg_rois, iou3d_src, aug_times=cfg.RCNN.ROI_FG_AUG_TIMES)
                roi_list.append(fg_rois)
                roi_iou_list.append(fg_iou3d)
                roi_gt_list.append(gt_of_fg_rois)

            if bg_rois_per_this_image > 0:
                bg_rois_src = cur_roi[bg_inds]
                gt_of_bg_rois = cur_gt[gt_assignment[bg_inds]]
                iou3d_src = max_overlaps[bg_inds]
                aug_times = 1 if cfg.RCNN.ROI_FG_AUG_TIMES > 0 else 0
                bg_rois, bg_iou3d = aug_roi_by_noise(
                    bg_rois_src, gt_of_bg_rois, iou3d_src, aug_times=aug_times)
                roi_list.append(bg_rois)
                roi_iou_list.append(bg_iou3d)
                roi_gt_list.append(gt_of_bg_rois)

            batch_rois[idx] = np.concatenate(roi_list, axis=0)
            batch_gt_of_rois[idx] = np.concatenate(roi_gt_list, axis=0)
            batch_roi_iou[idx] = np.concatenate(roi_iou_list, axis=0)

        return batch_rois, batch_gt_of_rois, batch_roi_iou

    def sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image):
        """Draw ``bg_rois_per_this_image`` background indices (with replacement).

        When both hard and easy candidates exist, ``cfg.RCNN.HARD_BG_RATIO`` of
        the quota (rounded down) comes from the hard set and the rest from the
        easy set; otherwise the whole quota comes from the non-empty set.

        :param hard_bg_inds: 1-D index array of hard background rois
        :param easy_bg_inds: 1-D index array of easy background rois
        :param bg_rois_per_this_image: number of indices to return
        :return: 1-D index array of length ``bg_rois_per_this_image``
        :raises NotImplementedError: if both candidate sets are empty.
        """
        hard_num, easy_num = hard_bg_inds.shape[0], easy_bg_inds.shape[0]

        if hard_num > 0 and easy_num > 0:
            hard_bg_rois_num = int(bg_rois_per_this_image * cfg.RCNN.HARD_BG_RATIO)
            easy_bg_rois_num = bg_rois_per_this_image - hard_bg_rois_num

            # sampling hard bg
            if CLOSE_RANDOM:
                # deterministic: cycle 0..hard_num-1 until the quota is filled
                rand_idx = np.arange(hard_bg_rois_num) % hard_num
            else:
                rand_idx = np.random.randint(low=0, high=hard_num, size=(hard_bg_rois_num,))
            hard_bg_inds = hard_bg_inds[rand_idx]

            # sampling easy bg
            if CLOSE_RANDOM:
                rand_idx = np.arange(easy_bg_rois_num) % easy_num
            else:
                rand_idx = np.random.randint(low=0, high=easy_num, size=(easy_bg_rois_num,))
            easy_bg_inds = easy_bg_inds[rand_idx]

            bg_inds = np.concatenate([hard_bg_inds, easy_bg_inds], axis=0)

        elif hard_num > 0 and easy_num == 0:
            # sampling hard bg only
            rand_idx = np.random.randint(low=0, high=hard_num, size=(bg_rois_per_this_image,))
            bg_inds = hard_bg_inds[rand_idx]

        elif hard_num == 0 and easy_num > 0:
            # sampling easy bg only
            rand_idx = np.random.randint(low=0, high=easy_num, size=(bg_rois_per_this_image,))
            bg_inds = easy_bg_inds[rand_idx]

        else:
            raise NotImplementedError

        return bg_inds

    def aug_roi_by_noise(roi_boxes3d, gt_boxes3d, iou3d_src, aug_times=10):
        """Return the rois together with the IoU recorded for each of them.

        Random jittering is currently switched off (the ``if True`` below), so
        every roi is kept unchanged and its IoU is taken from ``iou3d_src``;
        ``random_aug_box3d`` is only reached if that switch is restored.

        :param roi_boxes3d: (K, 7) rois; written back in place
        :param gt_boxes3d: (K, 7) gt box assigned to each roi
        :param iou3d_src: (K,) IoU of each roi with its gt box
        :param aug_times: maximum number of attempts per roi
        :return: (roi_boxes3d, iou_of_rois)
        """
        iou_of_rois = np.zeros(roi_boxes3d.shape[0]).astype(gt_boxes3d.dtype)
        pos_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH)

        for k in range(roi_boxes3d.shape[0]):
            temp_iou = cnt = 0
            roi_box3d = roi_boxes3d[k]
            gt_box3d = gt_boxes3d[k].reshape(1, 7)
            aug_box3d = roi_box3d
            keep = True
            while temp_iou < pos_thresh and cnt < aug_times:
                if True:  # np.random.rand() < 0.2:
                    aug_box3d = roi_box3d  # p=0.2 to keep the original roi box
                    keep = True
                else:
                    aug_box3d = random_aug_box3d(roi_box3d)
                    keep = False
                aug_box3d = aug_box3d.reshape((1, 7))
                iou3d = iou3d_utils.boxes_iou3d(aug_box3d, gt_box3d)
                temp_iou = iou3d[0][0]
                cnt += 1
            roi_boxes3d[k] = aug_box3d.reshape(-1)
            # An unmodified roi keeps the IoU that was computed for it upstream.
            if cnt == 0 or keep:
                iou_of_rois[k] = iou3d_src[k]
            else:
                iou_of_rois[k] = temp_iou
        return roi_boxes3d, iou_of_rois

    def random_aug_box3d(box3d):
        """
        :param box3d: (7) [x, y, z, h, w, l, ry]
        random shift, scale, orientation

        The noise model is selected by ``cfg.RCNN.REG_AUG_METHOD``
        ('single', 'multiple' or 'normal').
        :raises NotImplementedError: for any other method name.
        """
        if cfg.RCNN.REG_AUG_METHOD == 'single':
            pos_shift = (np.random.rand(3) - 0.5)  # [-0.5 ~ 0.5]
            hwl_scale = (np.random.rand(3) - 0.5) / (0.5 / 0.15) + 1.0
            angle_rot = (np.random.rand(1) - 0.5) / (0.5 / (np.pi / 12))  # [-pi/12 ~ pi/12]
            aug_box3d = np.concatenate(
                [box3d[0:3] + pos_shift, box3d[3:6] * hwl_scale, box3d[6:7] + angle_rot], axis=0)
            return aug_box3d

        elif cfg.RCNN.REG_AUG_METHOD == 'multiple':
            # pos_range, hwl_range, angle_range, mean_iou
            range_config = [[0.2, 0.1, np.pi / 12, 0.7],
                            [0.3, 0.15, np.pi / 12, 0.6],
                            [0.5, 0.15, np.pi / 9, 0.5],
                            [0.8, 0.15, np.pi / 6, 0.3],
                            [1.0, 0.15, np.pi / 3, 0.2]]
            idx = np.random.randint(low=0, high=len(range_config), size=(1,))[0]

            pos_shift = ((np.random.rand(3) - 0.5) / 0.5) * range_config[idx][0]
            hwl_scale = ((np.random.rand(3) - 0.5) / 0.5) * range_config[idx][1] + 1.0
            angle_rot = ((np.random.rand(1) - 0.5) / 0.5) * range_config[idx][2]

            aug_box3d = np.concatenate(
                [box3d[0:3] + pos_shift, box3d[3:6] * hwl_scale, box3d[6:7] + angle_rot], axis=0)
            return aug_box3d

        elif cfg.RCNN.REG_AUG_METHOD == 'normal':
            x_shift = np.random.normal(loc=0, scale=0.3)
            y_shift = np.random.normal(loc=0, scale=0.2)
            z_shift = np.random.normal(loc=0, scale=0.3)
            h_shift = np.random.normal(loc=0, scale=0.25)
            w_shift = np.random.normal(loc=0, scale=0.15)
            l_shift = np.random.normal(loc=0, scale=0.5)
            ry_shift = ((np.random.rand() - 0.5) / 0.5) * np.pi / 12

            aug_box3d = np.array([box3d[0] + x_shift, box3d[1] + y_shift, box3d[2] + z_shift,
                                  box3d[3] + h_shift, box3d[4] + w_shift, box3d[5] + l_shift,
                                  box3d[6] + ry_shift], dtype=np.float32)
            aug_box3d = aug_box3d.astype(box3d.dtype)
            return aug_box3d

        else:
            raise NotImplementedError

    def data_augmentation(pts, rois, gt_of_rois):
        """Apply random rotation, scaling and x-flip per roi.

        ``pts``, ``rois`` and ``gt_of_rois`` are modified in place by the
        rotation step; use the returned arrays afterwards.

        :param pts: (B, M, 512, 3)
        :param rois: (B, M, 7)
        :param gt_of_rois: (B, M, 7)
        :return: (pts, rois, gt_of_rois) after augmentation
        """
        batch_size, boxes_num = pts.shape[0], pts.shape[1]

        # rotation augmentation, uniform in [-pi/AUG_ROT_RANGE, pi/AUG_ROT_RANGE).
        # The inner parentheses matter: `rand - 0.5 / 0.5` parses as
        # `rand - 1.0` and would only ever produce non-positive angles.
        angles = ((np.random.rand(batch_size, boxes_num) - 0.5) / 0.5) * (np.pi / cfg.AUG_ROT_RANGE)

        # calculate gt alpha from gt_of_rois
        temp_x, temp_z, temp_ry = gt_of_rois[:, :, 0], gt_of_rois[:, :, 2], gt_of_rois[:, :, 6]
        temp_beta = np.arctan2(temp_z, temp_x)
        gt_alpha = -np.sign(temp_beta) * np.pi / 2 + temp_beta + temp_ry  # (B, M)

        temp_x, temp_z, temp_ry = rois[:, :, 0], rois[:, :, 2], rois[:, :, 6]
        temp_beta = np.arctan2(temp_z, temp_x)
        roi_alpha = -np.sign(temp_beta) * np.pi / 2 + temp_beta + temp_ry  # (B, M)

        for k in range(batch_size):
            pts[k] = kitti_utils.rotate_pc_along_y_np(pts[k], angles[k])
            gt_of_rois[k] = np.squeeze(kitti_utils.rotate_pc_along_y_np(
                np.expand_dims(gt_of_rois[k], axis=1), angles[k]), axis=1)
            rois[k] = np.squeeze(kitti_utils.rotate_pc_along_y_np(
                np.expand_dims(rois[k], axis=1), angles[k]), axis=1)

        # calculate the ry after rotation
        temp_x, temp_z = gt_of_rois[:, :, 0], gt_of_rois[:, :, 2]
        temp_beta = np.arctan2(temp_z, temp_x)
        gt_of_rois[:, :, 6] = np.sign(temp_beta) * np.pi / 2 + gt_alpha - temp_beta

        temp_x, temp_z = rois[:, :, 0], rois[:, :, 2]
        temp_beta = np.arctan2(temp_z, temp_x)
        rois[:, :, 6] = np.sign(temp_beta) * np.pi / 2 + roi_alpha - temp_beta

        # scaling augmentation, factor in [0.95, 1.05)
        scales = 1 + ((np.random.rand(batch_size, boxes_num) - 0.5) / 0.5) * 0.05
        pts = pts * np.expand_dims(np.expand_dims(scales, axis=2), axis=3)
        gt_of_rois[:, :, 0:6] = gt_of_rois[:, :, 0:6] * np.expand_dims(scales, axis=2)
        rois[:, :, 0:6] = rois[:, :, 0:6] * np.expand_dims(scales, axis=2)

        # flip augmentation (sign of the x coordinate)
        flip_flag = np.sign(np.random.rand(batch_size, boxes_num) - 0.5)
        pts[:, :, :, 0] = pts[:, :, :, 0] * np.expand_dims(flip_flag, axis=2)
        gt_of_rois[:, :, 0] = gt_of_rois[:, :, 0] * flip_flag
        # flip orientation: ry > 0: pi - ry, ry < 0: -pi - ry
        src_ry = gt_of_rois[:, :, 6]
        ry = (flip_flag == 1).astype(np.float32) * src_ry + \
            (flip_flag == -1).astype(np.float32) * (np.sign(src_ry) * np.pi - src_ry)
        gt_of_rois[:, :, 6] = ry

        rois[:, :, 0] = rois[:, :, 0] * flip_flag
        # flip orientation: ry > 0: pi - ry, ry < 0: -pi - ry
        src_ry = rois[:, :, 6]
        ry = (flip_flag == 1).astype(np.float32) * src_ry + \
            (flip_flag == -1).astype(np.float32) * (np.sign(src_ry) * np.pi - src_ry)
        rois[:, :, 6] = ry

        return pts, rois, gt_of_rois

    def generate_proposal_target(seg_mask, rpn_features, gt_boxes3d, rpn_xyz, pts_depth,
                                 roi_boxes3d, rpn_intensity):
        """Sample rois, pool their points and build the RCNN training targets.

        All arguments are converted with ``np.array``. The axis-2 expansions
        below imply ``seg_mask``, ``rpn_intensity`` and ``pts_depth`` are
        per-point scalars of shape (B, num_points) — NOTE(review): confirm
        against the caller.

        :return: ``dict.values()`` view over, in order: 'sampled_pts',
            'pts_feature', 'cls_label', 'reg_valid_mask', 'gt_of_rois',
            'gt_iou', 'roi_boxes3d' (each flattened over batch and roi axes).
        """
        seg_mask = np.array(seg_mask)
        rpn_features = np.array(rpn_features)
        gt_boxes3d = np.array(gt_boxes3d)
        rpn_xyz = np.array(rpn_xyz)
        pts_depth = np.array(pts_depth)
        roi_boxes3d = np.array(roi_boxes3d)
        rpn_intensity = np.array(rpn_intensity)

        batch_rois, batch_gt_of_rois, batch_roi_iou = sample_rois_for_rcnn(roi_boxes3d, gt_boxes3d)

        if cfg.RCNN.USE_INTENSITY:
            pts_extra_input_list = [np.expand_dims(rpn_intensity, axis=2),
                                    np.expand_dims(seg_mask, axis=2)]
        else:
            pts_extra_input_list = [np.expand_dims(seg_mask, axis=2)]

        if cfg.RCNN.USE_DEPTH:
            pts_depth = pts_depth / 70.0 - 0.5
            pts_extra_input_list.append(np.expand_dims(pts_depth, axis=2))
        pts_extra_input = np.concatenate(pts_extra_input_list, axis=2)

        # point cloud pooling
        pts_feature = np.concatenate((pts_extra_input, rpn_features), axis=2)
        batch_rois = batch_rois.astype(np.float32)
        pooled_features, pooled_empty_flag = roipool3d_utils.roipool3d_gpu(
            rpn_xyz, pts_feature, batch_rois, cfg.RCNN.POOL_EXTRA_WIDTH,
            sampled_pt_num=cfg.RCNN.NUM_POINTS
        )

        # the first three pooled channels are the point coordinates
        sampled_pts, sampled_features = pooled_features[:, :, :, 0:3], pooled_features[:, :, :, 3:]

        # data augmentation
        if cfg.AUG_DATA:
            sampled_pts, batch_rois, batch_gt_of_rois = \
                data_augmentation(sampled_pts, batch_rois, batch_gt_of_rois)

        # canonical transformation: express points and gt boxes relative to
        # each roi's centre and heading
        batch_size = batch_rois.shape[0]
        roi_ry = batch_rois[:, :, 6] % (2 * np.pi)
        roi_center = batch_rois[:, :, 0:3]
        sampled_pts = sampled_pts - np.expand_dims(roi_center, axis=2)  # (B, M, 512, 3)
        batch_gt_of_rois[:, :, 0:3] = batch_gt_of_rois[:, :, 0:3] - roi_center
        batch_gt_of_rois[:, :, 6] = batch_gt_of_rois[:, :, 6] - roi_ry

        for k in range(batch_size):
            sampled_pts[k] = kitti_utils.rotate_pc_along_y_np(sampled_pts[k], batch_rois[k, :, 6])
            batch_gt_of_rois[k] = np.squeeze(kitti_utils.rotate_pc_along_y_np(
                np.expand_dims(batch_gt_of_rois[k], axis=1), roi_ry[k]), axis=1)

        # regression valid mask
        valid_mask = (pooled_empty_flag == 0)
        reg_valid_mask = ((batch_roi_iou > cfg.RCNN.REG_FG_THRESH) & valid_mask).astype(np.float32)

        # classification label: 1 fg, 0 bg, -1 ignored (empty pooled roi or
        # IoU strictly between CLS_BG_THRESH and CLS_FG_THRESH)
        batch_cls_label = (batch_roi_iou > cfg.RCNN.CLS_FG_THRESH).astype(np.int64)
        invalid_mask = (batch_roi_iou > cfg.RCNN.CLS_BG_THRESH) & \
                       (batch_roi_iou < cfg.RCNN.CLS_FG_THRESH)
        batch_cls_label[valid_mask == 0] = -1
        batch_cls_label[invalid_mask > 0] = -1

        output_dict = {
            'sampled_pts': sampled_pts.reshape(-1, cfg.RCNN.NUM_POINTS, 3).astype(np.float32),
            'pts_feature': sampled_features.reshape(
                -1, cfg.RCNN.NUM_POINTS, sampled_features.shape[3]).astype(np.float32),
            'cls_label': batch_cls_label.reshape(-1),
            'reg_valid_mask': reg_valid_mask.reshape(-1).astype(np.float32),
            'gt_of_rois': batch_gt_of_rois.reshape(-1, 7).astype(np.float32),
            'gt_iou': batch_roi_iou.reshape(-1).astype(np.float32),
            'roi_boxes3d': batch_rois.reshape(-1, 7).astype(np.float32),
        }

        # Callers receive the values only, in the insertion order above.
        return output_dict.values()

    return generate_proposal_target


if __name__ == "__main__":
    # Smoke test on dumped RPN outputs.
    input_dict = {}
    input_dict['roi_boxes3d'] = np.load("models/rpn_data/roi_boxes3d.npy")
    input_dict['gt_boxes3d'] = np.load("models/rpn_data/gt_boxes3d.npy")
    input_dict['rpn_xyz'] = np.load("models/rpn_data/rpn_xyz.npy")
    input_dict['rpn_features'] = np.load("models/rpn_data/rpn_features.npy")
    input_dict['rpn_intensity'] = np.load("models/rpn_data/rpn_intensity.npy")
    input_dict['seg_mask'] = np.load("models/rpn_data/seg_mask.npy")
    input_dict['pts_depth'] = np.load("models/rpn_data/pts_depth.npy")

    for k, v in input_dict.items():
        print(k, v.shape, np.sum(np.abs(v)))
        # add the batch axis
        input_dict[k] = np.expand_dims(v, axis=0)

    from utils.config import cfg
    cfg.RPN.LOC_XZ_FINE = True
    cfg.TEST.RPN_DISTANCE_BASED_PROPOSE = False
    cfg.RPN.NMS_TYPE = 'rotate'

    proposal_target_func = get_proposal_target_func(cfg)
    outputs = proposal_target_func(
        input_dict['seg_mask'], input_dict['rpn_features'], input_dict['gt_boxes3d'],
        input_dict['rpn_xyz'], input_dict['pts_depth'], input_dict['roi_boxes3d'],
        input_dict['rpn_intensity'])

    # The function returns values only (no keys), so pair them with the
    # names in the order generate_proposal_target emits them.
    output_names = ('sampled_pts', 'pts_feature', 'cls_label', 'reg_valid_mask',
                    'gt_of_rois', 'gt_iou', 'roi_boxes3d')
    for key, value in zip(output_names, outputs):
        print("name:{}, shape{}".format(key, value.shape))