# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
"""Multi-task loss (classification, 2D/3D box regression, IoU) for the 3D RPN."""

import sys
from functools import reduce

sys.dont_write_bytecode = True

# -----------------------------------------
# custom modules
# -----------------------------------------
import numpy as np
import paddle.fluid as fluid
import paddle
from paddle.fluid.dygraph import to_variable

sys.path.append("../../")
from lib.rpn_util import *


def _subsample(inds, num, prob_row, label_row, hard):
    """Reduce ``inds`` to ``num`` entries, either hardest-first or at random.

    Args:
        inds: 1-D integer array of candidate roi indices for one image.
        num: number of indices to keep.
        prob_row: numpy array of probabilities for this image, indexed as
            ``prob_row[roi, class]``.
        label_row: numpy array of per-roi labels for this image.
        hard: when truthy keep the ``num`` candidates with the lowest
            probability for their own label; otherwise sample uniformly
            without replacement.

    Returns:
        The selected indices. ``inds`` is returned untouched when ``num`` is
        not positive or already equals ``len(inds)`` (this mirrors the
        original guard, so ``num == 0`` does NOT empty the selection).
    """
    if num <= 0 or num == inds.shape[0]:
        return inds
    if hard:
        scores = prob_row[inds, label_row[inds].astype(int)]
        # ascending sort: the lowest scores are the hardest samples
        return inds[scores.argsort()][0:num]
    return np.random.choice(inds, num, replace=False)


def _active_const(values, active):
    """Flatten a numpy array, keep ``active`` entries, wrap as a constant.

    Returns a float32 Variable of shape (1, n_active) with gradients stopped.
    """
    selected = np.asarray(values, dtype='float32').reshape(1, -1)[:, active]
    var = to_variable(selected)
    var.stop_gradient = True
    return var


def _active_pred(pred, active_index_var):
    """Flatten a prediction Variable and gather the active entries.

    Returns a Variable of shape (1, n_active); gradients still flow to
    ``pred``.
    """
    flat = fluid.layers.reshape(pred, shape=[-1])
    picked = fluid.layers.gather(flat, active_index_var)
    return fluid.layers.unsqueeze(picked, axes=0)


class RPN_3D_loss(fluid.dygraph.Layer):
    """Loss layer combining classification, 2D/3D box and IoU terms.

    All thresholds, weights (``*_lambda``), anchors and normalization
    statistics are copied from the ``conf`` object passed to the constructor.
    """

    def __init__(self, conf):
        """Copy the loss-related settings from ``conf`` onto the layer."""
        super(RPN_3D_loss, self).__init__()

        # +1 for the background class
        self.num_classes = len(conf.lbls) + 1
        self.num_anchors = conf.anchors.shape[0]
        self.anchors = conf.anchors
        self.bbox_means = conf.bbox_means
        self.bbox_stds = conf.bbox_stds
        self.feat_stride = conf.feat_stride
        self.fg_fraction = conf.fg_fraction
        self.box_samples = conf.box_samples
        self.ign_thresh = conf.ign_thresh
        self.nms_thres = conf.nms_thres
        self.fg_thresh = conf.fg_thresh
        self.bg_thresh_lo = conf.bg_thresh_lo
        self.bg_thresh_hi = conf.bg_thresh_hi
        self.best_thresh = conf.best_thresh
        self.hard_negatives = conf.hard_negatives
        self.focal_loss = conf.focal_loss

        self.crop_size = conf.crop_size

        self.cls_2d_lambda = conf.cls_2d_lambda
        self.iou_2d_lambda = conf.iou_2d_lambda
        self.bbox_2d_lambda = conf.bbox_2d_lambda
        self.bbox_3d_lambda = conf.bbox_3d_lambda
        self.bbox_3d_proj_lambda = conf.bbox_3d_proj_lambda

        self.lbls = conf.lbls
        self.ilbls = conf.ilbls

        self.min_gt_vis = conf.min_gt_vis
        self.min_gt_h = conf.min_gt_h
        self.max_gt_h = conf.max_gt_h

    def forward(self, cls, prob, bbox_2d, bbox_3d, imobjs, feat_size):
        """Compute the total loss and a list of reporting statistics.

        Args:
            cls: Variable of class scores, indexed ``[image, roi, class]``.
            prob: Variable of class probabilities with the same layout as
                ``cls``; only used detached (sampling and weighting).
            bbox_2d: Variable indexed ``[image, roi, k]`` with k = x, y, w, h
                normalized 2D deltas.
            bbox_3d: Variable indexed ``[image, roi, k]`` with k = x, y, z, w,
                h, l, ry normalized 3D deltas.
            imobjs: per-image objects providing ``.gts``, ``.p2_inv`` and
                ``['scale_factor']``.
            feat_size: feature map size forwarded to ``locate_anchors``.

        Returns:
            ``(loss, stats)`` where ``loss`` is a float32 Variable and
            ``stats`` is a list of dicts with keys ``name``, ``val``,
            ``format`` and ``group``.
        """
        stats = []
        loss = to_variable(np.array([0]).astype('float32'))

        # marker values written into labels / labels_weight during sampling
        FG_ENC = 1000
        BG_ENC = 2000
        IGN_FLAG = 3000

        batch_size = cls.shape[0]
        grid_shape = cls.shape[0:2]

        prob_detach = prob.detach().numpy()

        bbox_x = bbox_2d[:, :, 0]
        bbox_y = bbox_2d[:, :, 1]
        bbox_w = bbox_2d[:, :, 2]
        bbox_h = bbox_2d[:, :, 3]

        bbox_x3d = bbox_3d[:, :, 0]
        bbox_y3d = bbox_3d[:, :, 1]
        bbox_z3d = bbox_3d[:, :, 2]
        bbox_w3d = bbox_3d[:, :, 3]
        bbox_h3d = bbox_3d[:, :, 4]
        bbox_l3d = bbox_3d[:, :, 5]
        bbox_ry3d = bbox_3d[:, :, 6]

        bbox_x3d_proj = np.zeros(bbox_x3d.shape)
        bbox_y3d_proj = np.zeros(bbox_x3d.shape)
        bbox_z3d_proj = np.zeros(bbox_x3d.shape)

        labels = np.zeros(grid_shape)
        labels_weight = np.zeros(grid_shape)
        labels_scores = np.zeros(grid_shape)

        bbox_x_tar = np.zeros(grid_shape)
        bbox_y_tar = np.zeros(grid_shape)
        bbox_w_tar = np.zeros(grid_shape)
        bbox_h_tar = np.zeros(grid_shape)

        bbox_x3d_tar = np.zeros(grid_shape)
        bbox_y3d_tar = np.zeros(grid_shape)
        bbox_z3d_tar = np.zeros(grid_shape)
        bbox_w3d_tar = np.zeros(grid_shape)
        bbox_h3d_tar = np.zeros(grid_shape)
        bbox_l3d_tar = np.zeros(grid_shape)
        bbox_ry3d_tar = np.zeros(grid_shape)

        bbox_x3d_proj_tar = np.zeros(grid_shape)
        bbox_y3d_proj_tar = np.zeros(grid_shape)
        bbox_z3d_proj_tar = np.zeros(grid_shape)

        bbox_weights = np.zeros(grid_shape)

        coords_abs_z = np.zeros(grid_shape)
        coords_abs_ry = np.zeros(grid_shape)

        # get all rois (numpy array; column 4 is the anchor index)
        rois = locate_anchors(self.anchors, feat_size, self.feat_stride)
        rois = rois.astype('float32')

        # de-normalize the 3D predictions (still Variables)
        means = self.bbox_means
        stds = self.bbox_stds
        bbox_x3d_dn = bbox_x3d * stds[0, 4] + means[0, 4]
        bbox_y3d_dn = bbox_y3d * stds[0, 5] + means[0, 5]
        bbox_z3d_dn = bbox_z3d * stds[0, 6] + means[0, 6]
        bbox_w3d_dn = bbox_w3d * stds[0, 7] + means[0, 7]
        bbox_h3d_dn = bbox_h3d * stds[0, 8] + means[0, 8]
        bbox_l3d_dn = bbox_l3d * stds[0, 9] + means[0, 9]
        bbox_ry3d_dn = bbox_ry3d * stds[0, 10] + means[0, 10]

        src_anchors = self.anchors[rois[:, 4].astype('int64'), :]
        src_anchors = to_variable(src_anchors.astype('float32'))
        src_anchors.stop_gradient = True
        if len(src_anchors.shape) == 1:
            # the keyword is ``axes`` (``axis`` raised a TypeError here)
            src_anchors = fluid.layers.unsqueeze(input=src_anchors, axes=0)

        # compute 3d transform (the following four are numpy arrays)
        widths = rois[:, 2] - rois[:, 0] + 1.0
        heights = rois[:, 3] - rois[:, 1] + 1.0
        ctr_x = rois[:, 0] + 0.5 * widths
        ctr_y = rois[:, 1] + 0.5 * heights

        ctr_x_unsqueeze = fluid.layers.unsqueeze(
            input=to_variable(ctr_x), axes=0)
        ctr_y_unsqueeze = fluid.layers.unsqueeze(
            input=to_variable(ctr_y), axes=0)
        widths_unsqueeze = fluid.layers.unsqueeze(
            input=to_variable(widths), axes=0)
        heights_unsqueeze = fluid.layers.unsqueeze(
            input=to_variable(heights), axes=0)

        anchor_z3d = fluid.layers.unsqueeze(input=src_anchors[:, 4], axes=0)
        anchor_w3d = fluid.layers.unsqueeze(input=src_anchors[:, 5], axes=0)
        anchor_h3d = fluid.layers.unsqueeze(input=src_anchors[:, 6], axes=0)
        anchor_l3d = fluid.layers.unsqueeze(input=src_anchors[:, 7], axes=0)
        anchor_ry3d = fluid.layers.unsqueeze(input=src_anchors[:, 8], axes=0)

        bbox_x3d_dn = bbox_x3d_dn * widths_unsqueeze + ctr_x_unsqueeze
        bbox_y3d_dn = bbox_y3d_dn * heights_unsqueeze + ctr_y_unsqueeze
        bbox_z3d_dn = anchor_z3d + bbox_z3d_dn
        # NOTE: w/h/l are decoded for parity with the original code but are
        # not consumed further down in this method.
        bbox_w3d_dn = fluid.layers.exp(bbox_w3d_dn) * anchor_w3d
        bbox_h3d_dn = fluid.layers.exp(bbox_h3d_dn) * anchor_h3d
        bbox_l3d_dn = fluid.layers.exp(bbox_l3d_dn) * anchor_l3d
        bbox_ry3d_dn = anchor_ry3d + bbox_ry3d_dn

        # One flattened IoU Variable per image. Slots stay None for images
        # without foreground boxes and are zero-filled later, so the
        # concatenation always lines up with the flattened (image, roi) grid.
        ious_2d_per_image = [None] * batch_size

        for bind in range(batch_size):

            imobj = imobjs[bind]
            gts = imobj.gts

            p2_inv = to_variable(imobj.p2_inv).astype('float32')

            # filter gts
            igns, rmvs = determine_ignores(gts, self.lbls, self.ilbls,
                                           self.min_gt_vis, self.min_gt_h)

            # accumulate boxes
            gts_all = bbXYWH2Coords(np.array([gt.bbox_full for gt in gts]))
            gts_3d = np.array([gt.bbox_3d for gt in gts])

            # element-wise comparisons on arrays, hence ``== False``
            valid = (rmvs == False) & (igns == False)  # noqa: E712
            ignored = (rmvs == False) & (igns == True)  # noqa: E712

            if not valid.any():
                continue

            # filter out irrelevant cls, and ignore cls
            gts_val = gts_all[valid, :]
            gts_ign = gts_all[ignored, :]
            gts_3d = gts_3d[valid, :]

            # accumulate labels
            box_lbls = np.array([gt.cls for gt in gts])
            box_lbls = box_lbls[valid]
            box_lbls = np.array(
                [clsName2Ind(self.lbls, name) for name in box_lbls])

            # NOTE(review): after the ``continue`` above gts_val is never
            # empty, so the ``else`` branch below looks unreachable; it is
            # kept unchanged to preserve behavior.
            if gts_val.shape[0] > 0 or gts_ign.shape[0] > 0:

                # bbox regression
                transforms, _, raw_gt = compute_targets(
                    gts_val,
                    gts_ign,
                    box_lbls,
                    rois,
                    self.fg_thresh,
                    self.ign_thresh,
                    self.bg_thresh_lo,
                    self.bg_thresh_hi,
                    self.best_thresh,
                    anchors=self.anchors,
                    gts_3d=gts_3d,
                    tracker=rois[:, 4])

                # normalize 2d
                transforms[:, 0:4] -= self.bbox_means[:, 0:4]
                transforms[:, 0:4] /= self.bbox_stds[:, 0:4]

                # normalize 3d
                transforms[:, 5:12] -= self.bbox_means[:, 4:]
                transforms[:, 5:12] /= self.bbox_stds[:, 4:]

                # column 4 holds the label: >0 fg, <0 bg, ==0 ignore
                fg_inds = np.flatnonzero(transforms[:, 4] > 0)
                bg_inds = np.flatnonzero(transforms[:, 4] < 0)
                ign_inds = np.flatnonzero(transforms[:, 4] == 0)

                labels[bind, fg_inds] = transforms[fg_inds, 4]
                labels[bind, ign_inds] = IGN_FLAG
                labels[bind, bg_inds] = 0

                bbox_x_tar[bind, :] = transforms[:, 0]
                bbox_y_tar[bind, :] = transforms[:, 1]
                bbox_w_tar[bind, :] = transforms[:, 2]
                bbox_h_tar[bind, :] = transforms[:, 3]

                bbox_x3d_tar[bind, :] = transforms[:, 5]
                bbox_y3d_tar[bind, :] = transforms[:, 6]
                bbox_z3d_tar[bind, :] = transforms[:, 7]
                bbox_w3d_tar[bind, :] = transforms[:, 8]
                bbox_h3d_tar[bind, :] = transforms[:, 9]
                bbox_l3d_tar[bind, :] = transforms[:, 10]
                bbox_ry3d_tar[bind, :] = transforms[:, 11]

                bbox_x3d_proj_tar[bind, :] = raw_gt[:, 12]
                bbox_y3d_proj_tar[bind, :] = raw_gt[:, 13]
                bbox_z3d_proj_tar[bind, :] = raw_gt[:, 14]

                # ----------------------------------------
                # box sampling
                # ----------------------------------------
                if self.box_samples == np.inf:
                    fg_num = len(fg_inds)
                    bg_num = len(bg_inds)
                else:
                    fg_num = min(
                        round(rois.shape[0] * self.box_samples *
                              self.fg_fraction), len(fg_inds))
                    bg_num = min(
                        round(rois.shape[0] * self.box_samples - fg_num),
                        len(bg_inds))

                # fg first, then bg: keeps the random draw order unchanged
                fg_inds = _subsample(fg_inds, fg_num, prob_detach[bind],
                                     labels[bind], self.hard_negatives)
                bg_inds = _subsample(bg_inds, bg_num, prob_detach[bind],
                                     labels[bind], self.hard_negatives)

                labels_weight[bind, bg_inds] = BG_ENC
                labels_weight[bind, fg_inds] = FG_ENC
                bbox_weights[bind, fg_inds] = 1

                # ----------------------------------------
                # compute IoU stats
                # ----------------------------------------
                if fg_num > 0:

                    # compile deltas pred (Variable), one column per delta
                    deltas_2d = fluid.layers.concat(
                        [
                            fluid.layers.unsqueeze(pred[bind, :], axes=1)
                            for pred in (bbox_x, bbox_y, bbox_w, bbox_h)
                        ],
                        axis=1)

                    # compile deltas targets (numpy -> constant Variable)
                    deltas_2d_tar = np.stack(
                        (bbox_x_tar[bind, :], bbox_y_tar[bind, :],
                         bbox_w_tar[bind, :], bbox_h_tar[bind, :]),
                        axis=1).astype('float32')
                    deltas_2d_tar = to_variable(deltas_2d_tar)
                    deltas_2d_tar.stop_gradient = True

                    means_2d = self.bbox_means[0, :]
                    stds_2d = self.bbox_stds[0, :]

                    coords_2d = bbox_transform_inv(
                        rois, deltas_2d, means=means_2d, stds=stds_2d)
                    coords_2d_tar = bbox_transform_inv(
                        rois, deltas_2d_tar, means=means_2d, stds=stds_2d)

                    # IoU per roi, zeroed everywhere except sampled fg rois
                    ious_2d_var = iou(coords_2d, coords_2d_tar, mode='list')
                    iou_fg_mask = np.zeros(ious_2d_var.shape).astype(
                        'float32')
                    iou_fg_mask[fg_inds] = 1
                    ious_2d_var = ious_2d_var * to_variable(iou_fg_mask)
                    ious_2d_per_image[bind] = fluid.layers.reshape(
                        ious_2d_var, shape=[-1])

                    fg_anchors = self.anchors[rois[fg_inds, 4].astype(
                        'int64')]
                    fg_anchors = to_variable(fg_anchors).astype('float32')
                    fg_anchors.stop_gradient = True
                    if len(fg_anchors.shape) == 1:
                        fg_anchors = fluid.layers.unsqueeze(
                            input=fg_anchors, axes=0)

                    # numpy copies of the decoded fg predictions
                    bbox_x3d_dn_fg = bbox_x3d_dn.numpy()[bind, fg_inds]
                    bbox_y3d_dn_fg = bbox_y3d_dn.numpy()[bind, fg_inds]
                    bbox_z3d_dn_fg = bbox_z3d_dn.numpy()[bind, fg_inds]
                    bbox_ry3d_dn_fg = bbox_ry3d_dn.numpy()[bind, fg_inds]

                    # re-scale all 2D back to original
                    bbox_x3d_dn_fg /= imobj['scale_factor']
                    bbox_y3d_dn_fg /= imobj['scale_factor']

                    # homogeneous (x*z, y*z, z, 1) columns for back-projection
                    z_row = bbox_z3d_dn_fg[np.newaxis, :]
                    coords_proj = fluid.layers.concat(
                        [
                            to_variable(bbox_x3d_dn_fg[np.newaxis, :] *
                                        z_row),
                            to_variable(bbox_y3d_dn_fg[np.newaxis, :] *
                                        z_row),
                            to_variable(z_row),
                        ],
                        axis=0)
                    ones_row = to_variable(
                        np.ones([1, coords_proj.shape[1]])).astype('float32')
                    coords_proj = fluid.layers.concat(
                        [coords_proj, ones_row], axis=0)

                    coords_3d = fluid.layers.matmul(p2_inv,
                                                    coords_proj).numpy()

                    # NOTE(review): these are stored as numpy values, so the
                    # projection loss below carries no gradient back to the
                    # network; confirm whether that is intended.
                    bbox_x3d_proj[bind, fg_inds] = coords_3d[0, :]
                    bbox_y3d_proj[bind, fg_inds] = coords_3d[1, :]
                    bbox_z3d_proj[bind, fg_inds] = coords_3d[2, :]

                    # absolute targets
                    bbox_z3d_dn_tar = bbox_z3d_tar[bind, fg_inds] * \
                        self.bbox_stds[0, 6] + self.bbox_means[0, 6]
                    bbox_z3d_dn_tar = to_variable(bbox_z3d_dn_tar).astype(
                        'float32')
                    bbox_z3d_dn_tar.stop_gradient = True
                    bbox_z3d_dn_tar = fg_anchors[:, 4] + bbox_z3d_dn_tar

                    bbox_ry3d_dn_tar = bbox_ry3d_tar[bind, fg_inds] * \
                        self.bbox_stds[0, 10] + self.bbox_means[0, 10]
                    bbox_ry3d_dn_tar = to_variable(bbox_ry3d_dn_tar).astype(
                        'float32')
                    bbox_ry3d_dn_tar.stop_gradient = True
                    bbox_ry3d_dn_tar = fg_anchors[:, 8] + bbox_ry3d_dn_tar

                    bbox_abs_z3d_var = fluid.layers.abs(
                        bbox_z3d_dn_tar - to_variable(bbox_z3d_dn_fg))
                    coords_abs_z[bind, fg_inds] = bbox_abs_z3d_var.numpy()

                    bbox_abs_ry3d_var = fluid.layers.abs(
                        bbox_ry3d_dn_tar - to_variable(bbox_ry3d_dn_fg))
                    coords_abs_ry[bind, fg_inds] = bbox_abs_ry3d_var.numpy()

            else:

                bg_inds = np.arange(0, rois.shape[0])

                if self.box_samples == np.inf:
                    bg_num = len(bg_inds)
                else:
                    bg_num = min(
                        round(self.box_samples * (1 - self.fg_fraction)),
                        len(bg_inds))

                bg_inds = _subsample(bg_inds, bg_num, prob_detach[bind],
                                     labels[bind], self.hard_negatives)

                labels[bind, :] = 0
                labels_weight[bind, bg_inds] = BG_ENC

            # grab label predictions (for weighing purposes), numpy
            active = labels[bind, :] != IGN_FLAG
            labels_scores[bind, active] = prob_detach[bind, active, labels[
                bind, active].astype(int)]

        # ----------------------------------------
        # useful statistics
        # ----------------------------------------
        fg_label_mask = (labels > 0) & (labels != IGN_FLAG)
        bg_label_mask = (labels == 0) & (labels != IGN_FLAG)

        cls_pred = np.argmax(cls.detach().numpy(), axis=2)

        if self.cls_2d_lambda and fg_label_mask.any():
            acc_fg = np.mean(cls_pred[fg_label_mask] == labels[fg_label_mask])
            stats.append({
                'name': 'fg',
                'val': acc_fg,
                'format': '{:0.2f}',
                'group': 'acc'
            })

        if self.cls_2d_lambda and bg_label_mask.any():
            acc_bg = np.mean(cls_pred[bg_label_mask] == labels[bg_label_mask])
            stats.append({
                'name': 'bg',
                'val': acc_bg,
                'format': '{:0.2f}',
                'group': 'acc'
            })

        # ----------------------------------------
        # box weighting
        # ----------------------------------------
        fg_sampled = labels_weight == FG_ENC
        bg_sampled = labels_weight == BG_ENC
        fg_num = int(np.count_nonzero(fg_sampled))
        bg_num = int(np.count_nonzero(bg_sampled))

        # replace the FG/BG markers by real weights (1.0 for every sample)
        labels_weight[...] = 0.0
        labels_weight[fg_sampled | bg_sampled] = 1.0

        if self.fg_fraction is not None and fg_num > 0:
            # balance fg against bg according to the requested fraction
            fg_weight = (self.fg_fraction /
                         (1 - self.fg_fraction)) * (bg_num / fg_num)
            labels_weight[fg_sampled] = fg_weight

        # different method of doing hard negative mining
        # use the scores to normalize the importance of each sample
        # hence, encourages the network to get all "correct" rather than
        # becoming more correct at a decision it is already good at
        # this method is equivalent to the focal loss with additional mean scaling
        if self.focal_loss:

            # re-weight bg
            if bg_num > 0:
                bg_scores = labels_scores[bg_sampled]
                labels_weight[bg_sampled] *= (1 - bg_scores)**self.focal_loss

            # re-weight fg
            if fg_num > 0:
                fg_scores = labels_scores[fg_sampled]
                labels_weight[fg_sampled] *= (1 - fg_scores)**self.focal_loss

        # ----------------------------------------
        # classification loss
        # ----------------------------------------
        labels_weight = labels_weight.reshape(-1)
        labels = labels.astype('int64').reshape(-1)
        active = labels_weight > 0

        # everything below needs at least one active sample, so the Variable
        # conversions and the gather live inside the guard
        if self.cls_2d_lambda and np.any(active):

            active_index_var = to_variable(np.flatnonzero(active))
            active_index_var.stop_gradient = True

            cls_reshape = fluid.layers.reshape(cls, shape=[-1, cls.shape[2]])
            cls_active = fluid.layers.gather(
                cls_reshape, index=active_index_var)

            labels_active = to_variable(labels[active])
            labels_active.stop_gradient = True
            labels_active = fluid.layers.reshape(labels_active, shape=[-1, 1])

            labels_weight_active = to_variable(labels_weight[active])
            labels_weight_active = labels_weight_active.astype('float32')
            labels_weight_active.stop_gradient = True
            labels_weight_active = fluid.layers.unsqueeze(
                labels_weight_active, axes=1)

            loss_cls = fluid.layers.softmax_with_cross_entropy(
                cls_active, labels_active, ignore_index=IGN_FLAG)
            loss_cls = fluid.layers.elementwise_mul(loss_cls,
                                                    labels_weight_active)

            # simple gradient clipping
            loss_cls = fluid.layers.clip(loss_cls, min=0.0, max=2000.0)

            # take mean and scale lambda
            loss_cls = fluid.layers.mean(loss_cls)
            loss_cls *= self.cls_2d_lambda

            loss += loss_cls

            stats.append({
                'name': 'cls',
                'val': loss_cls.numpy(),
                'format': '{:0.4f}',
                'group': 'loss'
            })

        # ----------------------------------------
        # bbox regression loss
        # ----------------------------------------
        if np.sum(bbox_weights) > 0:

            bbox_weights = bbox_weights.astype('float32').reshape(-1)

            active = bbox_weights > 0
            active_index = np.flatnonzero(active)
            active_len = active_index.size
            active_index_var = to_variable(active_index)
            active_index_var.stop_gradient = True

            bbox_weights_active = _active_const(bbox_weights, active)

            def active_smooth_l1(pred_var, target_var):
                # weighted smooth-L1 summed over the active boxes
                return fluid.layers.smooth_l1(
                    pred_var, target_var, outside_weight=bbox_weights_active)

            if self.bbox_2d_lambda:

                # bbox loss 2d
                loss_bbox_x, loss_bbox_y, loss_bbox_w, loss_bbox_h = [
                    active_smooth_l1(
                        _active_pred(pred, active_index_var),
                        _active_const(tar, active))
                    for pred, tar in ((bbox_x, bbox_x_tar),
                                      (bbox_y, bbox_y_tar),
                                      (bbox_w, bbox_w_tar),
                                      (bbox_h, bbox_h_tar))
                ]

                bbox_2d_loss = (loss_bbox_x + loss_bbox_y + loss_bbox_w +
                                loss_bbox_h) / active_len
                bbox_2d_loss *= self.bbox_2d_lambda

                loss += bbox_2d_loss
                stats.append({
                    'name': 'bbox_2d',
                    'val': bbox_2d_loss.numpy(),
                    'format': '{:0.4f}',
                    'group': 'loss'
                })

            if self.bbox_3d_lambda:

                # bbox loss 3d
                (loss_bbox_x3d, loss_bbox_y3d, loss_bbox_z3d, loss_bbox_w3d,
                 loss_bbox_h3d, loss_bbox_l3d, loss_bbox_ry3d) = [
                     active_smooth_l1(
                         _active_pred(pred, active_index_var).astype(
                             'float32'), _active_const(tar, active))
                     for pred, tar in ((bbox_x3d, bbox_x3d_tar),
                                       (bbox_y3d, bbox_y3d_tar),
                                       (bbox_z3d, bbox_z3d_tar),
                                       (bbox_w3d, bbox_w3d_tar),
                                       (bbox_h3d, bbox_h3d_tar),
                                       (bbox_l3d, bbox_l3d_tar),
                                       (bbox_ry3d, bbox_ry3d_tar))
                 ]

                bbox_3d_loss = (loss_bbox_x3d + loss_bbox_y3d + loss_bbox_z3d)
                bbox_3d_loss += (loss_bbox_w3d + loss_bbox_h3d +
                                 loss_bbox_l3d + loss_bbox_ry3d)
                bbox_3d_loss = bbox_3d_loss / active_len
                bbox_3d_loss *= self.bbox_3d_lambda

                loss += bbox_3d_loss
                stats.append({
                    'name': 'bbox_3d',
                    'val': bbox_3d_loss.numpy(),
                    'format': '{:0.4f}',
                    'group': 'loss'
                })

            if self.bbox_3d_proj_lambda:

                # bbox loss 3d projection; predictions are numpy values here,
                # so both sides enter as constants
                (loss_bbox_x3d_proj, loss_bbox_y3d_proj,
                 loss_bbox_z3d_proj) = [
                     active_smooth_l1(
                         _active_const(pred, active),
                         _active_const(tar, active))
                     for pred, tar in ((bbox_x3d_proj, bbox_x3d_proj_tar),
                                       (bbox_y3d_proj, bbox_y3d_proj_tar),
                                       (bbox_z3d_proj, bbox_z3d_proj_tar))
                 ]

                bbox_3d_proj_loss = (loss_bbox_x3d_proj + loss_bbox_y3d_proj +
                                     loss_bbox_z3d_proj)
                bbox_3d_proj_loss = bbox_3d_proj_loss / active_len
                bbox_3d_proj_loss *= self.bbox_3d_proj_lambda

                loss += bbox_3d_proj_loss
                stats.append({
                    'name': 'bbox_3d_proj',
                    'val': bbox_3d_proj_loss.numpy(),
                    'format': '{:0.4f}',
                    'group': 'loss'
                })

            coords_abs_z_mean = fluid.layers.mean(
                to_variable(coords_abs_z.reshape(-1)[active]))
            stats.append({
                'name': 'z',
                'val': coords_abs_z_mean.numpy(),
                'format': '{:0.2f}',
                'group': 'misc'
            })

            coords_abs_ry_mean = fluid.layers.mean(
                to_variable(coords_abs_ry.reshape(-1)[active]))
            stats.append({
                'name': 'ry',
                'val': coords_abs_ry_mean.numpy(),
                'format': '{:0.2f}',
                'group': 'misc'
            })

            # Zero-fill images that produced no IoU tensor so that position
            # ``image * num_rois + roi`` in the concatenation matches
            # ``active_index_var``. The filler is never gathered because
            # active indices only exist for images with foreground boxes.
            # bbox_weights > 0 guarantees at least one real entry.
            reference = next(
                item for item in ious_2d_per_image if item is not None)
            filler = None
            for idx, item in enumerate(ious_2d_per_image):
                if item is None:
                    if filler is None:
                        filler = to_variable(
                            np.zeros(reference.shape, dtype='float32'))
                    ious_2d_per_image[idx] = filler

            ious_2d = fluid.layers.concat(ious_2d_per_image, axis=0)
            ious_2d = fluid.layers.reshape(ious_2d, shape=[-1])
            ious_2d_active = fluid.layers.gather(ious_2d, active_index_var)
            ious_2d_mean = fluid.layers.mean(ious_2d_active)
            stats.append({
                'name': 'iou',
                'val': ious_2d_mean.numpy(),
                'format': '{:0.2f}',
                'group': 'acc'
            })

            # use a 2d IoU based log loss
            if self.iou_2d_lambda:
                iou_2d_loss = -fluid.layers.log(ious_2d_active)
                iou_2d_loss = (iou_2d_loss * bbox_weights_active)
                iou_2d_loss = fluid.layers.mean(iou_2d_loss)

                iou_2d_loss *= self.iou_2d_lambda
                loss += iou_2d_loss

                stats.append({
                    'name': 'iou',
                    'val': iou_2d_loss.numpy(),
                    'format': '{:0.4f}',
                    'group': 'loss'
                })

        return loss, stats