diff --git a/dygraph/deploy/python/infer.py b/dygraph/deploy/python/infer.py index 41ddadf1b83fdc247e28337b7058af60fc8b13b4..53e7693f51a8e62d730adcdff22dc474a3c64f61 100644 --- a/dygraph/deploy/python/infer.py +++ b/dygraph/deploy/python/infer.py @@ -84,16 +84,8 @@ class Detector(object): np_boxes[:, 3] *= w np_boxes[:, 4] *= h np_boxes[:, 5] *= w - expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1) - np_boxes = np_boxes[expect_boxes, :] - for box in np_boxes: - print('class_id:{:d}, confidence:{:.4f},' - 'left_top:[{:.2f},{:.2f}],' - ' right_bottom:[{:.2f},{:.2f}]'.format( - int(box[0]), box[1], box[2], box[3], box[4], box[5])) results['boxes'] = np_boxes if np_masks is not None: - np_masks = np_masks[expect_boxes, :, :, :] results['masks'] = np_masks return results @@ -111,7 +103,7 @@ class Detector(object): results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] MaskRCNN's results include 'masks': np.ndarray: - shape:[N, class_num, mask_resolution, mask_resolution] + shape: [N, im_h, im_w] ''' inputs = self.preprocess(image) np_boxes, np_masks = None, None @@ -125,7 +117,7 @@ class Detector(object): output_names = self.predictor.get_output_names() boxes_tensor = self.predictor.get_output_handle(output_names[0]) np_boxes = boxes_tensor.copy_to_cpu() - if self.pred_config.mask_resolution is not None: + if self.pred_config.mask: masks_tensor = self.predictor.get_output_handle(output_names[2]) np_masks = masks_tensor.copy_to_cpu() @@ -135,14 +127,7 @@ class Detector(object): output_names = self.predictor.get_output_names() boxes_tensor = self.predictor.get_output_handle(output_names[0]) np_boxes = boxes_tensor.copy_to_cpu() - score_tensor = self.predictor.get_output_handle(output_names[3]) - np_score = score_tensor.copy_to_cpu() - label_tensor = self.predictor.get_output_handle(output_names[2]) - np_label = label_tensor.copy_to_cpu() - np_boxes = np.concatenate( - [np_label[:, np.newaxis], np_score[:, np.newaxis], np_boxes], - axis=-1) - if self.pred_config.mask_resolution is not None: + if self.pred_config.mask: masks_tensor = self.predictor.get_output_handle(output_names[2]) np_masks = masks_tensor.copy_to_cpu() t2 = time.time() @@ -196,10 +181,9 @@ class DetectorSOLOv2(Detector): image (str/np.ndarray): path of image/ np.ndarray read by cv2 threshold (float): threshold of predicted box' score Returns: - results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, - matix element:[class, score, x_min, y_min, x_max, y_max] - MaskRCNN's results include 'masks': np.ndarray: - shape:[N, class_num, mask_resolution, mask_resolution] + results (dict): 'segm': np.ndarray,shape:[N, im_h, im_w] + 'cate_label': label of segm, shape:[N] + 'cate_score': confidence score of segm, shape:[N] ''' inputs = self.preprocess(image) np_label, np_score, np_segms = None, None, None @@ -273,9 +257,9 @@ class PredictConfig(): self.preprocess_infos = yml_conf['Preprocess'] self.min_subgraph_size = yml_conf['min_subgraph_size'] self.labels = yml_conf['label_list'] - self.mask_resolution = None - if 'mask_resolution' in yml_conf: - self.mask_resolution = yml_conf['mask_resolution'] + self.mask = False + if 'mask' in yml_conf: + self.mask = yml_conf['mask'] self.input_shape = yml_conf['image_shape'] self.print_config() @@ -355,19 +339,9 @@ def load_predictor(model_dir, return predictor -def visualize(image_file, - results, - labels, - mask_resolution=14, - output_dir='output/', - threshold=0.5): +def visualize(image_file, results, labels, output_dir='output/', threshold=0.5): # visualize the predict result - im = visualize_box_mask( - image_file, - results, - labels, - mask_resolution=mask_resolution, - threshold=threshold) + im = visualize_box_mask(image_file, results, labels, threshold=threshold) img_name = os.path.split(image_file)[-1] if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -397,7 +371,6 @@ def predict_image(detector): FLAGS.image_file, results, detector.pred_config.labels, - mask_resolution=detector.pred_config.mask_resolution, output_dir=FLAGS.output_dir, threshold=FLAGS.threshold) @@ -431,7 +404,6 @@ def predict_video(detector, camera_id): frame, results, detector.pred_config.labels, - mask_resolution=detector.pred_config.mask_resolution, threshold=FLAGS.threshold) im = np.array(im) writer.write(im) diff --git a/dygraph/deploy/python/visualize.py b/dygraph/deploy/python/visualize.py index 1c136be4dde9c97c01649b1dab62135eed8062d8..fefba9773f12296d134e436698ca04bc0c327fcc 100644 --- a/dygraph/deploy/python/visualize.py +++ b/dygraph/deploy/python/visualize.py @@ -21,16 +21,15 @@ from PIL import Image, ImageDraw from scipy import ndimage -def visualize_box_mask(im, results, labels, mask_resolution=14, threshold=0.5): +def visualize_box_mask(im, results, labels, threshold=0.5): """ Args: im (str/np.ndarray): path of image/np.ndarray read by cv2 results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] MaskRCNN's results include 'masks': np.ndarray: - shape:[N, class_num, mask_resolution, mask_resolution] + shape:[N, im_h, im_w] labels (list): labels:['class1', ..., 'classn'] - mask_resolution (int): shape of a mask is:[mask_resolution, mask_resolution] threshold (float): Threshold of score. Returns: im (PIL.Image.Image): visualized image @@ -41,13 +40,9 @@ def visualize_box_mask(im, results, labels, mask_resolution=14, threshold=0.5): im = Image.fromarray(im) if 'masks' in results and 'boxes' in results: im = draw_mask( - im, - results['boxes'], - results['masks'], - labels, - resolution=mask_resolution) + im, results['boxes'], results['masks'], labels, threshold=threshold) if 'boxes' in results: - im = draw_box(im, results['boxes'], labels) + im = draw_box(im, results['boxes'], labels, threshold=threshold) if 'segm' in results: im = draw_segm( im, @@ -80,91 +75,49 @@ def get_color_map_list(num_classes): return color_map -def expand_boxes(boxes, scale=0.0): - """ - Args: - boxes (np.ndarray): shape:[N,4], N:number of box, - matix element:[x_min, y_min, x_max, y_max] - scale (float): scale of boxes - Returns: - boxes_exp (np.ndarray): expanded boxes - """ - w_half = (boxes[:, 2] - boxes[:, 0]) * .5 - h_half = (boxes[:, 3] - boxes[:, 1]) * .5 - x_c = (boxes[:, 2] + boxes[:, 0]) * .5 - y_c = (boxes[:, 3] + boxes[:, 1]) * .5 - w_half *= scale - h_half *= scale - boxes_exp = np.zeros(boxes.shape) - boxes_exp[:, 0] = x_c - w_half - boxes_exp[:, 2] = x_c + w_half - boxes_exp[:, 1] = y_c - h_half - boxes_exp[:, 3] = y_c + h_half - return boxes_exp - - -def draw_mask(im, np_boxes, np_masks, labels, resolution=14, threshold=0.5): +def draw_mask(im, np_boxes, np_masks, labels, threshold=0.5): """ Args: im (PIL.Image.Image): PIL image np_boxes (np.ndarray): shape:[N,6], N: number of box, - matix element:[class, score, x_min, y_min, x_max, y_max] - np_masks (np.ndarray): shape:[N, class_num, resolution, resolution] + matix element:[class, score, x_min, y_min, x_max, y_max] + np_masks (np.ndarray): shape:[N, im_h, im_w] labels (list): labels:['class1', ..., 'classn'] - resolution (int): shape of a mask is:[resolution, resolution] threshold (float): threshold of mask Returns: im (PIL.Image.Image): visualized image """ color_list = get_color_map_list(len(labels)) - scale = (resolution + 2.0) / resolution - im_w, im_h = im.size w_ratio = 0.4 alpha = 0.7 im = np.array(im).astype('float32') - rects = np_boxes[:, 2:] - expand_rects = expand_boxes(rects, scale) - expand_rects = expand_rects.astype(np.int32) - clsid_scores = np_boxes[:, 0:2] - padded_mask = np.zeros((resolution + 2, resolution + 2), dtype=np.float32) clsid2color = {} - for idx in range(len(np_boxes)): - clsid, score = clsid_scores[idx].tolist() - clsid = int(clsid) - xmin, ymin, xmax, ymax = expand_rects[idx].tolist() - w = xmax - xmin + 1 - h = ymax - ymin + 1 - w = np.maximum(w, 1) - h = np.maximum(h, 1) - padded_mask[1:-1, 1:-1] = np_masks[idx, int(clsid), :, :] - resized_mask = cv2.resize(padded_mask, (w, h)) - resized_mask = np.array(resized_mask > threshold, dtype=np.uint8) - x0 = min(max(xmin, 0), im_w) - x1 = min(max(xmax + 1, 0), im_w) - y0 = min(max(ymin, 0), im_h) - y1 = min(max(ymax + 1, 0), im_h) - im_mask = np.zeros((im_h, im_w), dtype=np.uint8) - im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), ( - x0 - xmin):(x1 - xmin)] + expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1) + np_boxes = np_boxes[expect_boxes, :] + np_masks = np_masks[expect_boxes, :, :] + for i in range(len(np_masks)): + clsid, score = int(np_boxes[i][0]), np_boxes[i][1] + mask = np_masks[i] if clsid not in clsid2color: clsid2color[clsid] = color_list[clsid] color_mask = clsid2color[clsid] for c in range(3): color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255 - idx = np.nonzero(im_mask) + idx = np.nonzero(mask) color_mask = np.array(color_mask) im[idx[0], idx[1], :] *= 1.0 - alpha im[idx[0], idx[1], :] += alpha * color_mask return Image.fromarray(im.astype('uint8')) -def draw_box(im, np_boxes, labels): +def draw_box(im, np_boxes, labels, threshold=0.5): """ Args: im (PIL.Image.Image): PIL image np_boxes (np.ndarray): shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] labels (list): labels:['class1', ..., 'classn'] + threshold (float): threshold of box Returns: im (PIL.Image.Image): visualized image """ @@ -172,10 +125,15 @@ def draw_box(im, np_boxes, labels): draw = ImageDraw.Draw(im) clsid2color = {} color_list = get_color_map_list(len(labels)) + expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1) + np_boxes = np_boxes[expect_boxes, :] for dt in np_boxes: clsid, bbox, score = int(dt[0]), dt[2:], dt[1] xmin, ymin, xmax, ymax = bbox + print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],' + 'right_bottom:[{:.2f},{:.2f}]'.format( + int(clsid), score, xmin, ymin, xmax, ymax)) w = xmax - xmin h = ymax - ymin if clsid not in clsid2color: diff --git a/dygraph/ppdet/engine/export_utils.py b/dygraph/ppdet/engine/export_utils.py index 85cd8d003e3130c5886c57e1050924fd5acfcbb9..140d61964921ac3cceafcda96eea6e68c7c845fc 100644 --- a/dygraph/ppdet/engine/export_utils.py +++ b/dygraph/ppdet/engine/export_utils.py @@ -98,9 +98,8 @@ def _dump_infer_config(config, path, image_shape, model): 'Architecture: {} is not supported for exporting model now'.format( infer_arch)) os._exit(0) - if 'mask_post_process' in model.__dict__ and model.__dict__[ - 'mask_post_process']: - infer_cfg['mask_resolution'] = model.mask_post_process.mask_resolution + if 'Mask' in infer_arch: + infer_cfg['mask'] = True infer_cfg['Preprocess'], infer_cfg[ 'label_list'], image_shape = _parse_reader( config['TestReader'], config['TestDataset'], config['metric'], diff --git a/dygraph/ppdet/metrics/coco_utils.py b/dygraph/ppdet/metrics/coco_utils.py index 40929d0ad107ed6af25024a500bc22120a624760..6a14fd6fadd7caaca845c898e390ad9a647696e2 100644 --- a/dygraph/ppdet/metrics/coco_utils.py +++ b/dygraph/ppdet/metrics/coco_utils.py @@ -30,7 +30,7 @@ def get_infer_results(outs, catid, bias=0): The output format is dictionary containing bbox or mask result. For example, bbox result is a list and each element contains - image_id, category_id, bbox and score. + image_id, category_id, bbox and score. """ if outs is None or len(outs) == 0: raise ValueError( @@ -42,19 +42,12 @@ def get_infer_results(outs, catid, bias=0): infer_res = {} if 'bbox' in outs: infer_res['bbox'] = get_det_res( - outs['bbox'], - outs['score'], - outs['label'], - outs['bbox_num'], - im_id, - catid, - bias=bias) + outs['bbox'], outs['bbox_num'], im_id, catid, bias=bias) if 'mask' in outs: # mask post process - infer_res['mask'] = get_seg_res(outs['mask'], outs['score'], - outs['label'], outs['bbox_num'], im_id, - catid) + infer_res['mask'] = get_seg_res(outs['mask'], outs['bbox'], + outs['bbox_num'], im_id, catid) if 'segm' in outs: infer_res['segm'] = get_solov2_segm_res(outs, im_id, catid) diff --git a/dygraph/ppdet/modeling/architectures/faster_rcnn.py b/dygraph/ppdet/modeling/architectures/faster_rcnn.py index e9a4af87b0f5d0e9bccaa85050444bbeb770f1df..b7cd9308fa7546bdb7904fed3da502d88d01fd54 100644 --- a/dygraph/ppdet/modeling/architectures/faster_rcnn.py +++ b/dygraph/ppdet/modeling/architectures/faster_rcnn.py @@ -99,13 +99,5 @@ class FasterRCNN(BaseArch): def get_pred(self): bbox_pred, bbox_num = self._forward() - label = bbox_pred[:, 0] - score = bbox_pred[:, 1] - bbox = bbox_pred[:, 2:] - output = { - 'bbox': bbox, - 'score': score, - 'label': label, - 'bbox_num': bbox_num - } + output = {'bbox': bbox_pred, 'bbox_num': bbox_num} return output diff --git a/dygraph/ppdet/modeling/architectures/fcos.py b/dygraph/ppdet/modeling/architectures/fcos.py index c8db8c0be879a29c0388dae5dda5b1c8c3a3d23f..6f71c8992bf49ab7f8a87d4eeb843b39e30f99fa 100644 --- a/dygraph/ppdet/modeling/architectures/fcos.py +++ b/dygraph/ppdet/modeling/architectures/fcos.py @@ -91,13 +91,5 @@ class FCOS(BaseArch): def get_pred(self): bboxes, bbox_num = self._forward() - label = bboxes[:, 0] - score = bboxes[:, 1] - bbox = bboxes[:, 2:] - output = { - 'bbox': bbox, - 'score': score, - 'label': label, - 'bbox_num': bbox_num - } + output = {'bbox': bboxes, 'bbox_num': bbox_num} return output diff --git a/dygraph/ppdet/modeling/architectures/mask_rcnn.py b/dygraph/ppdet/modeling/architectures/mask_rcnn.py index 05b05c137bce52458428fb995417ad2869a602dd..3b5618655e0624961a8f15898fc9f1337fcf4cc8 100644 --- a/dygraph/ppdet/modeling/architectures/mask_rcnn.py +++ b/dygraph/ppdet/modeling/architectures/mask_rcnn.py @@ -124,14 +124,5 @@ class MaskRCNN(BaseArch): def get_pred(self): bbox_pred, bbox_num, mask_pred = self._forward() - label = bbox_pred[:, 0] - score = bbox_pred[:, 1] - bbox = bbox_pred[:, 2:] - output = { - 'label': label, - 'score': score, - 'bbox': bbox, - 'bbox_num': bbox_num, - 'mask': mask_pred, - } + output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred} return output diff --git a/dygraph/ppdet/modeling/architectures/ttfnet.py b/dygraph/ppdet/modeling/architectures/ttfnet.py index 181aa7f0d88f05332cd406da58b13d9d4fe0612a..c3eb61c877efbffd8f5d6c3d957aff161d1af185 100644 --- a/dygraph/ppdet/modeling/architectures/ttfnet.py +++ b/dygraph/ppdet/modeling/architectures/ttfnet.py @@ -91,13 +91,8 @@ class TTFNet(BaseArch): def get_pred(self): bbox_pred, bbox_num = self._forward() - label = bbox_pred[:, 0] - score = bbox_pred[:, 1] - bbox = bbox_pred[:, 2:] output = { - "bbox": bbox, - 'score': score, - 'label': label, + "bbox": bbox_pred, "bbox_num": bbox_num, } return output diff --git a/dygraph/ppdet/modeling/architectures/yolo.py b/dygraph/ppdet/modeling/architectures/yolo.py index 42391bb989a4d4c2fc44f724a6ea1bb0ab267ccb..a9d2a6e764530e65eeaf569d6e2110edd2ae9e98 100644 --- a/dygraph/ppdet/modeling/architectures/yolo.py +++ b/dygraph/ppdet/modeling/architectures/yolo.py @@ -61,13 +61,5 @@ class YOLOv3(BaseArch): def get_pred(self): bbox_pred, bbox_num = self._forward() - label = bbox_pred[:, 0] - score = bbox_pred[:, 1] - bbox = bbox_pred[:, 2:] - output = { - 'bbox': bbox, - 'score': score, - 'label': label, - 'bbox_num': bbox_num - } + output = {'bbox': bbox_pred, 'bbox_num': bbox_num} return output diff --git a/dygraph/ppdet/modeling/bbox_utils.py b/dygraph/ppdet/modeling/bbox_utils.py index 51f963a60394f2cb2169c2048b55f923095a0615..505cea8e5ef438637d6da7aaec91ef8e0bde45e8 100644 --- a/dygraph/ppdet/modeling/bbox_utils.py +++ b/dygraph/ppdet/modeling/bbox_utils.py @@ -39,8 +39,6 @@ def bbox2delta(src_boxes, tgt_boxes, weights): def delta2bbox(deltas, boxes, weights): clip_scale = math.log(1000.0 / 16) - if boxes.shape[0] == 0: - return paddle.zeros((0, deltas.shape[1]), dtype='float32') widths = boxes[:, 2] - boxes[:, 0] heights = boxes[:, 3] - boxes[:, 1] @@ -61,12 +59,13 @@ def delta2bbox(deltas, boxes, weights): pred_w = paddle.exp(dw) * widths.unsqueeze(1) pred_h = paddle.exp(dh) * heights.unsqueeze(1) - pred_boxes = paddle.zeros_like(deltas) + pred_boxes = [] + pred_boxes.append(pred_ctr_x - 0.5 * pred_w) + pred_boxes.append(pred_ctr_y - 0.5 * pred_h) + pred_boxes.append(pred_ctr_x + 0.5 * pred_w) + pred_boxes.append(pred_ctr_y + 0.5 * pred_h) + pred_boxes = paddle.stack(pred_boxes, axis=-1) - pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w - pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h - pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h return pred_boxes diff --git a/dygraph/ppdet/modeling/heads/bbox_head.py b/dygraph/ppdet/modeling/heads/bbox_head.py index a2af5f30344c58b87ee359b65075b962aefe2e53..f897d799d11ae20d2d6595eb0a8519700d382275 100644 --- a/dygraph/ppdet/modeling/heads/bbox_head.py +++ b/dygraph/ppdet/modeling/heads/bbox_head.py @@ -141,8 +141,7 @@ class BBoxHead(nn.Layer): rois_feat = self.roi_extractor(body_feats, rois, rois_num) bbox_feat = self.head(rois_feat) - #if self.with_pool: - if len(bbox_feat.shape) > 2 and bbox_feat.shape[-1] > 1: + if self.with_pool: feat = F.adaptive_avg_pool2d(bbox_feat, output_size=1) feat = paddle.squeeze(feat, axis=[2, 3]) else: diff --git a/dygraph/ppdet/modeling/heads/mask_head.py b/dygraph/ppdet/modeling/heads/mask_head.py index 3ccf23ba846c90aeeb0515cf6f2a96592879626e..9a5243d958704b81f7c6542578d48f2f531d7873 100644 --- a/dygraph/ppdet/modeling/heads/mask_head.py +++ b/dygraph/ppdet/modeling/heads/mask_head.py @@ -182,11 +182,12 @@ class MaskHead(nn.Layer): mask_out = F.sigmoid(mask_logit) else: num_masks = mask_logit.shape[0] - pred_masks = paddle.split(mask_logit, num_masks) mask_out = [] # TODO: need to optimize gather - for i, pred_mask in enumerate(pred_masks): - mask = paddle.gather(pred_mask, labels[i], axis=1) + for i in range(mask_logit.shape[0]): + pred_masks = paddle.unsqueeze( + mask_logit[i, :, :, :], axis=0) + mask = paddle.gather(pred_masks, labels[i], axis=1) mask_out.append(mask) mask_out = F.sigmoid(paddle.concat(mask_out)) return mask_out diff --git a/dygraph/ppdet/modeling/layers.py b/dygraph/ppdet/modeling/layers.py index 2b96e2c984ca3156f48ec8c935c481881f061cf5..a499d528d9648a79bd56d303eaccefb85727d9f2 100644 --- a/dygraph/ppdet/modeling/layers.py +++ b/dygraph/ppdet/modeling/layers.py @@ -316,14 +316,12 @@ class RCNNBox(object): # [N, C*4] bbox = paddle.concat(roi) - bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var) + if bbox.shape[0] == 0: + bbox = paddle.zeros([0, bbox_pred.shape[1]], dtype='float32') + else: + bbox = delta2bbox(bbox_pred, bbox, self.prior_box_var) scores = cls_prob[:, :-1] - # [N*C, 4] - - bbox_num_class = bbox.shape[1] // 4 - bbox = paddle.reshape(bbox, [-1, bbox_num_class, 4]) - origin_h = paddle.unsqueeze(origin_shape[:, 0], axis=1) origin_w = paddle.unsqueeze(origin_shape[:, 1], axis=1) zeros = paddle.zeros_like(origin_h) diff --git a/dygraph/ppdet/modeling/post_process.py b/dygraph/ppdet/modeling/post_process.py index 4cfc6beaeeaf59a1907042a483769cfbf69ef3fd..232c0d8592298cac2b3a02c4ca4340a12d114470 100644 --- a/dygraph/ppdet/modeling/post_process.py +++ b/dygraph/ppdet/modeling/post_process.py @@ -54,8 +54,6 @@ class BBoxPostProcess(object): including labels, scores and bboxes. The size of bboxes are corresponding to the original image. """ - if bboxes.shape[0] == 0: - return paddle.zeros(shape=[1, 6]) origin_shape = paddle.floor(im_shape / scale_factor + 0.5) @@ -65,9 +63,12 @@ class BBoxPostProcess(object): for i in range(bbox_num.shape[0]): expand_shape = paddle.expand(origin_shape[i:i + 1, :], [bbox_num[i], 2]) - scale_y, scale_x = scale_factor[i] + scale_y, scale_x = scale_factor[i][0], scale_factor[i][1] scale = paddle.concat([scale_x, scale_y, scale_x, scale_y]) expand_scale = paddle.expand(scale, [bbox_num[i], 4]) + # TODO: Because paddle.expand transform error when dygraph + # to static, use reshape to avoid mistakes. + expand_scale = paddle.reshape(expand_scale, [bbox_num[i], 4]) origin_shape_list.append(expand_shape) scale_factor_list.append(expand_scale) @@ -121,6 +122,10 @@ class MaskPostProcess(object): gx = paddle.expand(img_x, [N, img_y.shape[1], img_x.shape[2]]) gy = paddle.expand(img_y, [N, img_y.shape[1], img_x.shape[2]]) + # TODO: Because paddle.expand transform error when dygraph + # to static, use reshape to avoid mistakes. + gx = paddle.reshape(gx, [N, img_y.shape[1], img_x.shape[2]]) + gy = paddle.reshape(gy, [N, img_y.shape[1], img_x.shape[2]]) grid = paddle.stack([gx, gy], axis=3) img_masks = F.grid_sample(masks, grid, align_corners=False) return img_masks[:, 0] @@ -129,19 +134,24 @@ class MaskPostProcess(object): """ Paste the mask prediction to the original image. """ - assert bboxes.shape[0] > 0, 'There is no detection output' - num_mask = mask_out.shape[0] - # TODO: support bs > 1 + origin_shape = paddle.cast(origin_shape, 'int32') + # TODO: support bs > 1 and mask output dtype is bool pred_result = paddle.zeros( - [num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='bool') + [num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='int32') + if bboxes.shape[0] == 0: + return pred_result + # TODO: optimize chunk paste + pred_result = [] for i in range(bboxes.shape[0]): - im_h, im_w = origin_shape[i] + im_h, im_w = origin_shape[i][0], origin_shape[i][1] pred_mask = self.paste_mask(mask_out[i], bboxes[i:i + 1, 2:], im_h, im_w) pred_mask = pred_mask >= self.binary_thresh - pred_result[i] = pred_mask + pred_mask = paddle.cast(pred_mask, 'int32') + pred_result.append(pred_mask) + pred_result = paddle.concat(pred_result) return pred_result diff --git a/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py b/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py index dcb5bf0c709396bfd6b98e1d818d5c0ecf368305..1ca0319d3ad13d3650022d8d958c0f92954914c9 100644 --- a/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py +++ b/dygraph/ppdet/modeling/proposal_generator/anchor_generator.py @@ -24,7 +24,7 @@ from .. import ops @register -class AnchorGenerator(object): +class AnchorGenerator(nn.Layer): def __init__(self, anchor_sizes=[32, 64, 128, 256, 512], aspect_ratios=[0.5, 1.0, 2.0], @@ -64,17 +64,21 @@ class AnchorGenerator(object): self.generate_cell_anchors(s, a) for s, a in zip(sizes, aspect_ratios) ] + [ + self.register_buffer( + t.name, t, persistable=False) for t in cell_anchors + ] return cell_anchors def _create_grid_offsets(self, size, stride, offset): - grid_height, grid_width = size + grid_height, grid_width = size[0], size[1] shifts_x = paddle.arange( offset * stride, grid_width * stride, step=stride, dtype='float32') shifts_y = paddle.arange( offset * stride, grid_height * stride, step=stride, dtype='float32') shift_y, shift_x = paddle.meshgrid(shifts_y, shifts_x) - shift_x = shift_x.reshape([-1]) - shift_y = shift_y.reshape([-1]) + shift_x = paddle.reshape(shift_x, [-1]) + shift_y = paddle.reshape(shift_y, [-1]) return shift_x, shift_y def _grid_anchors(self, grid_sizes): @@ -84,14 +88,15 @@ class AnchorGenerator(object): shift_x, shift_y = self._create_grid_offsets(size, stride, self.offset) shifts = paddle.stack((shift_x, shift_y, shift_x, shift_y), axis=1) + shifts = paddle.reshape(shifts, [-1, 1, 4]) + base_anchors = paddle.reshape(base_anchors, [1, -1, 4]) - anchors.append((shifts.reshape([-1, 1, 4]) + base_anchors.reshape( - [1, -1, 4])).reshape([-1, 4])) + anchors.append(paddle.reshape(shifts + base_anchors, [-1, 4])) return anchors - def __call__(self, input): - grid_sizes = [feature_map.shape[-2:] for feature_map in input] + def forward(self, input): + grid_sizes = [paddle.shape(feature_map)[-2:] for feature_map in input] anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) return anchors_over_all_feature_maps @@ -105,4 +110,4 @@ class AnchorGenerator(object): ratios and 5 sizes, the number of anchors is 15. For FPN models, `num_anchors` on every feature map is the same. """ - return self.cell_anchors[0].shape[0] + return len(self.cell_anchors[0]) diff --git a/dygraph/ppdet/modeling/proposal_generator/rpn_head.py b/dygraph/ppdet/modeling/proposal_generator/rpn_head.py index c7a615cfb74b3a47b6feb0fc299c553291f6fd8c..be399be9e98437c44e66c0deec1afed0b82f1445 100644 --- a/dygraph/ppdet/modeling/proposal_generator/rpn_head.py +++ b/dygraph/ppdet/modeling/proposal_generator/rpn_head.py @@ -108,7 +108,14 @@ class RPNHead(nn.Layer): anchors = self.anchor_generator(rpn_feats) - rois, rois_num = self._gen_proposal(scores, deltas, anchors, inputs) + # TODO: Fix batch_size > 1 when testing. + if self.training: + batch_size = im_shape.shape[0] + else: + batch_size = 1 + + rois, rois_num = self._gen_proposal(scores, deltas, anchors, inputs, + batch_size) if self.training: loss = self.get_loss(scores, deltas, anchors, inputs) @@ -116,16 +123,15 @@ class RPNHead(nn.Layer): else: return rois, rois_num, None - def _gen_proposal(self, scores, bbox_deltas, anchors, inputs): + def _gen_proposal(self, scores, bbox_deltas, anchors, inputs, batch_size): """ - scores (list[Tensor]): Multi-level scores prediction + scores (list[Tensor]): Multi-level scores prediction bbox_deltas (list[Tensor]): Multi-level deltas prediction - anchors (list[Tensor]): Multi-level anchors + anchors (list[Tensor]): Multi-level anchors inputs (dict): ground truth info """ prop_gen = self.train_proposal if self.training else self.test_proposal im_shape = inputs['im_shape'] - batch_size = im_shape.shape[0] rpn_rois_list = [[] for i in range(batch_size)] rpn_prob_list = [[] for i in range(batch_size)] rpn_rois_num_list = [[] for i in range(batch_size)] diff --git a/dygraph/ppdet/py_op/post_process.py b/dygraph/ppdet/py_op/post_process.py index e7c5d9dbf4432e6b584bf0ce84a16739aadf0e6d..41d366787fe6468404b08a24c5ec0c5fef4163b9 100755 --- a/dygraph/ppdet/py_op/post_process.py +++ b/dygraph/ppdet/py_op/post_process.py @@ -1,29 +1,35 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import six import os import numpy as np import cv2 -def get_det_res(bboxes, - scores, - labels, - bbox_nums, - image_id, - label_to_cat_id_map, - bias=0): +def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0): det_res = [] k = 0 for i in range(len(bbox_nums)): cur_image_id = int(image_id[i][0]) det_nums = bbox_nums[i] for j in range(det_nums): - box = bboxes[k] - score = float(scores[k]) - label = int(labels[k]) - if label < 0: continue + dt = bboxes[k] k = k + 1 - xmin, ymin, xmax, ymax = box.tolist() - category_id = label_to_cat_id_map[label] + num_id, score, xmin, ymin, xmax, ymax = dt.tolist() + if int(num_id) < 0: + continue + category_id = label_to_cat_id_map[int(num_id)] w = xmax - xmin + bias h = ymax - ymin + bias bbox = [xmin, ymin, w, h] @@ -37,8 +43,7 @@ def get_det_res(bboxes, return det_res -def get_seg_res(masks, scores, labels, mask_nums, image_id, - label_to_cat_id_map): +def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map): import pycocotools.mask as mask_util seg_res = [] k = 0 @@ -46,9 +51,9 @@ def get_seg_res(masks, scores, labels, mask_nums, image_id, cur_image_id = int(image_id[i][0]) det_nums = mask_nums[i] for j in range(det_nums): - mask = masks[k] - score = float(scores[k]) - label = int(labels[k]) + mask = masks[k].astype(np.uint8) + score = float(bboxes[k][1]) + label = int(bboxes[k][0]) k = k + 1 cat_id = label_to_cat_id_map[label] rle = mask_util.encode(