diff --git a/fluid/PaddleCV/faster_rcnn/README.md b/fluid/PaddleCV/faster_rcnn/README.md index b15ad3b51917dac6b4b4ed35b017fa3ed2a44bc5..0c5c15245c6aa03c5861853946379093701c7475 100644 --- a/fluid/PaddleCV/faster_rcnn/README.md +++ b/fluid/PaddleCV/faster_rcnn/README.md @@ -111,10 +111,10 @@ Evalutaion result is shown as below: | Model | RoI function | Batch size | Max iteration | mAP | | :--------------- | :--------: | :------------: | :------------------: |------: | -| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.314 | -| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 | -| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.345 | -| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.364 | +| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 | +| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.318 | +| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.348 | +| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.367 | * Fluid RoIPool minibatch padding: Use RoIPool. Images in one batch padding to the same size. This method is same as detectron. * Fluid RoIPool no padding: Images without padding. diff --git a/fluid/PaddleCV/faster_rcnn/README_cn.md b/fluid/PaddleCV/faster_rcnn/README_cn.md index 75265fe91491aec90b47b85a3c478dbcdee1683d..a0f9fac8e9ea60bb7b2a34fe032554aae8fbcf7b 100644 --- a/fluid/PaddleCV/faster_rcnn/README_cn.md +++ b/fluid/PaddleCV/faster_rcnn/README_cn.md @@ -105,10 +105,10 @@ Faster RCNN 目标检测模型 | 模型 | RoI处理方式 | 批量大小 | 迭代次数 | mAP | | :--------------- | :--------: | :------------: | :------------------: |------: | -| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.314 | -| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 | -| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.345 | -| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.364 | +| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 | +| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.318 | +| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.348 | +| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.367 | diff --git a/fluid/PaddleCV/faster_rcnn/eval_helper.py b/fluid/PaddleCV/faster_rcnn/eval_helper.py index ec8449b791f792a230a8b81dac5dc183a47f0dd4..852b52955915bf268f930ce3b0fa35de5734b1ea 100644 --- a/fluid/PaddleCV/faster_rcnn/eval_helper.py +++ b/fluid/PaddleCV/faster_rcnn/eval_helper.py @@ -23,29 +23,43 @@ from PIL import ImageFont from config import cfg -def box_decoder(target_box, prior_box, prior_box_var): - proposals = np.zeros_like(target_box, dtype=np.float32) - prior_box_loc = np.zeros_like(prior_box, dtype=np.float32) - prior_box_loc[:, 0] = prior_box[:, 2] - prior_box[:, 0] + 1. - prior_box_loc[:, 1] = prior_box[:, 3] - prior_box[:, 1] + 1. - prior_box_loc[:, 2] = (prior_box[:, 2] + prior_box[:, 0]) / 2 - prior_box_loc[:, 3] = (prior_box[:, 3] + prior_box[:, 1]) / 2 - pred_bbox = np.zeros_like(target_box, dtype=np.float32) - for i in range(prior_box.shape[0]): - dw = np.minimum(prior_box_var[2] * target_box[i, 2::4], cfg.bbox_clip) - dh = np.minimum(prior_box_var[3] * target_box[i, 3::4], cfg.bbox_clip) - pred_bbox[i, 0::4] = prior_box_var[0] * target_box[ - i, 0::4] * prior_box_loc[i, 0] + prior_box_loc[i, 2] - pred_bbox[i, 1::4] = prior_box_var[1] * target_box[ - i, 1::4] * prior_box_loc[i, 1] + prior_box_loc[i, 3] - pred_bbox[i, 2::4] = np.exp(dw) * prior_box_loc[i, 0] - pred_bbox[i, 3::4] = np.exp(dh) * prior_box_loc[i, 1] - proposals[:, 0::4] = pred_bbox[:, 0::4] - pred_bbox[:, 2::4] / 2 - proposals[:, 1::4] = pred_bbox[:, 1::4] - pred_bbox[:, 3::4] / 2 - proposals[:, 2::4] = pred_bbox[:, 0::4] + pred_bbox[:, 2::4] / 2 - 1 - proposals[:, 3::4] = pred_bbox[:, 1::4] + pred_bbox[:, 3::4] / 2 - 1 - - return proposals +def box_decoder(deltas, boxes, weights): + if boxes.shape[0] == 0: + return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) + + boxes = boxes.astype(deltas.dtype, copy=False) + + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = weights + dx = deltas[:, 0::4] * wx + dy = deltas[:, 1::4] * wy + dw = deltas[:, 2::4] * ww + dh = deltas[:, 3::4] * wh + + # Prevent sending too large values into np.exp() + dw = np.minimum(dw, cfg.bbox_clip) + dh = np.minimum(dh, cfg.bbox_clip) + + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + + pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) + # x1 + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w + # y1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h + # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 + # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 + + return pred_boxes def clip_tiled_boxes(boxes, im_shape): @@ -73,7 +87,6 @@ def get_nmsed_box(rpn_rois, confs, locs, class_nums, im_info, variance_v = np.array(cfg.bbox_reg_weights) confs_v = np.array(confs) locs_v = np.array(locs) - rois = box_decoder(locs_v, rpn_rois_v, variance_v) im_results = [[] for _ in range(len(lod) - 1)] new_lod = [0] for i in range(len(lod) - 1): @@ -81,9 +94,11 @@ def get_nmsed_box(rpn_rois, confs, locs, class_nums, im_info, end = lod[i + 1] if start == end: continue - rois_n = rois[start:end, :] + locs_n = locs_v[start:end, :] + rois_n = rpn_rois_v[start:end, :] rois_n = rois_n / im_info[i][2] - rois_n = clip_tiled_boxes(rois_n, im_info[i][:2]) + rois_n = box_decoder(locs_n, rois_n, variance_v) + rois_n = clip_tiled_boxes(rois_n, im_info[i][:2] / im_info[i][2]) cls_boxes = [[] for _ in range(class_nums)] scores_n = confs_v[start:end, :]