From 68ad52d47565e3c843111365591240354f067bf4 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Sat, 2 Feb 2019 15:14:08 +0800 Subject: [PATCH] fix bug in Faster RCNN eval & refine code (#1737) * fix bug in eval faster & refine code --- fluid/PaddleCV/rcnn/config.py | 6 -- fluid/PaddleCV/rcnn/eval_coco_map.py | 30 +++++----- fluid/PaddleCV/rcnn/infer.py | 25 ++++---- fluid/PaddleCV/rcnn/models/model_builder.py | 64 +++++++++++---------- fluid/PaddleCV/rcnn/profile.py | 2 +- fluid/PaddleCV/rcnn/reader.py | 27 ++------- fluid/PaddleCV/rcnn/roidbs.py | 30 +++++++--- fluid/PaddleCV/rcnn/train.py | 12 ++-- fluid/PaddleCV/rcnn/utility.py | 5 +- 9 files changed, 93 insertions(+), 108 deletions(-) diff --git a/fluid/PaddleCV/rcnn/config.py b/fluid/PaddleCV/rcnn/config.py index f535ad70..2a8ebdf7 100644 --- a/fluid/PaddleCV/rcnn/config.py +++ b/fluid/PaddleCV/rcnn/config.py @@ -219,12 +219,6 @@ _C.pixel_means = [102.9801, 115.9465, 122.7717] # clip box to prevent overflowing _C.bbox_clip = np.log(1000. / 16.) -# dataset path -_C.train_file_list = 'annotations/instances_train2017.json' -_C.train_data_dir = 'train2017' -_C.val_file_list = 'annotations/instances_val2017.json' -_C.val_data_dir = 'val2017' - def merge_cfg_from_args(args, mode): """Merge config keys, values in args into the global config.""" diff --git a/fluid/PaddleCV/rcnn/eval_coco_map.py b/fluid/PaddleCV/rcnn/eval_coco_map.py index b9a18b5b..a8b2556b 100644 --- a/fluid/PaddleCV/rcnn/eval_coco_map.py +++ b/fluid/PaddleCV/rcnn/eval_coco_map.py @@ -29,20 +29,20 @@ import json from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval, Params from config import cfg +from roidbs import DatasetPath def eval(): - if '2014' in cfg.dataset: - test_list = 'annotations/instances_val2014.json' - elif '2017' in cfg.dataset: - test_list = 'annotations/instances_val2017.json' + + data_path = DatasetPath('val') + test_list = data_path.get_file_list() image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size] class_nums = cfg.class_num devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" devices_num = len(devices.split(",")) total_batch_size = devices_num * cfg.TRAIN.im_per_batch - cocoGt = COCO(os.path.join(cfg.data_dir, test_list)) + cocoGt = COCO(test_list) num_id_to_cat_id_map = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())} category_ids = cocoGt.getCatIds() label_list = { @@ -51,14 +51,13 @@ def eval(): } label_list[0] = ['background'] - model = model_builder.FasterRCNN( + model = model_builder.RCNN( add_conv_body_func=resnet.add_ResNet50_conv4_body, add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head, use_pyreader=False, is_train=False) model.build_model(image_shape) - rpn_rois, confs, locs = model.eval_bbox_out() - pred_boxes = model.eval() + pred_boxes = model.eval_bbox_out() if cfg.MASK_ON: masks = model.eval_mask_out() place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() @@ -77,9 +76,10 @@ def eval(): dts_res = [] segms_res = [] if cfg.MASK_ON: - fetch_list = [rpn_rois, confs, locs, pred_boxes, masks] + fetch_list = [pred_boxes, masks] else: - fetch_list = [rpn_rois, confs, locs] + fetch_list = [pred_boxes] + eval_start = time.time() for batch_id, batch_data in enumerate(test_reader()): start = time.time() im_info = [] @@ -89,12 +89,9 @@ def eval(): feed=feeder.feed(batch_data), return_numpy=False) - rpn_rois_v = result[0] - confs_v = result[1] - locs_v = result[2] + pred_boxes_v = result[0] if cfg.MASK_ON: - pred_boxes_v = result[3] - masks_v = result[4] + masks_v = result[1] new_lod = pred_boxes_v.lod() nmsed_out = pred_boxes_v @@ -108,6 +105,9 @@ def eval(): batch_data, num_id_to_cat_id_map) end = time.time() print('batch id: {}, time: {}'.format(batch_id, end - start)) + eval_end = time.time() + total_time = eval_end - eval_start + print('average time of eval is: {}'.format(total_time / (batch_id + 1))) with open("detection_bbox_result.json", 'w') as outfile: json.dump(dts_res, outfile) print("start evaluate bbox using coco api") diff --git a/fluid/PaddleCV/rcnn/infer.py b/fluid/PaddleCV/rcnn/infer.py index 73fc7882..53365c01 100644 --- a/fluid/PaddleCV/rcnn/infer.py +++ b/fluid/PaddleCV/rcnn/infer.py @@ -12,16 +12,15 @@ import json from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval, Params from config import cfg +from roidbs import DatasetPath def infer(): - if '2014' in cfg.dataset: - test_list = 'annotations/instances_val2014.json' - elif '2017' in cfg.dataset: - test_list = 'annotations/instances_val2017.json' + data_path = DatasetPath('val') + test_list = data_path.get_file_list() - cocoGt = COCO(os.path.join(cfg.data_dir, test_list)) + cocoGt = COCO(test_list) num_id_to_cat_id_map = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())} category_ids = cocoGt.getCatIds() label_list = { @@ -32,14 +31,13 @@ def infer(): image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size] class_nums = cfg.class_num - model = model_builder.FasterRCNN( + model = model_builder.RCNN( add_conv_body_func=resnet.add_ResNet50_conv4_body, add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head, use_pyreader=False, is_train=False) model.build_model(image_shape) - rpn_rois, confs, locs = model.eval_bbox_out() - pred_boxes = model.eval() + pred_boxes = model.eval_bbox_out() if cfg.MASK_ON: masks = model.eval_mask_out() place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() @@ -56,20 +54,17 @@ def infer(): dts_res = [] segms_res = [] if cfg.MASK_ON: - fetch_list = [rpn_rois, confs, locs, pred_boxes, masks] + fetch_list = [pred_boxes, masks] else: - fetch_list = [rpn_rois, confs, locs] + fetch_list = [pred_boxes] data = next(infer_reader()) im_info = [data[0][1]] result = exe.run(fetch_list=[v.name for v in fetch_list], feed=feeder.feed(data), return_numpy=False) - rpn_rois_v = result[0] - confs_v = result[1] - locs_v = result[2] + pred_boxes_v = result[0] if cfg.MASK_ON: - pred_boxes_v = result[3] - masks_v = result[4] + masks_v = result[1] new_lod = pred_boxes_v.lod() nmsed_out = pred_boxes_v path = os.path.join(cfg.image_path, cfg.image_name) diff --git a/fluid/PaddleCV/rcnn/models/model_builder.py b/fluid/PaddleCV/rcnn/models/model_builder.py index eea0e16a..9cd4d786 100644 --- a/fluid/PaddleCV/rcnn/models/model_builder.py +++ b/fluid/PaddleCV/rcnn/models/model_builder.py @@ -23,7 +23,7 @@ import cPickle as cp import numpy as np -class FasterRCNN(object): +class RCNN(object): def __init__(self, add_conv_body_func=None, add_roi_box_head_func=None, @@ -64,14 +64,10 @@ class FasterRCNN(object): rloss = [loss] + losses return rloss, rkeys - def eval_bbox_out(self): - cls_prob = fluid.layers.softmax(self.cls_score, use_cudnn=False) - return [self.rpn_rois, cls_prob, self.bbox_pred] - def eval_mask_out(self): return self.mask_fcn_logits - def eval(self): + def eval_bbox_out(self): return self.pred_result def build_input(self, image_shape): @@ -131,6 +127,33 @@ class FasterRCNN(object): self.im_id, self.gt_masks ] + def eval_bbox(self): + self.im_scale = fluid.layers.slice( + self.im_info, [1], starts=[2], ends=[3]) + im_scale_lod = fluid.layers.sequence_expand(self.im_scale, + self.rpn_rois) + boxes = self.rpn_rois / im_scale_lod + cls_prob = fluid.layers.softmax(self.cls_score, use_cudnn=False) + bbox_pred_reshape = fluid.layers.reshape(self.bbox_pred, + (-1, cfg.class_num, 4)) + decoded_box = fluid.layers.box_coder( + prior_box=boxes, + prior_box_var=cfg.bbox_reg_weights, + target_box=bbox_pred_reshape, + code_type='decode_center_size', + box_normalized=False, + axis=1) + cliped_box = fluid.layers.box_clip( + input=decoded_box, im_info=self.im_info) + self.pred_result = fluid.layers.multiclass_nms( + bboxes=cliped_box, + scores=cls_prob, + score_threshold=cfg.TEST.score_thresh, + nms_top_k=-1, + nms_threshold=cfg.TEST.nms_thresh, + keep_top_k=cfg.TEST.detections_per_im, + normalized=False) + def rpn_heads(self, rpn_input): # RPN hidden representation dim_out = rpn_input.shape[1] @@ -275,6 +298,8 @@ class FasterRCNN(object): name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))) + if not self.is_train: + self.eval_bbox() def SuffixNet(self, conv5): mask_out = fluid.layers.conv2d_transpose( @@ -313,30 +338,7 @@ class FasterRCNN(object): self.roi_has_mask_int32) self.mask_fcn_logits = self.SuffixNet(conv5) else: - im_scale = fluid.layers.slice( - self.im_info, [1], starts=[2], ends=[3]) - im_scale_lod = fluid.layers.sequence_expand(im_scale, self.rpn_rois) - boxes = self.rpn_rois / im_scale_lod - cls_prob = fluid.layers.softmax(self.cls_score, use_cudnn=False) - bbox_pred_reshape = fluid.layers.reshape(self.bbox_pred, - (-1, cfg.class_num, 4)) - decoded_box = fluid.layers.box_coder( - prior_box=boxes, - prior_box_var=cfg.bbox_reg_weights, - target_box=bbox_pred_reshape, - code_type='decode_center_size', - box_normalized=False, - axis=1) - cliped_box = fluid.layers.box_clip( - input=decoded_box, im_info=self.im_info) - self.pred_result = fluid.layers.multiclass_nms( - bboxes=cliped_box, - scores=cls_prob, - score_threshold=cfg.TEST.score_thresh, - nms_top_k=-1, - nms_threshold=cfg.TEST.nms_thresh, - keep_top_k=cfg.TEST.detections_per_im, - normalized=False) + self.eval_bbox() pred_res_shape = fluid.layers.shape(self.pred_result) shape = fluid.layers.reduce_prod(pred_res_shape) shape = fluid.layers.reshape(shape, [1, 1]) @@ -351,7 +353,7 @@ class FasterRCNN(object): pred_res = ie.input(self.pred_result) pred_boxes = fluid.layers.slice( pred_res, [1], starts=[2], ends=[6]) - im_scale_lod = fluid.layers.sequence_expand(im_scale, + im_scale_lod = fluid.layers.sequence_expand(self.im_scale, pred_boxes) mask_rois = pred_boxes * im_scale_lod conv5 = self.add_roi_box_head_func(mask_input, mask_rois) diff --git a/fluid/PaddleCV/rcnn/profile.py b/fluid/PaddleCV/rcnn/profile.py index b19d51b6..92f089b4 100644 --- a/fluid/PaddleCV/rcnn/profile.py +++ b/fluid/PaddleCV/rcnn/profile.py @@ -37,7 +37,7 @@ def train(): devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" devices_num = len(devices.split(",")) total_batch_size = devices_num * cfg.TRAIN.im_per_batch - model = model_builder.FasterRCNN( + model = model_builder.RCNN( add_conv_body_func=resnet.add_ResNet50_conv4_body, add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head, use_pyreader=cfg.use_pyreader, diff --git a/fluid/PaddleCV/rcnn/reader.py b/fluid/PaddleCV/rcnn/reader.py index bdfab3d3..a15e774d 100644 --- a/fluid/PaddleCV/rcnn/reader.py +++ b/fluid/PaddleCV/rcnn/reader.py @@ -36,7 +36,7 @@ def roidb_reader(roidb, mode): im_height = np.round(roidb['height'] * im_scales) im_width = np.round(roidb['width'] * im_scales) im_info = np.array([im_height, im_width, im_scales], dtype=np.float32) - if mode == 'test' or mode == 'infer': + if mode == 'val' or mode == 'infer': return im, im_info, im_id gt_boxes = roidb['gt_boxes'].astype('float32') @@ -74,31 +74,12 @@ def coco(mode, total_batch_size=None, padding_total=False, shuffle=False): - if 'coco2014' in cfg.dataset: - cfg.train_file_list = 'annotations/instances_train2014.json' - cfg.train_data_dir = 'train2014' - cfg.val_file_list = 'annotations/instances_val2014.json' - cfg.val_data_dir = 'val2014' - elif 'coco2017' in cfg.dataset: - cfg.train_file_list = 'annotations/instances_train2017.json' - cfg.train_data_dir = 'train2017' - cfg.val_file_list = 'annotations/instances_val2017.json' - cfg.val_data_dir = 'val2017' - else: - raise NotImplementedError('Dataset {} not supported'.format( - cfg.dataset)) cfg.mean_value = np.array(cfg.pixel_means)[np.newaxis, np.newaxis, :].astype('float32') total_batch_size = total_batch_size if total_batch_size else batch_size if mode != 'infer': assert total_batch_size % batch_size == 0 - if mode == 'train': - cfg.train_file_list = os.path.join(cfg.data_dir, cfg.train_file_list) - cfg.train_data_dir = os.path.join(cfg.data_dir, cfg.train_data_dir) - elif mode == 'test' or mode == 'infer': - cfg.val_file_list = os.path.join(cfg.data_dir, cfg.val_file_list) - cfg.val_data_dir = os.path.join(cfg.data_dir, cfg.val_data_dir) - json_dataset = JsonDataset(train=(mode == 'train')) + json_dataset = JsonDataset(mode) roidbs = json_dataset.get_roidb() print("{} on {} with {} roidbs".format(mode, cfg.dataset, len(roidbs))) @@ -166,7 +147,7 @@ def coco(mode, iter_id = count // device_num if iter_id >= cfg.max_iter: return - elif mode == "test": + elif mode == "val": batch_out = [] for roidb in roidbs: im, im_info, im_id = roidb_reader(roidb, mode) @@ -194,7 +175,7 @@ def train(batch_size, total_batch_size=None, padding_total=False, shuffle=True): def test(batch_size, total_batch_size=None, padding_total=False): - return coco('test', batch_size, total_batch_size, shuffle=False) + return coco('val', batch_size, total_batch_size, shuffle=False) def infer(): diff --git a/fluid/PaddleCV/rcnn/roidbs.py b/fluid/PaddleCV/rcnn/roidbs.py index accc4f61..bd7e5819 100644 --- a/fluid/PaddleCV/rcnn/roidbs.py +++ b/fluid/PaddleCV/rcnn/roidbs.py @@ -42,19 +42,33 @@ from config import cfg logger = logging.getLogger(__name__) +class DatasetPath(object): + def __init__(self, mode): + self.mode = mode + mode_name = 'train' if mode == 'train' else 'val' + if cfg.dataset != 'coco2014' and cfg.dataset != 'coco2017': + raise NotImplementedError('Dataset {} not supported'.format( + cfg.dataset)) + self.sub_name = mode_name + cfg.dataset[-4:] + + def get_data_dir(self): + return os.path.join(cfg.data_dir, self.sub_name) + + def get_file_list(self): + sfile_list = 'annotations/instances_' + self.sub_name + '.json' + return os.path.join(cfg.data_dir, sfile_list) + + class JsonDataset(object): """A class representing a COCO json dataset.""" - def __init__(self, train=False): + def __init__(self, mode): print('Creating: {}'.format(cfg.dataset)) self.name = cfg.dataset - self.is_train = train - if self.is_train: - data_dir = cfg.train_data_dir - file_list = cfg.train_file_list - else: - data_dir = cfg.val_data_dir - file_list = cfg.val_file_list + self.is_train = mode == 'train' + data_path = DatasetPath(mode) + data_dir = data_path.get_data_dir() + file_list = data_path.get_file_list() self.image_directory = data_dir self.COCO = COCO(file_list) # Set up dataset classes diff --git a/fluid/PaddleCV/rcnn/train.py b/fluid/PaddleCV/rcnn/train.py index 46ee4c40..8404de31 100644 --- a/fluid/PaddleCV/rcnn/train.py +++ b/fluid/PaddleCV/rcnn/train.py @@ -36,7 +36,7 @@ def train(): learning_rate = cfg.learning_rate image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size] - if cfg.debug or cfg.enable_ce: + if cfg.enable_ce: fluid.default_startup_program().random_seed = 1000 fluid.default_main_program().random_seed = 1000 import random @@ -50,7 +50,7 @@ def train(): use_random = True if cfg.enable_ce: use_random = False - model = model_builder.FasterRCNN( + model = model_builder.RCNN( add_conv_body_func=resnet.add_ResNet50_conv4_body, add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head, use_pyreader=cfg.use_pyreader, @@ -131,8 +131,8 @@ def train(): stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])} train_stats.update(stats) logs = train_stats.log() - strs = '{}, lr: {:.5f}, {}, time: {:.3f}'.format( - now_time(), + strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format( + now_time(), iter_id, np.mean(outs[-1]), logs, start_time - prev_start_time) print(strs) sys.stdout.flush() @@ -164,8 +164,8 @@ def train(): stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])} train_stats.update(stats) logs = train_stats.log() - strs = '{}, lr: {:.5f}, {}, time: {:.3f}'.format( - now_time(), + strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format( + now_time(), iter_id, np.mean(outs[-1]), logs, start_time - prev_start_time) print(strs) sys.stdout.flush() diff --git a/fluid/PaddleCV/rcnn/utility.py b/fluid/PaddleCV/rcnn/utility.py index 2dbe74f6..7948bc13 100644 --- a/fluid/PaddleCV/rcnn/utility.py +++ b/fluid/PaddleCV/rcnn/utility.py @@ -148,7 +148,7 @@ def parse_args(): add_arg('variance', float, [1.,1.,1.,1.], "The variance of anchors.") add_arg('rpn_stride', float, [16.,16.], "Stride of the feature map that RPN is attached.") add_arg('rpn_nms_thresh', float, 0.7, "NMS threshold used on RPN proposals") - # TRAIN TEST INFER + # TRAIN VAL INFER add_arg('MASK_ON', bool, False, "Option for different models. If False, choose faster_rcnn. If True, choose mask_rcnn") add_arg('im_per_batch', int, 1, "Minibatch size.") add_arg('max_size', int, 1333, "The resized image height.") @@ -158,7 +158,6 @@ def parse_args(): add_arg('nms_thresh', float, 0.5, "NMS threshold.") add_arg('score_thresh', float, 0.05, "score threshold for NMS.") add_arg('snapshot_stride', int, 10000, "save model every snapshot stride.") - add_arg('debug', bool, False, "Debug mode") # SINGLE EVAL AND DRAW add_arg('draw_threshold', float, 0.8, "Confidence threshold to draw bbox.") add_arg('image_path', str, 'dataset/coco/val2017', "The image path used to inference and visualize.") @@ -172,5 +171,5 @@ def parse_args(): if 'train' in file_name or 'profile' in file_name: merge_cfg_from_args(args, 'train') else: - merge_cfg_from_args(args, 'test') + merge_cfg_from_args(args, 'val') return args -- GitLab