From b3d26cd3adb2a8979179a52b4765582bc23bc59f Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Mon, 12 Mar 2018 15:03:04 +0800 Subject: [PATCH] Fix bug in detection_output and mAP calculation in SSD. (#8985) * Clipping bbox in the mAP evaluator calculation. * Fix bug in detection_output and mAP calculation in SSD. * Fix bug in detection.py. * Fix bug in test_detection_map_op.py. --- paddle/fluid/operators/detection_map_op.h | 10 ++-- paddle/fluid/operators/prior_box_op.cc | 3 +- paddle/fluid/operators/prior_box_op.h | 45 ++++++---------- python/paddle/fluid/layers/detection.py | 51 ++++++++++--------- .../tests/unittests/test_detection_map_op.py | 2 - 5 files changed, 50 insertions(+), 61 deletions(-) diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h index a009e9dfc..8c15bfa36 100644 --- a/paddle/fluid/operators/detection_map_op.h +++ b/paddle/fluid/operators/detection_map_op.h @@ -273,7 +273,6 @@ class DetectionMAPOpKernel : public framework::OpKernel { std::map>>& true_pos, std::map>>& false_pos, const int class_num) const { - constexpr T kEPS = static_cast(1e-6); const int* pos_count_data = input_pos_count.data(); for (int i = 0; i < class_num; ++i) { label_pos_count[i] = pos_count_data[i]; @@ -282,12 +281,11 @@ class DetectionMAPOpKernel : public framework::OpKernel { auto SetData = [](const framework::LoDTensor& pos_tensor, std::map>>& pos) { const T* pos_data = pos_tensor.data(); - auto pos_data_lod = pos_tensor.lod(); - for (size_t i = 0; i < pos_data_lod.size(); ++i) { - for (size_t j = pos_data_lod[0][i]; j < pos_data_lod[0][i + 1]; ++j) { + auto pos_data_lod = pos_tensor.lod()[0]; + for (size_t i = 0; i < pos_data_lod.size() - 1; ++i) { + for (size_t j = pos_data_lod[i]; j < pos_data_lod[i + 1]; ++j) { T score = pos_data[j * 2]; - int flag = 1; - if (pos_data[j * 2 + 1] < kEPS) flag = 0; + int flag = pos_data[j * 2 + 1]; pos[i].push_back(std::make_pair(score, flag)); } } diff --git a/paddle/fluid/operators/prior_box_op.cc b/paddle/fluid/operators/prior_box_op.cc index be7898c22..7ba55437c 100644 --- a/paddle/fluid/operators/prior_box_op.cc +++ b/paddle/fluid/operators/prior_box_op.cc @@ -111,7 +111,8 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { }); AddAttr>( "max_sizes", - "(vector) List of max sizes of generated prior boxes."); + "(vector) List of max sizes of generated prior boxes.") + .SetDefault(std::vector{}); AddAttr>( "aspect_ratios", "(vector) List of aspect ratios of generated prior boxes."); diff --git a/paddle/fluid/operators/prior_box_op.h b/paddle/fluid/operators/prior_box_op.h index 0113d2f09..18bb2deb6 100644 --- a/paddle/fluid/operators/prior_box_op.h +++ b/paddle/fluid/operators/prior_box_op.h @@ -97,9 +97,6 @@ class PriorBoxOpKernel : public framework::OpKernel { boxes->mutable_data(ctx.GetPlace()); vars->mutable_data(ctx.GetPlace()); - T inv_img_width = 1.0 / img_width; - T inv_img_height = 1.0 / img_height; - auto e_boxes = framework::EigenTensor::From(*boxes); for (int h = 0; h < feature_height; ++h) { for (int w = 0; w < feature_width; ++w) { @@ -110,36 +107,30 @@ class PriorBoxOpKernel : public framework::OpKernel { for (size_t s = 0; s < min_sizes.size(); ++s) { auto min_size = min_sizes[s]; // first prior: aspect_ratio = 1, size = min_size - box_width = box_height = min_size; + box_width = box_height = min_size / 2.; // xmin - e_boxes(h, w, idx, 0) = (center_x - box_width * 0.5) * inv_img_width; + e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width; // ymin - e_boxes(h, w, idx, 1) = - (center_y - box_height * 0.5) * inv_img_height; + e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height; // xmax - e_boxes(h, w, idx, 2) = (center_x + box_width * 0.5) * inv_img_width; + e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width; // ymax - e_boxes(h, w, idx, 3) = - (center_y + box_height * 0.5) * inv_img_height; + e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height; idx++; if (max_sizes.size() > 0) { auto max_size = max_sizes[s]; // second prior: aspect_ratio = 1, // size = sqrt(min_size * max_size) - box_width = box_height = sqrt(min_size * max_size); + box_width = box_height = sqrt(min_size * max_size) / 2.; // xmin - e_boxes(h, w, idx, 0) = - (center_x - box_width * 0.5) * inv_img_width; + e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width; // ymin - e_boxes(h, w, idx, 1) = - (center_y - box_height * 0.5) * inv_img_height; + e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height; // xmax - e_boxes(h, w, idx, 2) = - (center_x + box_width * 0.5) * inv_img_width; + e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width; // ymax - e_boxes(h, w, idx, 3) = - (center_y + box_height * 0.5) * inv_img_height; + e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height; idx++; } @@ -149,20 +140,16 @@ class PriorBoxOpKernel : public framework::OpKernel { if (fabs(ar - 1.) < 1e-6) { continue; } - box_width = min_size * sqrt(ar); - box_height = min_size / sqrt(ar); + box_width = min_size * sqrt(ar) / 2.; + box_height = min_size / sqrt(ar) / 2.; // xmin - e_boxes(h, w, idx, 0) = - (center_x - box_width * 0.5) * inv_img_width; + e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width; // ymin - e_boxes(h, w, idx, 1) = - (center_y - box_height * 0.5) * inv_img_height; + e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height; // xmax - e_boxes(h, w, idx, 2) = - (center_x + box_width * 0.5) * inv_img_width; + e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width; // ymax - e_boxes(h, w, idx, 3) = - (center_y + box_height * 0.5) * inv_img_height; + e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height; idx++; } } diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 2bf7cf21c..ea189749b 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -130,8 +130,13 @@ def detection_output(loc, target_box=loc, code_type='decode_center_size') - nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype) + old_shape = scores.shape + scores = ops.reshape(x=scores, shape=(-1, old_shape[-1])) + scores = ops.softmax(x=scores) + scores = ops.reshape(x=scores, shape=old_shape) scores = nn.transpose(scores, perm=[0, 2, 1]) + + nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype) helper.append_op( type="multiclass_nms", inputs={'Scores': scores, @@ -562,16 +567,16 @@ def multi_box_head(inputs, base_size, num_classes, aspect_ratios, - min_ratio, - max_ratio, + min_ratio=None, + max_ratio=None, min_sizes=None, max_sizes=None, steps=None, step_w=None, step_h=None, offset=0.5, - variance=[0.1, 0.1, 0.1, 0.1], - flip=False, + variance=[0.1, 0.1, 0.2, 0.2], + flip=True, clip=False, kernel_size=1, pad=0, @@ -614,7 +619,7 @@ def multi_box_head(inputs, the inputs[i] will be automatically calculated. Default: None. offset(float): Prior boxes center offset. Default: 0.5 variance(list|tuple): the variances to be encoded in prior boxes. - Default:[0.1, 0.1, 0.1, 0.1]. + Default:[0.1, 0.1, 0.2, 0.2]. flip(bool): Whether to flip aspect ratios. Default:False. clip(bool): Whether to clip out-of-boundary boxes. Default: False. kernel_size(int): The kernel size of conv2d. Default: 1. @@ -668,6 +673,19 @@ def multi_box_head(inputs, helper = LayerHelper("prior_box", **locals()) dtype = helper.input_dtype() + attrs = { + 'min_sizes': min_sizes, + 'aspect_ratios': aspect_ratios, + 'variances': variance, + 'flip': flip, + 'clip': clip, + 'step_w': step_w, + 'step_h': step_h, + 'offset': offset + } + if len(max_sizes) > 0 and max_sizes[0] > 0: + attrs['max_sizes'] = max_sizes + box = helper.create_tmp_variable(dtype) var = helper.create_tmp_variable(dtype) helper.append_op( @@ -676,17 +694,7 @@ def multi_box_head(inputs, "Image": image}, outputs={"Boxes": box, "Variances": var}, - attrs={ - 'min_sizes': min_sizes, - 'max_sizes': max_sizes, - 'aspect_ratios': aspect_ratios, - 'variances': variance, - 'flip': flip, - 'clip': clip, - 'step_w': step_w, - 'step_h': step_h, - 'offset': offset - }) + attrs=attrs, ) return box, var def _reshape_with_axis_(input, axis=1): @@ -714,7 +722,7 @@ def multi_box_head(inputs, if num_layer <= 2: assert min_sizes is not None and max_sizes is not None assert len(min_sizes) == num_layer and len(max_sizes) == num_layer - else: + elif min_sizes is None and max_sizes is None: min_sizes = [] max_sizes = [] step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2))) @@ -759,9 +767,6 @@ def multi_box_head(inputs, min_size = [min_size] if not _is_list_or_tuple_(max_size): max_size = [max_size] - if not (len(max_size) == len(min_size)): - raise ValueError( - 'the length of max_size and min_size should be equal.') aspect_ratio = [] if aspect_ratios is not None: @@ -779,7 +784,7 @@ def multi_box_head(inputs, num_boxes = box.shape[2] - # get box_loc + # get loc num_loc_output = num_boxes * 4 mbox_loc = nn.conv2d( input=input, @@ -796,7 +801,7 @@ def multi_box_head(inputs, mbox_loc_flatten = ops.reshape(mbox_loc, shape=new_shape) mbox_locs.append(mbox_loc_flatten) - # get conf_loc + # get conf num_conf_output = num_boxes * num_classes conf_loc = nn.conv2d( input=input, diff --git a/python/paddle/fluid/tests/unittests/test_detection_map_op.py b/python/paddle/fluid/tests/unittests/test_detection_map_op.py index f3197a623..a905a854a 100644 --- a/python/paddle/fluid/tests/unittests/test_detection_map_op.py +++ b/python/paddle/fluid/tests/unittests/test_detection_map_op.py @@ -166,8 +166,6 @@ class TestDetectionMAPOp(OpTest): elif not difficult: label_count[label] += 1 - true_pos = collections.defaultdict(list) - false_pos = collections.defaultdict(list) for (label, score, tp, fp) in tf_pos: true_pos[label].append([score, tp]) false_pos[label].append([score, fp]) -- GitLab