diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index 0ccf701b61349274ce0627dfeaf7cfad384215cd..716c8625d35308f98582e6802e90d99d643e188b 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -51,7 +51,8 @@ class DetectionMAPOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(label_dims.size(), 2, "The rank of Input(Label) must be 2, " "the shape is [N, 6]."); - PADDLE_ENFORCE_EQ(label_dims[1], 6, "The shape is of Input(Label) [N, 6]."); + PADDLE_ENFORCE(label_dims[1] == 6 || label_dims[1] == 5, + "The shape of Input(Label) is [N, 6] or [N, 5]."); if (ctx->HasInput("PosCount")) { PADDLE_ENFORCE(ctx->HasInput("TruePos"), @@ -88,9 +89,10 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " "no detected data."); AddInput("Label", - "(LoDTensor) A 2-D LoDTensor with shape[N, 6] represents the" + "(LoDTensor) A 2-D LoDTensor represents the" "Labeled ground-truth data. Each row has 6 values: " - "[label, is_difficult, xmin, ymin, xmax, ymax], N is the total " + "[label, xmin, ymin, xmax, ymax, is_difficult] or 5 values: " + "[label, xmin, ymin, xmax, ymax], where N is the total " "number of ground-truth data in this mini-batch. 
For each " "instance, the offsets in first dimension are called LoD, " "the number of offset is N + 1, if LoD[i + 1] - LoD[i] == 0, " diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h index 431812e2bfcf926cadf8d7be6a7d1a79e78c7762..dd1ab85fd8d0c8170afcd9dd2a49ee55c41dc8be 100644 --- a/paddle/fluid/operators/detection_map_op.h +++ b/paddle/fluid/operators/detection_map_op.h @@ -72,7 +72,7 @@ class DetectionMAPOpKernel : public framework::OpKernel { auto* out_false_pos = ctx.Output("AccumFalsePos"); float overlap_threshold = ctx.Attr("overlap_threshold"); - float evaluate_difficult = ctx.Attr("evaluate_difficult"); + bool evaluate_difficult = ctx.Attr("evaluate_difficult"); auto ap_type = GetAPType(ctx.Attr("ap_type")); int class_num = ctx.Attr("class_num"); @@ -175,14 +175,20 @@ class DetectionMAPOpKernel : public framework::OpKernel { for (int n = 0; n < batch_size; ++n) { std::map> boxes; for (size_t i = label_index[n]; i < label_index[n + 1]; ++i) { - Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5)); int label = labels(i, 0); - auto is_difficult = labels(i, 1); - if (std::abs(is_difficult - 0.0) < 1e-6) - box.is_difficult = false; - else - box.is_difficult = true; - boxes[label].push_back(box); + if (input_label.dims()[1] == 6) { + Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5)); + auto is_difficult = labels(i, 1); + if (std::abs(is_difficult - 0.0) < 1e-6) + box.is_difficult = false; + else + box.is_difficult = true; + boxes[label].push_back(box); + } else { + PADDLE_ENFORCE_EQ(input_label.dims()[1], 5); + Box box(labels(i, 1), labels(i, 2), labels(i, 3), labels(i, 4)); + boxes[label].push_back(box); + } } gt_boxes->push_back(boxes); } diff --git a/python/paddle/fluid/evaluator.py b/python/paddle/fluid/evaluator.py index 1ee1d3727174c079d2c217dede27ff1a0316c01c..7c6ad6f27dcfd7040f79c72c01413c8cc84a28ba 100644 --- a/python/paddle/fluid/evaluator.py +++ 
b/python/paddle/fluid/evaluator.py @@ -273,10 +273,11 @@ class DetectionMAP(Evaluator): [M, 6]. The layout is [label, confidence, xmin, ymin, xmax, ymax]. gt_label (Variable): The ground truth label index, which is a LoDTensor with shape [N, 1]. - gt_difficult (Variable): Whether this ground truth is a difficult - bounding box (bbox), which is a LoDTensor [N, 1]. gt_box (Variable): The ground truth bounding box (bbox), which is a LoDTensor with shape [N, 6]. The layout is [xmin, ymin, xmax, ymax]. + gt_difficult (Variable|None): Whether this ground truth is a difficult + bounding box (bbox), which can be a LoDTensor [N, 1] or not set. If None, + it means that none of the ground truth bboxes is difficult. class_num (int): The class number. background_label (int): The index of background label, the background label will be ignored. If set to -1, then all categories will be @@ -284,7 +285,8 @@ class DetectionMAP(Evaluator): overlap_threshold (float): The threshold for deciding true/false positive, 0.5 by defalut. evaluate_difficult (bool): Whether to consider difficult ground truth - for evaluation, True by defalut. + for evaluation, True by default. This argument has no effect when + gt_difficult is None. ap_version (string): The average precision calculation ways, it must be 'integral' or '11point'. Please check https://sanchom.wordpress.com/tag/average-precision/ for details. 
@@ -295,7 +297,7 @@ class DetectionMAP(Evaluator): exe = fluid.executor(place) map_evaluator = fluid.Evaluator.DetectionMAP(input, - gt_label, gt_difficult, gt_box) + gt_label, gt_box, gt_difficult) cur_map, accum_map = map_evaluator.get_map_var() fetch = [cost, cur_map, accum_map] for epoch in PASS_NUM: @@ -313,8 +315,8 @@ class DetectionMAP(Evaluator): input, gt_label, gt_box, - gt_difficult, - class_num, + gt_difficult=None, + class_num=None, background_label=0, overlap_threshold=0.5, evaluate_difficult=True, @@ -322,8 +324,11 @@ class DetectionMAP(Evaluator): super(DetectionMAP, self).__init__("map_eval") gt_label = layers.cast(x=gt_label, dtype=gt_box.dtype) - gt_difficult = layers.cast(x=gt_difficult, dtype=gt_box.dtype) - label = layers.concat([gt_label, gt_difficult, gt_box], axis=1) + if gt_difficult: + gt_difficult = layers.cast(x=gt_difficult, dtype=gt_box.dtype) + label = layers.concat([gt_label, gt_difficult, gt_box], axis=1) + else: + label = layers.concat([gt_label, gt_box], axis=1) # calculate mean average precision (mAP) of current mini-batch map = layers.detection_map( diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index b33adf55cf1ded9795043e108f5814d3fc0e3ded..3a83db12fd13651578deeac6b562bac2f1e4e4b6 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -569,7 +569,7 @@ def prior_box(input, image, min_sizes, max_sizes=None, - aspect_ratios=None, + aspect_ratios=[1.], variance=[0.1, 0.1, 0.2, 0.2], flip=False, clip=False, @@ -589,19 +589,19 @@ def prior_box(input, input(Variable): The Input Variables, the format is NCHW. image(Variable): The input image data of PriorBoxOp, the layout is NCHW. - min_sizes(list|tuple): min sizes of generated prior boxes. + min_sizes(list|tuple|float value): min sizes of generated prior boxes. max_sizes(list|tuple|None): max sizes of generated prior boxes. Default: None. 
- aspect_ratios(list|tuple): the aspect ratios of generated prior - boxes. Default: None. + aspect_ratios(list|tuple|float value): the aspect ratios of generated + prior boxes. Default: [1.]. variance(list|tuple): the variances to be encoded in prior boxes. Default:[0.1, 0.1, 0.2, 0.2]. flip(bool): Whether to flip aspect ratios. Default:False. clip(bool): Whether to clip out-of-boundary boxes. Default: False. - step(list|turple): Prior boxes step across weight and height, If + step(list|tuple): Prior boxes step across width and height, If step[0] == 0.0/step[1] == 0.0, the prior boxes step across - height/weight of the input will be automatically calculated. - Default: [0.0] + width/height of the input will be automatically calculated. + Default: [0., 0.] offset(float): Prior boxes center offset. Default: 0.5 name(str): Name of the prior box op. Default: None. @@ -630,6 +630,21 @@ def prior_box(input, helper = LayerHelper("prior_box", **locals()) dtype = helper.input_dtype() + def _is_list_or_tuple_(data): + return (isinstance(data, list) or isinstance(data, tuple)) + + if not _is_list_or_tuple_(min_sizes): + min_sizes = [min_sizes] + if not _is_list_or_tuple_(aspect_ratios): + aspect_ratios = [aspect_ratios] + if not (_is_list_or_tuple_(steps) and len(steps) == 2): + raise ValueError('steps should be a list or tuple ', + 'with length 2, (step_width, step_height).') + + min_sizes = list(map(float, min_sizes)) + aspect_ratios = list(map(float, aspect_ratios)) + steps = list(map(float, steps)) + attrs = { 'min_sizes': min_sizes, 'aspect_ratios': aspect_ratios, @@ -641,6 +656,8 @@ def prior_box(input, 'offset': offset } if max_sizes is not None and len(max_sizes) > 0 and max_sizes[0] > 0: + if not _is_list_or_tuple_(max_sizes): + max_sizes = [max_sizes] attrs['max_sizes'] = max_sizes box = helper.create_tmp_variable(dtype) diff --git a/python/paddle/fluid/tests/unittests/test_detection_map_op.py b/python/paddle/fluid/tests/unittests/test_detection_map_op.py index 
a905a854ad157ffa3d7816dfbd445f3e344a1249..f545ad155ccd28c2d34e424d307eed49b37f20fb 100644 --- a/python/paddle/fluid/tests/unittests/test_detection_map_op.py +++ b/python/paddle/fluid/tests/unittests/test_detection_map_op.py @@ -160,7 +160,9 @@ class TestDetectionMAPOp(OpTest): label_count, true_pos, false_pos = get_input_pos( self.class_pos_count, self.true_pos, self.true_pos_lod, self.false_pos, self.false_pos_lod) - for (label, difficult, xmin, ymin, xmax, ymax) in self.label: + for v in self.label: + label = v[0] + difficult = False if len(v) == 5 else v[1] if self.evaluate_difficult: label_count[label] += 1 elif not difficult: @@ -245,6 +247,15 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp): [2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]] +class TestDetectionMAPOpWithoutDiff(TestDetectionMAPOp): + def init_test_case(self): + super(TestDetectionMAPOpWithoutDiff, self).init_test_case() + + # label xmin ymin xmax ymax + self.label = [[1, 0.1, 0.1, 0.3, 0.3], [1, 0.6, 0.6, 0.8, 0.8], + [2, 0.3, 0.3, 0.6, 0.5], [1, 0.7, 0.1, 0.9, 0.3]] + + class TestDetectionMAPOp11Point(TestDetectionMAPOp): def init_test_case(self): super(TestDetectionMAPOp11Point, self).init_test_case()