未验证 提交 2a77fc50 编写于 作者: Q qingqing01 提交者: GitHub

Enhance detection_map_op and add more checks in prior_box API. (#10796)

1. If none of the bboxes are difficult ground truth, users do not need to define a data layer for this flag, and the corresponding input of the detection_map API can be None.
2. Set default value for aspect_ratios in prior_box API.
3. Add more checks in prior_box API.
上级 be26b71b
...@@ -51,7 +51,8 @@ class DetectionMAPOp : public framework::OperatorWithKernel { ...@@ -51,7 +51,8 @@ class DetectionMAPOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(label_dims.size(), 2, PADDLE_ENFORCE_EQ(label_dims.size(), 2,
"The rank of Input(Label) must be 2, " "The rank of Input(Label) must be 2, "
"the shape is [N, 6]."); "the shape is [N, 6].");
PADDLE_ENFORCE_EQ(label_dims[1], 6, "The shape is of Input(Label) [N, 6]."); PADDLE_ENFORCE(label_dims[1] == 6 || label_dims[1] == 5,
"The shape of Input(Label) is [N, 6] or [N, 5].");
if (ctx->HasInput("PosCount")) { if (ctx->HasInput("PosCount")) {
PADDLE_ENFORCE(ctx->HasInput("TruePos"), PADDLE_ENFORCE(ctx->HasInput("TruePos"),
...@@ -88,9 +89,10 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -88,9 +89,10 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
"offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
"no detected data."); "no detected data.");
AddInput("Label", AddInput("Label",
"(LoDTensor) A 2-D LoDTensor with shape[N, 6] represents the" "(LoDTensor) A 2-D LoDTensor represents the"
"Labeled ground-truth data. Each row has 6 values: " "Labeled ground-truth data. Each row has 6 values: "
"[label, is_difficult, xmin, ymin, xmax, ymax], N is the total " "[label, xmin, ymin, xmax, ymax, is_difficult] or 5 values: "
"[label, xmin, ymin, xmax, ymax], where N is the total "
"number of ground-truth data in this mini-batch. For each " "number of ground-truth data in this mini-batch. For each "
"instance, the offsets in first dimension are called LoD, " "instance, the offsets in first dimension are called LoD, "
"the number of offset is N + 1, if LoD[i + 1] - LoD[i] == 0, " "the number of offset is N + 1, if LoD[i + 1] - LoD[i] == 0, "
......
...@@ -72,7 +72,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> { ...@@ -72,7 +72,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto* out_false_pos = ctx.Output<framework::LoDTensor>("AccumFalsePos"); auto* out_false_pos = ctx.Output<framework::LoDTensor>("AccumFalsePos");
float overlap_threshold = ctx.Attr<float>("overlap_threshold"); float overlap_threshold = ctx.Attr<float>("overlap_threshold");
float evaluate_difficult = ctx.Attr<bool>("evaluate_difficult"); bool evaluate_difficult = ctx.Attr<bool>("evaluate_difficult");
auto ap_type = GetAPType(ctx.Attr<std::string>("ap_type")); auto ap_type = GetAPType(ctx.Attr<std::string>("ap_type"));
int class_num = ctx.Attr<int>("class_num"); int class_num = ctx.Attr<int>("class_num");
...@@ -175,14 +175,20 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> { ...@@ -175,14 +175,20 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
for (int n = 0; n < batch_size; ++n) { for (int n = 0; n < batch_size; ++n) {
std::map<int, std::vector<Box>> boxes; std::map<int, std::vector<Box>> boxes;
for (size_t i = label_index[n]; i < label_index[n + 1]; ++i) { for (size_t i = label_index[n]; i < label_index[n + 1]; ++i) {
Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5));
int label = labels(i, 0); int label = labels(i, 0);
if (input_label.dims()[1] == 6) {
Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5));
auto is_difficult = labels(i, 1); auto is_difficult = labels(i, 1);
if (std::abs(is_difficult - 0.0) < 1e-6) if (std::abs(is_difficult - 0.0) < 1e-6)
box.is_difficult = false; box.is_difficult = false;
else else
box.is_difficult = true; box.is_difficult = true;
boxes[label].push_back(box); boxes[label].push_back(box);
} else {
PADDLE_ENFORCE_EQ(input_label.dims()[1], 5);
Box box(labels(i, 1), labels(i, 2), labels(i, 3), labels(i, 4));
boxes[label].push_back(box);
}
} }
gt_boxes->push_back(boxes); gt_boxes->push_back(boxes);
} }
......
...@@ -273,10 +273,11 @@ class DetectionMAP(Evaluator): ...@@ -273,10 +273,11 @@ class DetectionMAP(Evaluator):
[M, 6]. The layout is [label, confidence, xmin, ymin, xmax, ymax]. [M, 6]. The layout is [label, confidence, xmin, ymin, xmax, ymax].
gt_label (Variable): The ground truth label index, which is a LoDTensor gt_label (Variable): The ground truth label index, which is a LoDTensor
with shape [N, 1]. with shape [N, 1].
gt_difficult (Variable): Whether this ground truth is a difficult
bounding box (bbox), which is a LoDTensor [N, 1].
gt_box (Variable): The ground truth bounding box (bbox), which is a gt_box (Variable): The ground truth bounding box (bbox), which is a
LoDTensor with shape [N, 6]. The layout is [xmin, ymin, xmax, ymax]. LoDTensor with shape [N, 6]. The layout is [xmin, ymin, xmax, ymax].
gt_difficult (Variable|None): Whether this ground truth is a difficult
bounding bbox, which can be a LoDTensor [N, 1] or not set. If None,
it means all the ground truth labels are not difficult bbox.
class_num (int): The class number. class_num (int): The class number.
background_label (int): The index of background label, the background background_label (int): The index of background label, the background
label will be ignored. If set to -1, then all categories will be label will be ignored. If set to -1, then all categories will be
...@@ -284,7 +285,8 @@ class DetectionMAP(Evaluator): ...@@ -284,7 +285,8 @@ class DetectionMAP(Evaluator):
overlap_threshold (float): The threshold for deciding true/false overlap_threshold (float): The threshold for deciding true/false
positive, 0.5 by defalut. positive, 0.5 by defalut.
evaluate_difficult (bool): Whether to consider difficult ground truth evaluate_difficult (bool): Whether to consider difficult ground truth
for evaluation, True by defalut. for evaluation, True by defalut. This argument does not work when
gt_difficult is None.
ap_version (string): The average precision calculation ways, it must be ap_version (string): The average precision calculation ways, it must be
'integral' or '11point'. Please check 'integral' or '11point'. Please check
https://sanchom.wordpress.com/tag/average-precision/ for details. https://sanchom.wordpress.com/tag/average-precision/ for details.
...@@ -295,7 +297,7 @@ class DetectionMAP(Evaluator): ...@@ -295,7 +297,7 @@ class DetectionMAP(Evaluator):
exe = fluid.executor(place) exe = fluid.executor(place)
map_evaluator = fluid.Evaluator.DetectionMAP(input, map_evaluator = fluid.Evaluator.DetectionMAP(input,
gt_label, gt_difficult, gt_box) gt_label, gt_box, gt_difficult)
cur_map, accum_map = map_evaluator.get_map_var() cur_map, accum_map = map_evaluator.get_map_var()
fetch = [cost, cur_map, accum_map] fetch = [cost, cur_map, accum_map]
for epoch in PASS_NUM: for epoch in PASS_NUM:
...@@ -313,8 +315,8 @@ class DetectionMAP(Evaluator): ...@@ -313,8 +315,8 @@ class DetectionMAP(Evaluator):
input, input,
gt_label, gt_label,
gt_box, gt_box,
gt_difficult, gt_difficult=None,
class_num, class_num=None,
background_label=0, background_label=0,
overlap_threshold=0.5, overlap_threshold=0.5,
evaluate_difficult=True, evaluate_difficult=True,
...@@ -322,8 +324,11 @@ class DetectionMAP(Evaluator): ...@@ -322,8 +324,11 @@ class DetectionMAP(Evaluator):
super(DetectionMAP, self).__init__("map_eval") super(DetectionMAP, self).__init__("map_eval")
gt_label = layers.cast(x=gt_label, dtype=gt_box.dtype) gt_label = layers.cast(x=gt_label, dtype=gt_box.dtype)
if gt_difficult:
gt_difficult = layers.cast(x=gt_difficult, dtype=gt_box.dtype) gt_difficult = layers.cast(x=gt_difficult, dtype=gt_box.dtype)
label = layers.concat([gt_label, gt_difficult, gt_box], axis=1) label = layers.concat([gt_label, gt_difficult, gt_box], axis=1)
else:
label = layers.concat([gt_label, gt_box], axis=1)
# calculate mean average precision (mAP) of current mini-batch # calculate mean average precision (mAP) of current mini-batch
map = layers.detection_map( map = layers.detection_map(
......
...@@ -569,7 +569,7 @@ def prior_box(input, ...@@ -569,7 +569,7 @@ def prior_box(input,
image, image,
min_sizes, min_sizes,
max_sizes=None, max_sizes=None,
aspect_ratios=None, aspect_ratios=[1.],
variance=[0.1, 0.1, 0.2, 0.2], variance=[0.1, 0.1, 0.2, 0.2],
flip=False, flip=False,
clip=False, clip=False,
...@@ -589,19 +589,19 @@ def prior_box(input, ...@@ -589,19 +589,19 @@ def prior_box(input,
input(Variable): The Input Variables, the format is NCHW. input(Variable): The Input Variables, the format is NCHW.
image(Variable): The input image data of PriorBoxOp, image(Variable): The input image data of PriorBoxOp,
the layout is NCHW. the layout is NCHW.
min_sizes(list|tuple): min sizes of generated prior boxes. min_sizes(list|tuple|float value): min sizes of generated prior boxes.
max_sizes(list|tuple|None): max sizes of generated prior boxes. max_sizes(list|tuple|None): max sizes of generated prior boxes.
Default: None. Default: None.
aspect_ratios(list|tuple): the aspect ratios of generated prior aspect_ratios(list|tuple|float value): the aspect ratios of generated
boxes. Default: None. prior boxes. Default: [1.].
variance(list|tuple): the variances to be encoded in prior boxes. variance(list|tuple): the variances to be encoded in prior boxes.
Default:[0.1, 0.1, 0.2, 0.2]. Default:[0.1, 0.1, 0.2, 0.2].
flip(bool): Whether to flip aspect ratios. Default:False. flip(bool): Whether to flip aspect ratios. Default:False.
clip(bool): Whether to clip out-of-boundary boxes. Default: False. clip(bool): Whether to clip out-of-boundary boxes. Default: False.
step(list|turple): Prior boxes step across weight and height, If step(list|turple): Prior boxes step across width and height, If
step[0] == 0.0/step[1] == 0.0, the prior boxes step across step[0] == 0.0/step[1] == 0.0, the prior boxes step across
height/weight of the input will be automatically calculated. height/weight of the input will be automatically calculated.
Default: [0.0] Default: [0., 0.]
offset(float): Prior boxes center offset. Default: 0.5 offset(float): Prior boxes center offset. Default: 0.5
name(str): Name of the prior box op. Default: None. name(str): Name of the prior box op. Default: None.
...@@ -630,6 +630,21 @@ def prior_box(input, ...@@ -630,6 +630,21 @@ def prior_box(input,
helper = LayerHelper("prior_box", **locals()) helper = LayerHelper("prior_box", **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
def _is_list_or_tuple_(data):
return (isinstance(data, list) or isinstance(data, tuple))
if not _is_list_or_tuple_(min_sizes):
min_sizes = [min_sizes]
if not _is_list_or_tuple_(aspect_ratios):
aspect_ratios = [aspect_ratios]
if not (_is_list_or_tuple_(steps) and len(steps) == 2):
raise ValueError('steps should be a list or tuple ',
'with length 2, (step_width, step_height).')
min_sizes = list(map(float, min_sizes))
aspect_ratios = list(map(float, aspect_ratios))
steps = list(map(float, steps))
attrs = { attrs = {
'min_sizes': min_sizes, 'min_sizes': min_sizes,
'aspect_ratios': aspect_ratios, 'aspect_ratios': aspect_ratios,
...@@ -641,6 +656,8 @@ def prior_box(input, ...@@ -641,6 +656,8 @@ def prior_box(input,
'offset': offset 'offset': offset
} }
if max_sizes is not None and len(max_sizes) > 0 and max_sizes[0] > 0: if max_sizes is not None and len(max_sizes) > 0 and max_sizes[0] > 0:
if not _is_list_or_tuple_(max_sizes):
max_sizes = [max_sizes]
attrs['max_sizes'] = max_sizes attrs['max_sizes'] = max_sizes
box = helper.create_tmp_variable(dtype) box = helper.create_tmp_variable(dtype)
......
...@@ -160,7 +160,9 @@ class TestDetectionMAPOp(OpTest): ...@@ -160,7 +160,9 @@ class TestDetectionMAPOp(OpTest):
label_count, true_pos, false_pos = get_input_pos( label_count, true_pos, false_pos = get_input_pos(
self.class_pos_count, self.true_pos, self.true_pos_lod, self.class_pos_count, self.true_pos, self.true_pos_lod,
self.false_pos, self.false_pos_lod) self.false_pos, self.false_pos_lod)
for (label, difficult, xmin, ymin, xmax, ymax) in self.label: for v in self.label:
label = v[0]
difficult = False if len(v) == 5 else v[1]
if self.evaluate_difficult: if self.evaluate_difficult:
label_count[label] += 1 label_count[label] += 1
elif not difficult: elif not difficult:
...@@ -245,6 +247,15 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp): ...@@ -245,6 +247,15 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp):
[2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]] [2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]]
class TestDetectionMAPOpWithoutDiff(TestDetectionMAPOp):
def init_test_case(self):
super(TestDetectionMAPOpWithoutDiff, self).init_test_case()
# label xmin ymin xmax ymax
self.label = [[1, 0.1, 0.1, 0.3, 0.3], [1, 0.6, 0.6, 0.8, 0.8],
[2, 0.3, 0.3, 0.6, 0.5], [1, 0.7, 0.1, 0.9, 0.3]]
class TestDetectionMAPOp11Point(TestDetectionMAPOp): class TestDetectionMAPOp11Point(TestDetectionMAPOp):
def init_test_case(self): def init_test_case(self):
super(TestDetectionMAPOp11Point, self).init_test_case() super(TestDetectionMAPOp11Point, self).init_test_case()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册