未验证 提交 2a77fc50 编写于 作者: Q qingqing01 提交者: GitHub

Enhance detection_map_op and more check in prior_box API. (#10796)

1. If all bboxes are not difficult ground truth, the users can not define the data layer for this flag and not the input can be None for detection_map API.
2. Set default value for aspect_ratios in prior_box API.
3. Add more check in prior_box API.
上级 be26b71b
......@@ -51,7 +51,8 @@ class DetectionMAPOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(label_dims.size(), 2,
"The rank of Input(Label) must be 2, "
"the shape is [N, 6].");
PADDLE_ENFORCE_EQ(label_dims[1], 6, "The shape is of Input(Label) [N, 6].");
PADDLE_ENFORCE(label_dims[1] == 6 || label_dims[1] == 5,
"The shape of Input(Label) is [N, 6] or [N, 5].");
if (ctx->HasInput("PosCount")) {
PADDLE_ENFORCE(ctx->HasInput("TruePos"),
......@@ -88,9 +89,10 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
"offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
"no detected data.");
AddInput("Label",
"(LoDTensor) A 2-D LoDTensor with shape[N, 6] represents the"
"(LoDTensor) A 2-D LoDTensor represents the"
"Labeled ground-truth data. Each row has 6 values: "
"[label, is_difficult, xmin, ymin, xmax, ymax], N is the total "
"[label, xmin, ymin, xmax, ymax, is_difficult] or 5 values: "
"[label, xmin, ymin, xmax, ymax], where N is the total "
"number of ground-truth data in this mini-batch. For each "
"instance, the offsets in first dimension are called LoD, "
"the number of offset is N + 1, if LoD[i + 1] - LoD[i] == 0, "
......
......@@ -72,7 +72,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto* out_false_pos = ctx.Output<framework::LoDTensor>("AccumFalsePos");
float overlap_threshold = ctx.Attr<float>("overlap_threshold");
float evaluate_difficult = ctx.Attr<bool>("evaluate_difficult");
bool evaluate_difficult = ctx.Attr<bool>("evaluate_difficult");
auto ap_type = GetAPType(ctx.Attr<std::string>("ap_type"));
int class_num = ctx.Attr<int>("class_num");
......@@ -175,14 +175,20 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
for (int n = 0; n < batch_size; ++n) {
std::map<int, std::vector<Box>> boxes;
for (size_t i = label_index[n]; i < label_index[n + 1]; ++i) {
Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5));
int label = labels(i, 0);
auto is_difficult = labels(i, 1);
if (std::abs(is_difficult - 0.0) < 1e-6)
box.is_difficult = false;
else
box.is_difficult = true;
boxes[label].push_back(box);
if (input_label.dims()[1] == 6) {
Box box(labels(i, 2), labels(i, 3), labels(i, 4), labels(i, 5));
auto is_difficult = labels(i, 1);
if (std::abs(is_difficult - 0.0) < 1e-6)
box.is_difficult = false;
else
box.is_difficult = true;
boxes[label].push_back(box);
} else {
PADDLE_ENFORCE_EQ(input_label.dims()[1], 5);
Box box(labels(i, 1), labels(i, 2), labels(i, 3), labels(i, 4));
boxes[label].push_back(box);
}
}
gt_boxes->push_back(boxes);
}
......
......@@ -273,10 +273,11 @@ class DetectionMAP(Evaluator):
[M, 6]. The layout is [label, confidence, xmin, ymin, xmax, ymax].
gt_label (Variable): The ground truth label index, which is a LoDTensor
with shape [N, 1].
gt_difficult (Variable): Whether this ground truth is a difficult
bounding box (bbox), which is a LoDTensor [N, 1].
gt_box (Variable): The ground truth bounding box (bbox), which is a
LoDTensor with shape [N, 6]. The layout is [xmin, ymin, xmax, ymax].
gt_difficult (Variable|None): Whether this ground truth is a difficult
bounding bbox, which can be a LoDTensor [N, 1] or not set. If None,
it means all the ground truth labels are not difficult bbox.
class_num (int): The class number.
background_label (int): The index of background label, the background
label will be ignored. If set to -1, then all categories will be
......@@ -284,7 +285,8 @@ class DetectionMAP(Evaluator):
overlap_threshold (float): The threshold for deciding true/false
positive, 0.5 by defalut.
evaluate_difficult (bool): Whether to consider difficult ground truth
for evaluation, True by defalut.
for evaluation, True by defalut. This argument does not work when
gt_difficult is None.
ap_version (string): The average precision calculation ways, it must be
'integral' or '11point'. Please check
https://sanchom.wordpress.com/tag/average-precision/ for details.
......@@ -295,7 +297,7 @@ class DetectionMAP(Evaluator):
exe = fluid.executor(place)
map_evaluator = fluid.Evaluator.DetectionMAP(input,
gt_label, gt_difficult, gt_box)
gt_label, gt_box, gt_difficult)
cur_map, accum_map = map_evaluator.get_map_var()
fetch = [cost, cur_map, accum_map]
for epoch in PASS_NUM:
......@@ -313,8 +315,8 @@ class DetectionMAP(Evaluator):
input,
gt_label,
gt_box,
gt_difficult,
class_num,
gt_difficult=None,
class_num=None,
background_label=0,
overlap_threshold=0.5,
evaluate_difficult=True,
......@@ -322,8 +324,11 @@ class DetectionMAP(Evaluator):
super(DetectionMAP, self).__init__("map_eval")
gt_label = layers.cast(x=gt_label, dtype=gt_box.dtype)
gt_difficult = layers.cast(x=gt_difficult, dtype=gt_box.dtype)
label = layers.concat([gt_label, gt_difficult, gt_box], axis=1)
if gt_difficult:
gt_difficult = layers.cast(x=gt_difficult, dtype=gt_box.dtype)
label = layers.concat([gt_label, gt_difficult, gt_box], axis=1)
else:
label = layers.concat([gt_label, gt_box], axis=1)
# calculate mean average precision (mAP) of current mini-batch
map = layers.detection_map(
......
......@@ -569,7 +569,7 @@ def prior_box(input,
image,
min_sizes,
max_sizes=None,
aspect_ratios=None,
aspect_ratios=[1.],
variance=[0.1, 0.1, 0.2, 0.2],
flip=False,
clip=False,
......@@ -589,19 +589,19 @@ def prior_box(input,
input(Variable): The Input Variables, the format is NCHW.
image(Variable): The input image data of PriorBoxOp,
the layout is NCHW.
min_sizes(list|tuple): min sizes of generated prior boxes.
min_sizes(list|tuple|float value): min sizes of generated prior boxes.
max_sizes(list|tuple|None): max sizes of generated prior boxes.
Default: None.
aspect_ratios(list|tuple): the aspect ratios of generated prior
boxes. Default: None.
aspect_ratios(list|tuple|float value): the aspect ratios of generated
prior boxes. Default: [1.].
variance(list|tuple): the variances to be encoded in prior boxes.
Default:[0.1, 0.1, 0.2, 0.2].
flip(bool): Whether to flip aspect ratios. Default:False.
clip(bool): Whether to clip out-of-boundary boxes. Default: False.
step(list|turple): Prior boxes step across weight and height, If
step(list|turple): Prior boxes step across width and height, If
step[0] == 0.0/step[1] == 0.0, the prior boxes step across
height/weight of the input will be automatically calculated.
Default: [0.0]
height/weight of the input will be automatically calculated.
Default: [0., 0.]
offset(float): Prior boxes center offset. Default: 0.5
name(str): Name of the prior box op. Default: None.
......@@ -630,6 +630,21 @@ def prior_box(input,
helper = LayerHelper("prior_box", **locals())
dtype = helper.input_dtype()
def _is_list_or_tuple_(data):
return (isinstance(data, list) or isinstance(data, tuple))
if not _is_list_or_tuple_(min_sizes):
min_sizes = [min_sizes]
if not _is_list_or_tuple_(aspect_ratios):
aspect_ratios = [aspect_ratios]
if not (_is_list_or_tuple_(steps) and len(steps) == 2):
raise ValueError('steps should be a list or tuple ',
'with length 2, (step_width, step_height).')
min_sizes = list(map(float, min_sizes))
aspect_ratios = list(map(float, aspect_ratios))
steps = list(map(float, steps))
attrs = {
'min_sizes': min_sizes,
'aspect_ratios': aspect_ratios,
......@@ -641,6 +656,8 @@ def prior_box(input,
'offset': offset
}
if max_sizes is not None and len(max_sizes) > 0 and max_sizes[0] > 0:
if not _is_list_or_tuple_(max_sizes):
max_sizes = [max_sizes]
attrs['max_sizes'] = max_sizes
box = helper.create_tmp_variable(dtype)
......
......@@ -160,7 +160,9 @@ class TestDetectionMAPOp(OpTest):
label_count, true_pos, false_pos = get_input_pos(
self.class_pos_count, self.true_pos, self.true_pos_lod,
self.false_pos, self.false_pos_lod)
for (label, difficult, xmin, ymin, xmax, ymax) in self.label:
for v in self.label:
label = v[0]
difficult = False if len(v) == 5 else v[1]
if self.evaluate_difficult:
label_count[label] += 1
elif not difficult:
......@@ -245,6 +247,15 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp):
[2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]]
class TestDetectionMAPOpWithoutDiff(TestDetectionMAPOp):
def init_test_case(self):
super(TestDetectionMAPOpWithoutDiff, self).init_test_case()
# label xmin ymin xmax ymax
self.label = [[1, 0.1, 0.1, 0.3, 0.3], [1, 0.6, 0.6, 0.8, 0.8],
[2, 0.3, 0.3, 0.6, 0.5], [1, 0.7, 0.1, 0.9, 0.3]]
class TestDetectionMAPOp11Point(TestDetectionMAPOp):
def init_test_case(self):
super(TestDetectionMAPOp11Point, self).init_test_case()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册