Unverified commit b3d26cd3, authored by qingqing01 and committed by GitHub

Fix bug in detection_output and mAP calculation in SSD. (#8985)

* Clipping bbox in the mAP evaluator calculation.

* Fix bug in detection_output and mAP calculation in SSD.

* Fix bug in detection.py.

* Fix bug in test_detection_map_op.py.
Parent e4ce4795
@@ -273,7 +273,6 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
                    std::map<int, std::vector<std::pair<T, int>>>& true_pos,
                    std::map<int, std::vector<std::pair<T, int>>>& false_pos,
                    const int class_num) const {
-    constexpr T kEPS = static_cast<T>(1e-6);
     const int* pos_count_data = input_pos_count.data<int>();
     for (int i = 0; i < class_num; ++i) {
       label_pos_count[i] = pos_count_data[i];
@@ -282,12 +281,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
     auto SetData = [](const framework::LoDTensor& pos_tensor,
                       std::map<int, std::vector<std::pair<T, int>>>& pos) {
       const T* pos_data = pos_tensor.data<T>();
-      auto pos_data_lod = pos_tensor.lod();
-      for (size_t i = 0; i < pos_data_lod.size(); ++i) {
-        for (size_t j = pos_data_lod[0][i]; j < pos_data_lod[0][i + 1]; ++j) {
+      auto pos_data_lod = pos_tensor.lod()[0];
+      for (size_t i = 0; i < pos_data_lod.size() - 1; ++i) {
+        for (size_t j = pos_data_lod[i]; j < pos_data_lod[i + 1]; ++j) {
           T score = pos_data[j * 2];
-          int flag = 1;
-          if (pos_data[j * 2 + 1] < kEPS) flag = 0;
+          int flag = pos_data[j * 2 + 1];
           pos[i].push_back(std::make_pair(score, flag));
         }
       }
...
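For context on the SetData change above: the fixed lambda reads the level-0 LoD offsets once and takes the tp/fp flag directly from the tensor instead of thresholding it against kEPS. A minimal plain-Python sketch of that parsing, with illustrative data that is not taken from the commit:

pos_data = [0.9, 1.0, 0.7, 0.0, 0.3, 1.0]   # flattened (score, flag) pairs
lod = [0, 2, 3]                             # level-0 offsets: slot 0 -> rows [0, 2), slot 1 -> [2, 3)
pos = {}
for i in range(len(lod) - 1):
    for j in range(lod[i], lod[i + 1]):
        score = pos_data[j * 2]
        flag = int(pos_data[j * 2 + 1])     # read the flag directly, no epsilon comparison
        pos.setdefault(i, []).append((score, flag))
# pos == {0: [(0.9, 1), (0.7, 0)], 1: [(0.3, 1)]}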
@@ -111,7 +111,8 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
        });
    AddAttr<std::vector<float>>(
        "max_sizes",
-       "(vector<float>) List of max sizes of generated prior boxes.");
+       "(vector<float>) List of max sizes of generated prior boxes.")
+       .SetDefault(std::vector<float>{});
    AddAttr<std::vector<float>>(
        "aspect_ratios",
        "(vector<float>) List of aspect ratios of generated prior boxes.");
...
@@ -97,9 +97,6 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
    boxes->mutable_data<T>(ctx.GetPlace());
    vars->mutable_data<T>(ctx.GetPlace());
-   T inv_img_width = 1.0 / img_width;
-   T inv_img_height = 1.0 / img_height;
    auto e_boxes = framework::EigenTensor<T, 4>::From(*boxes);
    for (int h = 0; h < feature_height; ++h) {
      for (int w = 0; w < feature_width; ++w) {
@@ -110,36 +107,30 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
        for (size_t s = 0; s < min_sizes.size(); ++s) {
          auto min_size = min_sizes[s];
          // first prior: aspect_ratio = 1, size = min_size
-         box_width = box_height = min_size;
+         box_width = box_height = min_size / 2.;
          // xmin
-         e_boxes(h, w, idx, 0) = (center_x - box_width * 0.5) * inv_img_width;
+         e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
          // ymin
-         e_boxes(h, w, idx, 1) =
-             (center_y - box_height * 0.5) * inv_img_height;
+         e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
          // xmax
-         e_boxes(h, w, idx, 2) = (center_x + box_width * 0.5) * inv_img_width;
+         e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
          // ymax
-         e_boxes(h, w, idx, 3) =
-             (center_y + box_height * 0.5) * inv_img_height;
+         e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
          idx++;
          if (max_sizes.size() > 0) {
            auto max_size = max_sizes[s];
            // second prior: aspect_ratio = 1,
            // size = sqrt(min_size * max_size)
-           box_width = box_height = sqrt(min_size * max_size);
+           box_width = box_height = sqrt(min_size * max_size) / 2.;
            // xmin
-           e_boxes(h, w, idx, 0) =
-               (center_x - box_width * 0.5) * inv_img_width;
+           e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
            // ymin
-           e_boxes(h, w, idx, 1) =
-               (center_y - box_height * 0.5) * inv_img_height;
+           e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
            // xmax
-           e_boxes(h, w, idx, 2) =
-               (center_x + box_width * 0.5) * inv_img_width;
+           e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
            // ymax
-           e_boxes(h, w, idx, 3) =
-               (center_y + box_height * 0.5) * inv_img_height;
+           e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
            idx++;
          }
@@ -149,20 +140,16 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
            if (fabs(ar - 1.) < 1e-6) {
              continue;
            }
-           box_width = min_size * sqrt(ar);
-           box_height = min_size / sqrt(ar);
+           box_width = min_size * sqrt(ar) / 2.;
+           box_height = min_size / sqrt(ar) / 2.;
            // xmin
-           e_boxes(h, w, idx, 0) =
-               (center_x - box_width * 0.5) * inv_img_width;
+           e_boxes(h, w, idx, 0) = (center_x - box_width) / img_width;
            // ymin
-           e_boxes(h, w, idx, 1) =
-               (center_y - box_height * 0.5) * inv_img_height;
+           e_boxes(h, w, idx, 1) = (center_y - box_height) / img_height;
            // xmax
-           e_boxes(h, w, idx, 2) =
-               (center_x + box_width * 0.5) * inv_img_width;
+           e_boxes(h, w, idx, 2) = (center_x + box_width) / img_width;
            // ymax
-           e_boxes(h, w, idx, 3) =
-               (center_y + box_height * 0.5) * inv_img_height;
+           e_boxes(h, w, idx, 3) = (center_y + box_height) / img_height;
            idx++;
          }
        }
...
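The kernel change above stores half the box extent in box_width/box_height and offsets the center by that full half-extent, so the emitted min-size prior coordinates appear to work out the same while the inv_img_width/inv_img_height temporaries go away. A rough Python sketch of one aspect_ratio = 1 prior under the new arithmetic (the helper name and values are made up for illustration):

def first_prior(center_x, center_y, min_size, img_width, img_height):
    half = min_size / 2.0                       # box_width == box_height == min_size / 2
    xmin = (center_x - half) / img_width
    ymin = (center_y - half) / img_height
    xmax = (center_x + half) / img_width
    ymax = (center_y + half) / img_height
    return xmin, ymin, xmax, ymax

# a 30-pixel prior centered at (4, 4) on a 300x300 input:
print(first_prior(4.0, 4.0, 30.0, 300.0, 300.0))
# approximately (-0.0367, -0.0367, 0.0633, 0.0633)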
@@ -130,8 +130,13 @@ def detection_output(loc,
        target_box=loc,
        code_type='decode_center_size')
-   nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype)
+   old_shape = scores.shape
+   scores = ops.reshape(x=scores, shape=(-1, old_shape[-1]))
+   scores = ops.softmax(x=scores)
+   scores = ops.reshape(x=scores, shape=old_shape)
    scores = nn.transpose(scores, perm=[0, 2, 1])
+   nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype)
    helper.append_op(
        type="multiclass_nms",
        inputs={'Scores': scores,
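The added lines above normalize the class scores with a softmax before they are transposed for multiclass_nms, presumably flattening to 2-D first because the softmax op works on the last dimension of a 2-D input. A hedged NumPy sketch of the same reshape -> softmax -> reshape -> transpose sequence (shapes are illustrative only):

import numpy as np

scores = np.random.rand(2, 4, 21)                    # (batch, num_priors, num_classes)
old_shape = scores.shape
flat = scores.reshape(-1, old_shape[-1])             # (batch * num_priors, num_classes)
flat = np.exp(flat - flat.max(axis=1, keepdims=True))
flat /= flat.sum(axis=1, keepdims=True)              # row-wise softmax
scores = flat.reshape(old_shape).transpose(0, 2, 1)  # (batch, num_classes, num_priors) for NMS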
@@ -562,16 +567,16 @@ def multi_box_head(inputs,
                   base_size,
                   num_classes,
                   aspect_ratios,
-                  min_ratio,
-                  max_ratio,
+                  min_ratio=None,
+                  max_ratio=None,
                   min_sizes=None,
                   max_sizes=None,
                   steps=None,
                   step_w=None,
                   step_h=None,
                   offset=0.5,
-                  variance=[0.1, 0.1, 0.1, 0.1],
-                  flip=False,
+                  variance=[0.1, 0.1, 0.2, 0.2],
+                  flip=True,
                   clip=False,
                   kernel_size=1,
                   pad=0,
@@ -614,7 +619,7 @@ def multi_box_head(inputs,
            the inputs[i] will be automatically calculated. Default: None.
        offset(float): Prior boxes center offset. Default: 0.5
        variance(list|tuple): the variances to be encoded in prior boxes.
-           Default:[0.1, 0.1, 0.1, 0.1].
+           Default:[0.1, 0.1, 0.2, 0.2].
        flip(bool): Whether to flip aspect ratios. Default:False.
        clip(bool): Whether to clip out-of-boundary boxes. Default: False.
        kernel_size(int): The kernel size of conv2d. Default: 1.
@@ -668,6 +673,19 @@ def multi_box_head(inputs,
        helper = LayerHelper("prior_box", **locals())
        dtype = helper.input_dtype()
+       attrs = {
+           'min_sizes': min_sizes,
+           'aspect_ratios': aspect_ratios,
+           'variances': variance,
+           'flip': flip,
+           'clip': clip,
+           'step_w': step_w,
+           'step_h': step_h,
+           'offset': offset
+       }
+       if len(max_sizes) > 0 and max_sizes[0] > 0:
+           attrs['max_sizes'] = max_sizes
        box = helper.create_tmp_variable(dtype)
        var = helper.create_tmp_variable(dtype)
        helper.append_op(
@@ -676,17 +694,7 @@ def multi_box_head(inputs,
                    "Image": image},
            outputs={"Boxes": box,
                     "Variances": var},
-           attrs={
-               'min_sizes': min_sizes,
-               'max_sizes': max_sizes,
-               'aspect_ratios': aspect_ratios,
-               'variances': variance,
-               'flip': flip,
-               'clip': clip,
-               'step_w': step_w,
-               'step_h': step_h,
-               'offset': offset
-           })
+           attrs=attrs, )

        return box, var

    def _reshape_with_axis_(input, axis=1):
@@ -714,7 +722,7 @@ def multi_box_head(inputs,
    if num_layer <= 2:
        assert min_sizes is not None and max_sizes is not None
        assert len(min_sizes) == num_layer and len(max_sizes) == num_layer
-   else:
+   elif min_sizes is None and max_sizes is None:
        min_sizes = []
        max_sizes = []
        step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
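With min_ratio/max_ratio now optional, the branch above derives min_sizes/max_sizes from the ratio range only when neither list is supplied. As a quick illustration of the step computed on the last line, with typical SSD-style settings min_ratio=20, max_ratio=90 and six feature maps (numbers chosen for illustration, not taken from the diff):

import math

min_ratio, max_ratio, num_layer = 20, 90, 6
step = int(math.floor((max_ratio - min_ratio) / (num_layer - 2)))   # floor(70 / 4) = 17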
@@ -759,9 +767,6 @@ def multi_box_head(inputs,
            min_size = [min_size]
        if not _is_list_or_tuple_(max_size):
            max_size = [max_size]
-       if not (len(max_size) == len(min_size)):
-           raise ValueError(
-               'the length of max_size and min_size should be equal.')
        aspect_ratio = []
        if aspect_ratios is not None:
@@ -779,7 +784,7 @@ def multi_box_head(inputs,
        num_boxes = box.shape[2]
-       # get box_loc
+       # get loc
        num_loc_output = num_boxes * 4
        mbox_loc = nn.conv2d(
            input=input,
@@ -796,7 +801,7 @@ def multi_box_head(inputs,
        mbox_loc_flatten = ops.reshape(mbox_loc, shape=new_shape)
        mbox_locs.append(mbox_loc_flatten)
-       # get conf_loc
+       # get conf
        num_conf_output = num_boxes * num_classes
        conf_loc = nn.conv2d(
            input=input,
...
@@ -166,8 +166,6 @@ class TestDetectionMAPOp(OpTest):
            elif not difficult:
                label_count[label] += 1
-       true_pos = collections.defaultdict(list)
-       false_pos = collections.defaultdict(list)
        for (label, score, tp, fp) in tf_pos:
            true_pos[label].append([score, tp])
            false_pos[label].append([score, fp])
...
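Dropping the two defaultdict re-creations above means the test helper appends to true_pos/false_pos containers that are presumably populated earlier (e.g. from the evaluator's input state) instead of silently starting from empty ones. A tiny standalone illustration of that accumulation behavior, not the test itself:

import collections

true_pos = collections.defaultdict(list)
true_pos[1].append([0.9, 1])          # an entry collected earlier
true_pos[1].append([0.7, 0])          # later entries extend the same list
assert true_pos[1] == [[0.9, 1], [0.7, 0]]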