提交 8bb55f5b 编写于 作者: F FlyingQianMM

add box format in prediction

上级 b6581a26
......@@ -66,7 +66,7 @@ int main(int argc, char** argv) {
std::cout << "image file: " << image_path
<< ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score << ", box:("
<< ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
<< result.boxes[i].coordinate[0] << ", "
<< result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
......@@ -89,7 +89,7 @@ int main(int argc, char** argv) {
for (int i = 0; i < result.boxes.size(); ++i) {
std::cout << ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score << ", box:("
<< ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
<< result.boxes[i].coordinate[0] << ", "
<< result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
......
......@@ -82,4 +82,4 @@ predict(self, img_file, transforms=None)
>
> **返回值**
>
> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度,其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高
> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key包括'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息、类别、类别id、置信度。其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。Mask信息为原图大小的二值图,1表示像素点属于预测类别,0表示像素点是背景。
......@@ -117,12 +117,12 @@ class FasterRCNN(BaseAPI):
model_out = model.build_net(inputs)
loss = model_out['loss']
self.optimizer.minimize(loss)
outputs = OrderedDict([('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_rpn_cls', model_out['loss_rpn_cls']),
('loss_rpn_bbox',
model_out['loss_rpn_bbox'])])
outputs = OrderedDict(
[('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_rpn_cls', model_out['loss_rpn_cls']), (
'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
else:
outputs = model.build_net(inputs)
return inputs, outputs
......@@ -310,11 +310,10 @@ class FasterRCNN(BaseAPI):
'im_info': im_infos,
'im_shape': im_shapes,
}
outputs = self.exe.run(
self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
'bbox': (np.array(outputs[0]),
outputs[0].recursive_sequence_lengths())
......@@ -339,13 +338,13 @@ class FasterRCNN(BaseAPI):
res['is_difficult'] = (np.array(res_is_difficult),
[res_is_difficult_lod])
results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
box_ap_stats, eval_details = eval_results(
results, metric, eval_dataset.coco_gt, with_background=True)
metrics = OrderedDict(
zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
box_ap_stats))
zip(['bbox_mmap'
if metric == 'COCO' else 'bbox_map'], box_ap_stats))
if return_details:
return metrics, eval_details
return metrics
......@@ -359,7 +358,8 @@ class FasterRCNN(BaseAPI):
Returns:
list: 预测结果列表,每个预测结果由预测框类别标签、
预测框类别名称、预测框坐标、预测框得分组成。
预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
预测框得分组成。
"""
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
......@@ -373,15 +373,14 @@ class FasterRCNN(BaseAPI):
im = np.expand_dims(im, axis=0)
im_resize_info = np.expand_dims(im_resize_info, axis=0)
im_shape = np.expand_dims(im_shape, axis=0)
outputs = self.exe.run(
self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs)
......
......@@ -81,13 +81,13 @@ class MaskRCNN(FasterRCNN):
model_out = model.build_net(inputs)
loss = model_out['loss']
self.optimizer.minimize(loss)
outputs = OrderedDict([('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_mask', model_out['loss_mask']),
('loss_rpn_cls', model_out['loss_rpn_cls']),
('loss_rpn_bbox',
model_out['loss_rpn_bbox'])])
outputs = OrderedDict(
[('loss', model_out['loss']),
('loss_cls', model_out['loss_cls']),
('loss_bbox', model_out['loss_bbox']),
('loss_mask', model_out['loss_mask']),
('loss_rpn_cls', model_out['loss_rpn_cls']), (
'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
else:
outputs = model.build_net(inputs)
return inputs, outputs
......@@ -276,11 +276,10 @@ class MaskRCNN(FasterRCNN):
'im_info': im_infos,
'im_shape': im_shapes,
}
outputs = self.exe.run(
self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
'bbox': (np.array(outputs[0]),
outputs[0].recursive_sequence_lengths()),
......@@ -292,8 +291,8 @@ class MaskRCNN(FasterRCNN):
res['im_shape'] = (im_shapes, [])
res['im_id'] = (np.array(res_im_id), [])
results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
ap_stats, eval_details = eval_results(
results,
......@@ -302,8 +301,8 @@ class MaskRCNN(FasterRCNN):
with_background=True,
resolution=self.mask_head_resolution)
if metric == 'VOC':
if isinstance(ap_stats[0], np.ndarray) and isinstance(
ap_stats[1], np.ndarray):
if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
np.ndarray):
metrics = OrderedDict(
zip(['bbox_map', 'segm_map'],
[ap_stats[0][1], ap_stats[1][1]]))
......@@ -311,8 +310,8 @@ class MaskRCNN(FasterRCNN):
metrics = OrderedDict(
zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
elif metric == 'COCO':
if isinstance(ap_stats[0], np.ndarray) and isinstance(
ap_stats[1], np.ndarray):
if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
np.ndarray):
metrics = OrderedDict(
zip(['bbox_mmap', 'segm_mmap'],
[ap_stats[0][0], ap_stats[1][0]]))
......@@ -331,8 +330,10 @@ class MaskRCNN(FasterRCNN):
transforms (paddlex.det.transforms): 数据预处理操作。
Returns:
dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、预测框坐标、预测框内的二值图、
预测框得分组成。
list: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、
预测框坐标(坐标格式为[xmin, ymin, w, h])、
原图大小的预测二值图(1表示预测框类别,0表示背景类)、
预测框得分组成。
"""
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
......@@ -346,15 +347,14 @@ class MaskRCNN(FasterRCNN):
im = np.expand_dims(im, axis=0)
im_resize_info = np.expand_dims(im_resize_info, axis=0)
im_shape = np.expand_dims(im_shape, axis=0)
outputs = self.exe.run(
self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed={
'image': im,
'im_info': im_resize_info,
'im_shape': im_shape
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs)
......@@ -368,8 +368,8 @@ class MaskRCNN(FasterRCNN):
import pycocotools.mask as mask_util
for index, xywh_res in enumerate(xywh_results):
del xywh_res['image_id']
xywh_res['mask'] = mask_util.decode(
segm_results[index]['segmentation'])
xywh_res['mask'] = mask_util.decode(segm_results[index][
'segmentation'])
xywh_res['category'] = self.labels[xywh_res['category_id']]
results.append(xywh_res)
return results
......@@ -306,11 +306,10 @@ class YOLOv3(BaseAPI):
images = np.array([d[0] for d in data])
im_sizes = np.array([d[1] for d in data])
feed_data = {'image': images, 'im_size': im_sizes}
outputs = self.exe.run(
self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed=[feed_data],
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
'bbox': (np.array(outputs[0]),
outputs[0].recursive_sequence_lengths())
......@@ -326,13 +325,13 @@ class YOLOv3(BaseAPI):
res['gt_label'] = (res_gt_label, [])
res['is_difficult'] = (res_is_difficult, [])
results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
epoch_id, step + 1, total_steps))
logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
1, total_steps))
box_ap_stats, eval_details = eval_results(
results, metric, eval_dataset.coco_gt, with_background=False)
evaluate_metrics = OrderedDict(
zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
box_ap_stats))
zip(['bbox_mmap'
if metric == 'COCO' else 'bbox_map'], box_ap_stats))
if return_details:
return evaluate_metrics, eval_details
return evaluate_metrics
......@@ -346,7 +345,8 @@ class YOLOv3(BaseAPI):
Returns:
list: 预测结果列表,每个预测结果由预测框类别标签、
预测框类别名称、预测框坐标、预测框得分组成。
预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
预测框得分组成。
"""
if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.")
......@@ -359,14 +359,11 @@ class YOLOv3(BaseAPI):
im, im_size = self.test_transforms(img_file)
im = np.expand_dims(im, axis=0)
im_size = np.expand_dims(im_size, axis=0)
outputs = self.exe.run(
self.test_prog,
feed={
'image': im,
'im_size': im_size
},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
outputs = self.exe.run(self.test_prog,
feed={'image': im,
'im_size': im_size},
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册