Commit 8bb55f5b authored by FlyingQianMM

add box format in prediction

Parent b6581a26
@@ -66,7 +66,7 @@ int main(int argc, char** argv) {
      std::cout << "image file: " << image_path
                << ", predict label: " << result.boxes[i].category
                << ", label_id:" << result.boxes[i].category_id
-               << ", score: " << result.boxes[i].score << ", box:("
+               << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
                << result.boxes[i].coordinate[0] << ", "
                << result.boxes[i].coordinate[1] << ", "
                << result.boxes[i].coordinate[2] << ", "
@@ -89,7 +89,7 @@ int main(int argc, char** argv) {
    for (int i = 0; i < result.boxes.size(); ++i) {
      std::cout << ", predict label: " << result.boxes[i].category
                << ", label_id:" << result.boxes[i].category_id
-               << ", score: " << result.boxes[i].score << ", box:("
+               << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
                << result.boxes[i].coordinate[0] << ", "
                << result.boxes[i].coordinate[1] << ", "
                << result.boxes[i].coordinate[2] << ", "
...
@@ -82,4 +82,4 @@ predict(self, img_file, transforms=None)
 >
 > **Returns**
 >
-> > - **list**: Prediction result list. Each element of the list is a dict whose keys include 'bbox', 'mask', 'category', 'category_id' and 'score', giving each predicted object's box coordinates, mask, category, category id and confidence; the box coordinates are [xmin, ymin, w, h], i.e. the x and y of the top-left corner plus the box width and height
+> > - **list**: Prediction result list. Each element of the list is a dict whose keys include 'bbox', 'mask', 'category', 'category_id' and 'score', giving each predicted object's box coordinates, mask, category, category id and confidence. The box coordinates are [xmin, ymin, w, h], i.e. the x and y of the top-left corner plus the box width and height. The mask is a binary image at the original image size, where 1 means the pixel belongs to the predicted category and 0 means the pixel is background.
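For reference, a minimal sketch of consuming one element of the documented result list; the `summarize` helper below is illustrative only and not part of this commit:

```python
import numpy as np

def summarize(pred):
    """Summarize one prediction dict with the keys documented above."""
    xmin, ymin, w, h = pred['bbox']        # top-left corner plus width and height
    xmax, ymax = xmin + w, ymin + h        # corner form, if needed downstream
    mask = pred.get('mask')                # binary map, same size as the input image
    area = int(np.count_nonzero(mask)) if mask is not None else 0
    return pred['category'], pred['score'], (xmin, ymin, xmax, ymax), area
```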
@@ -117,12 +117,12 @@ class FasterRCNN(BaseAPI):
            model_out = model.build_net(inputs)
            loss = model_out['loss']
            self.optimizer.minimize(loss)
-           outputs = OrderedDict([('loss', model_out['loss']),
-                                  ('loss_cls', model_out['loss_cls']),
-                                  ('loss_bbox', model_out['loss_bbox']),
-                                  ('loss_rpn_cls', model_out['loss_rpn_cls']),
-                                  ('loss_rpn_bbox',
-                                   model_out['loss_rpn_bbox'])])
+           outputs = OrderedDict(
+               [('loss', model_out['loss']),
+                ('loss_cls', model_out['loss_cls']),
+                ('loss_bbox', model_out['loss_bbox']),
+                ('loss_rpn_cls', model_out['loss_rpn_cls']), (
+                    'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
        else:
            outputs = model.build_net(inputs)
        return inputs, outputs
@@ -310,8 +310,7 @@ class FasterRCNN(BaseAPI):
                'im_info': im_infos,
                'im_shape': im_shapes,
            }
-           outputs = self.exe.run(
-               self.test_prog,
+           outputs = self.exe.run(self.test_prog,
                feed=[feed_data],
                fetch_list=list(self.test_outputs.values()),
                return_numpy=False)
@@ -339,13 +338,13 @@ class FasterRCNN(BaseAPI):
            res['is_difficult'] = (np.array(res_is_difficult),
                                   [res_is_difficult_lod])
            results.append(res)
-           logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-               epoch_id, step + 1, total_steps))
+           logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                              1, total_steps))
        box_ap_stats, eval_details = eval_results(
            results, metric, eval_dataset.coco_gt, with_background=True)
        metrics = OrderedDict(
-           zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
-               box_ap_stats))
+           zip(['bbox_mmap'
+                if metric == 'COCO' else 'bbox_map'], box_ap_stats))
        if return_details:
            return metrics, eval_details
        return metrics
@@ -359,7 +358,8 @@ class FasterRCNN(BaseAPI):
        Returns:
            list: Prediction result list. Each prediction consists of the
-               predicted box's category label, category name, box coordinates,
-               and box score.
+               predicted box's category label, category name, box coordinates
+               (in [xmin, ymin, w, h] format), and box score.
        """
        if transforms is None and not hasattr(self, 'test_transforms'):
            raise Exception("transforms need to be defined, now is None.")
@@ -373,8 +373,7 @@ class FasterRCNN(BaseAPI):
        im = np.expand_dims(im, axis=0)
        im_resize_info = np.expand_dims(im_resize_info, axis=0)
        im_shape = np.expand_dims(im_shape, axis=0)
-       outputs = self.exe.run(
-           self.test_prog,
+       outputs = self.exe.run(self.test_prog,
            feed={
                'image': im,
                'im_info': im_resize_info,
...
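The evaluate() hunks above also show how the returned metrics are keyed: 'bbox_mmap' when metric is 'COCO' and 'bbox_map' otherwise. A minimal usage sketch; the dataset paths, transform choices, and model directory are placeholder assumptions, not part of this commit:

```python
import paddlex as pdx
from paddlex.det import transforms

# Placeholder evaluation pipeline; adjust paths and transforms to your dataset.
eval_transforms = transforms.Compose([
    transforms.Normalize(),
    transforms.ResizeByShort(short_size=800, max_size=1333),
    transforms.Padding(coarsest_stride=32),
])
eval_dataset = pdx.datasets.VOCDetection(
    data_dir='dataset',
    file_list='dataset/val_list.txt',
    label_list='dataset/labels.txt',
    transforms=eval_transforms)

model = pdx.load_model('output/faster_rcnn_r50_fpn/best_model')  # placeholder path
metrics = model.evaluate(eval_dataset, batch_size=1, metric='VOC')
print(metrics['bbox_map'])  # keyed 'bbox_mmap' instead when metric='COCO'
```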
@@ -81,13 +81,13 @@ class MaskRCNN(FasterRCNN):
            model_out = model.build_net(inputs)
            loss = model_out['loss']
            self.optimizer.minimize(loss)
-           outputs = OrderedDict([('loss', model_out['loss']),
-                                  ('loss_cls', model_out['loss_cls']),
-                                  ('loss_bbox', model_out['loss_bbox']),
-                                  ('loss_mask', model_out['loss_mask']),
-                                  ('loss_rpn_cls', model_out['loss_rpn_cls']),
-                                  ('loss_rpn_bbox',
-                                   model_out['loss_rpn_bbox'])])
+           outputs = OrderedDict(
+               [('loss', model_out['loss']),
+                ('loss_cls', model_out['loss_cls']),
+                ('loss_bbox', model_out['loss_bbox']),
+                ('loss_mask', model_out['loss_mask']),
+                ('loss_rpn_cls', model_out['loss_rpn_cls']), (
+                    'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
        else:
            outputs = model.build_net(inputs)
        return inputs, outputs
@@ -276,8 +276,7 @@ class MaskRCNN(FasterRCNN):
                'im_info': im_infos,
                'im_shape': im_shapes,
            }
-           outputs = self.exe.run(
-               self.test_prog,
+           outputs = self.exe.run(self.test_prog,
                feed=[feed_data],
                fetch_list=list(self.test_outputs.values()),
                return_numpy=False)
@@ -292,8 +291,8 @@ class MaskRCNN(FasterRCNN):
            res['im_shape'] = (im_shapes, [])
            res['im_id'] = (np.array(res_im_id), [])
            results.append(res)
-           logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-               epoch_id, step + 1, total_steps))
+           logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                              1, total_steps))
        ap_stats, eval_details = eval_results(
            results,
@@ -302,8 +301,8 @@ class MaskRCNN(FasterRCNN):
            with_background=True,
            resolution=self.mask_head_resolution)
        if metric == 'VOC':
-           if isinstance(ap_stats[0], np.ndarray) and isinstance(
-                   ap_stats[1], np.ndarray):
+           if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
+                                                                 np.ndarray):
                metrics = OrderedDict(
                    zip(['bbox_map', 'segm_map'],
                        [ap_stats[0][1], ap_stats[1][1]]))
@@ -311,8 +310,8 @@ class MaskRCNN(FasterRCNN):
                metrics = OrderedDict(
                    zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
        elif metric == 'COCO':
-           if isinstance(ap_stats[0], np.ndarray) and isinstance(
-                   ap_stats[1], np.ndarray):
+           if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
+                                                                 np.ndarray):
                metrics = OrderedDict(
                    zip(['bbox_mmap', 'segm_mmap'],
                        [ap_stats[0][0], ap_stats[1][0]]))
@@ -331,7 +330,9 @@ class MaskRCNN(FasterRCNN):
            transforms (paddlex.det.transforms): Data preprocessing transforms.
        Returns:
-           dict: Prediction result list. Each prediction consists of the predicted
-               box's category label, category name, box coordinates, a binary map
-               inside the box, and the box score.
+           dict: Prediction result list. Each prediction consists of the predicted
+               box's category label, category name, box coordinates (in
+               [xmin, ymin, w, h] format), a predicted binary map at the original
+               image size (1 for the predicted box category, 0 for background),
+               and the box score.
        """
        if transforms is None and not hasattr(self, 'test_transforms'):
@@ -346,8 +347,7 @@ class MaskRCNN(FasterRCNN):
        im = np.expand_dims(im, axis=0)
        im_resize_info = np.expand_dims(im_resize_info, axis=0)
        im_shape = np.expand_dims(im_shape, axis=0)
-       outputs = self.exe.run(
-           self.test_prog,
+       outputs = self.exe.run(self.test_prog,
            feed={
                'image': im,
                'im_info': im_resize_info,
@@ -368,8 +368,8 @@ class MaskRCNN(FasterRCNN):
        import pycocotools.mask as mask_util
        for index, xywh_res in enumerate(xywh_results):
            del xywh_res['image_id']
-           xywh_res['mask'] = mask_util.decode(
-               segm_results[index]['segmentation'])
+           xywh_res['mask'] = mask_util.decode(segm_results[index][
+               'segmentation'])
            xywh_res['category'] = self.labels[xywh_res['category_id']]
            results.append(xywh_res)
        return results
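The hunk above decodes each RLE mask with pycocotools before returning, so callers of MaskRCNN.predict receive 'mask' as a ready-to-use binary array at the original image size. A minimal overlay sketch; the model directory, image path, and score threshold are placeholder assumptions, not part of this commit:

```python
import cv2
import numpy as np
import paddlex as pdx

model = pdx.load_model('output/mask_rcnn_r50_fpn/best_model')  # placeholder path
img = cv2.imread('test.jpg')
for pred in model.predict('test.jpg'):
    if pred['score'] < 0.5:              # placeholder confidence threshold
        continue
    mask = pred['mask'].astype(bool)     # original-size binary map: 1 = object pixel
    # Blend a green tint onto the pixels covered by the predicted mask.
    img[mask] = (0.5 * img[mask] + 0.5 * np.array([0, 255, 0])).astype(np.uint8)
cv2.imwrite('mask_overlay.jpg', img)
```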
@@ -306,8 +306,7 @@ class YOLOv3(BaseAPI):
            images = np.array([d[0] for d in data])
            im_sizes = np.array([d[1] for d in data])
            feed_data = {'image': images, 'im_size': im_sizes}
-           outputs = self.exe.run(
-               self.test_prog,
+           outputs = self.exe.run(self.test_prog,
                feed=[feed_data],
                fetch_list=list(self.test_outputs.values()),
                return_numpy=False)
@@ -326,13 +325,13 @@ class YOLOv3(BaseAPI):
            res['gt_label'] = (res_gt_label, [])
            res['is_difficult'] = (res_is_difficult, [])
            results.append(res)
-           logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-               epoch_id, step + 1, total_steps))
+           logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                              1, total_steps))
        box_ap_stats, eval_details = eval_results(
            results, metric, eval_dataset.coco_gt, with_background=False)
        evaluate_metrics = OrderedDict(
-           zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
-               box_ap_stats))
+           zip(['bbox_mmap'
+                if metric == 'COCO' else 'bbox_map'], box_ap_stats))
        if return_details:
            return evaluate_metrics, eval_details
        return evaluate_metrics
@@ -346,7 +345,8 @@ class YOLOv3(BaseAPI):
        Returns:
            list: Prediction result list. Each prediction consists of the
-               predicted box's category label, category name, box coordinates,
-               and box score.
+               predicted box's category label, category name, box coordinates
+               (in [xmin, ymin, w, h] format), and box score.
        """
        if transforms is None and not hasattr(self, 'test_transforms'):
            raise Exception("transforms need to be defined, now is None.")
@@ -359,12 +359,9 @@ class YOLOv3(BaseAPI):
        im, im_size = self.test_transforms(img_file)
        im = np.expand_dims(im, axis=0)
        im_size = np.expand_dims(im_size, axis=0)
-       outputs = self.exe.run(
-           self.test_prog,
-           feed={
-               'image': im,
-               'im_size': im_size
-           },
+       outputs = self.exe.run(self.test_prog,
+                              feed={'image': im,
+                                    'im_size': im_size},
            fetch_list=list(self.test_outputs.values()),
            return_numpy=False)
        res = {
...
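Since YOLOv3.predict now documents its boxes as [xmin, ymin, w, h], drawing them with OpenCV needs a conversion to the opposite corner. A minimal sketch; the model directory, image path, and threshold are placeholder assumptions, not part of this commit:

```python
import cv2
import paddlex as pdx

model = pdx.load_model('output/yolov3_darknet53/best_model')  # placeholder path
img = cv2.imread('test.jpg')
for pred in model.predict('test.jpg'):
    if pred['score'] < 0.3:                      # placeholder threshold
        continue
    xmin, ymin, w, h = pred['bbox']              # [xmin, ymin, w, h]
    pt1 = (int(xmin), int(ymin))
    pt2 = (int(xmin + w), int(ymin + h))         # opposite corner for cv2.rectangle
    cv2.rectangle(img, pt1, pt2, (0, 255, 0), 2)
    label = '{} {:.2f}'.format(pred['category'], pred['score'])
    cv2.putText(img, label, pt1, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.imwrite('yolo_result.jpg', img)
```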