From 8bb55f5bb554d5ac93053f0ea06b3ae48155e723 Mon Sep 17 00:00:00 2001 From: FlyingQianMM <245467267@qq.com> Date: Mon, 25 May 2020 21:21:55 +0800 Subject: [PATCH] add box format in prediction --- deploy/cpp/demo/detector.cpp | 4 +- docs/apis/models/instance_segmentation.md | 2 +- paddlex/cv/models/faster_rcnn.py | 49 +++++++++--------- paddlex/cv/models/mask_rcnn.py | 62 +++++++++++------------ paddlex/cv/models/yolo_v3.py | 33 ++++++------ 5 files changed, 73 insertions(+), 77 deletions(-) diff --git a/deploy/cpp/demo/detector.cpp b/deploy/cpp/demo/detector.cpp index e5fc280..e42288f 100644 --- a/deploy/cpp/demo/detector.cpp +++ b/deploy/cpp/demo/detector.cpp @@ -66,7 +66,7 @@ int main(int argc, char** argv) { std::cout << "image file: " << image_path << ", predict label: " << result.boxes[i].category << ", label_id:" << result.boxes[i].category_id - << ", score: " << result.boxes[i].score << ", box:(" + << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[2] << ", " @@ -89,7 +89,7 @@ int main(int argc, char** argv) { for (int i = 0; i < result.boxes.size(); ++i) { std::cout << ", predict label: " << result.boxes[i].category << ", label_id:" << result.boxes[i].category_id - << ", score: " << result.boxes[i].score << ", box:(" + << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[2] << ", " diff --git a/docs/apis/models/instance_segmentation.md b/docs/apis/models/instance_segmentation.md index f086454..e3f6734 100755 --- a/docs/apis/models/instance_segmentation.md +++ b/docs/apis/models/instance_segmentation.md @@ -82,4 +82,4 @@ predict(self, img_file, transforms=None) > > **返回值** > -> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度,其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。 +> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度。其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。Mask信息为原图大小的二值图,1表示像素点属于预测类别,0表示像素点是背景。 diff --git a/paddlex/cv/models/faster_rcnn.py b/paddlex/cv/models/faster_rcnn.py index 3b7144f..83bf925 100644 --- a/paddlex/cv/models/faster_rcnn.py +++ b/paddlex/cv/models/faster_rcnn.py @@ -117,12 +117,12 @@ class FasterRCNN(BaseAPI): model_out = model.build_net(inputs) loss = model_out['loss'] self.optimizer.minimize(loss) - outputs = OrderedDict([('loss', model_out['loss']), - ('loss_cls', model_out['loss_cls']), - ('loss_bbox', model_out['loss_bbox']), - ('loss_rpn_cls', model_out['loss_rpn_cls']), - ('loss_rpn_bbox', - model_out['loss_rpn_bbox'])]) + outputs = OrderedDict( + [('loss', model_out['loss']), + ('loss_cls', model_out['loss_cls']), + ('loss_bbox', model_out['loss_bbox']), + ('loss_rpn_cls', model_out['loss_rpn_cls']), ( + 'loss_rpn_bbox', model_out['loss_rpn_bbox'])]) else: outputs = model.build_net(inputs) return inputs, outputs @@ -310,11 +310,10 @@ class FasterRCNN(BaseAPI): 'im_info': im_infos, 'im_shape': im_shapes, } - outputs = self.exe.run( - self.test_prog, - feed=[feed_data], - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed=[feed_data], + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { 'bbox': (np.array(outputs[0]), outputs[0].recursive_sequence_lengths()) @@ -339,13 +338,13 @@ class FasterRCNN(BaseAPI): res['is_difficult'] = (np.array(res_is_difficult), [res_is_difficult_lod]) results.append(res) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) box_ap_stats, eval_details = eval_results( results, metric, eval_dataset.coco_gt, with_background=True) metrics = OrderedDict( - zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'], - box_ap_stats)) + zip(['bbox_mmap' + if metric == 'COCO' else 'bbox_map'], box_ap_stats)) if return_details: return metrics, eval_details return metrics @@ -359,7 +358,8 @@ class FasterRCNN(BaseAPI): Returns: list: 预测结果列表,每个预测结果由预测框类别标签、 - 预测框类别名称、预测框坐标、预测框得分组成。 + 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、 + 预测框得分组成。 """ if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") @@ -373,15 +373,14 @@ class FasterRCNN(BaseAPI): im = np.expand_dims(im, axis=0) im_resize_info = np.expand_dims(im_resize_info, axis=0) im_shape = np.expand_dims(im_shape, axis=0) - outputs = self.exe.run( - self.test_prog, - feed={ - 'image': im, - 'im_info': im_resize_info, - 'im_shape': im_shape - }, - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed={ + 'image': im, + 'im_info': im_resize_info, + 'im_shape': im_shape + }, + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(list(self.test_outputs.keys()), outputs) diff --git a/paddlex/cv/models/mask_rcnn.py b/paddlex/cv/models/mask_rcnn.py index ba5da33..c1a0f5f 100644 --- a/paddlex/cv/models/mask_rcnn.py +++ b/paddlex/cv/models/mask_rcnn.py @@ -81,13 +81,13 @@ class MaskRCNN(FasterRCNN): model_out = model.build_net(inputs) loss = model_out['loss'] self.optimizer.minimize(loss) - outputs = OrderedDict([('loss', model_out['loss']), - ('loss_cls', model_out['loss_cls']), - ('loss_bbox', model_out['loss_bbox']), - ('loss_mask', model_out['loss_mask']), - ('loss_rpn_cls', model_out['loss_rpn_cls']), - ('loss_rpn_bbox', - model_out['loss_rpn_bbox'])]) + outputs = OrderedDict( + [('loss', model_out['loss']), + ('loss_cls', model_out['loss_cls']), + ('loss_bbox', model_out['loss_bbox']), + ('loss_mask', model_out['loss_mask']), + ('loss_rpn_cls', model_out['loss_rpn_cls']), ( + 'loss_rpn_bbox', model_out['loss_rpn_bbox'])]) else: outputs = model.build_net(inputs) return inputs, outputs @@ -276,11 +276,10 @@ class MaskRCNN(FasterRCNN): 'im_info': im_infos, 'im_shape': im_shapes, } - outputs = self.exe.run( - self.test_prog, - feed=[feed_data], - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed=[feed_data], + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { 'bbox': (np.array(outputs[0]), outputs[0].recursive_sequence_lengths()), @@ -292,8 +291,8 @@ class MaskRCNN(FasterRCNN): res['im_shape'] = (im_shapes, []) res['im_id'] = (np.array(res_im_id), []) results.append(res) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) ap_stats, eval_details = eval_results( results, @@ -302,8 +301,8 @@ class MaskRCNN(FasterRCNN): with_background=True, resolution=self.mask_head_resolution) if metric == 'VOC': - if isinstance(ap_stats[0], np.ndarray) and isinstance( - ap_stats[1], np.ndarray): + if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1], + np.ndarray): metrics = OrderedDict( zip(['bbox_map', 'segm_map'], [ap_stats[0][1], ap_stats[1][1]])) @@ -311,8 +310,8 @@ class MaskRCNN(FasterRCNN): metrics = OrderedDict( zip(['bbox_map', 'segm_map'], [0.0, 0.0])) elif metric == 'COCO': - if isinstance(ap_stats[0], np.ndarray) and isinstance( - ap_stats[1], np.ndarray): + if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1], + np.ndarray): metrics = OrderedDict( zip(['bbox_mmap', 'segm_mmap'], [ap_stats[0][0], ap_stats[1][0]])) @@ -331,8 +330,10 @@ class MaskRCNN(FasterRCNN): transforms (paddlex.det.transforms): 数据预处理操作。 Returns: - dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、预测框坐标、预测框内的二值图、 - 预测框得分组成。 + dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、 + 预测框坐标(坐标格式为[xmin, ymin, w, h])、 + 原图大小的预测二值图(1表示预测框类别,0表示背景类)、 + 预测框得分组成。 """ if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") @@ -346,15 +347,14 @@ class MaskRCNN(FasterRCNN): im = np.expand_dims(im, axis=0) im_resize_info = np.expand_dims(im_resize_info, axis=0) im_shape = np.expand_dims(im_shape, axis=0) - outputs = self.exe.run( - self.test_prog, - feed={ - 'image': im, - 'im_info': im_resize_info, - 'im_shape': im_shape - }, - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed={ + 'image': im, + 'im_info': im_resize_info, + 'im_shape': im_shape + }, + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(list(self.test_outputs.keys()), outputs) @@ -368,8 +368,8 @@ class MaskRCNN(FasterRCNN): import pycocotools.mask as mask_util for index, xywh_res in enumerate(xywh_results): del xywh_res['image_id'] - xywh_res['mask'] = mask_util.decode( - segm_results[index]['segmentation']) + xywh_res['mask'] = mask_util.decode(segm_results[index][ + 'segmentation']) xywh_res['category'] = self.labels[xywh_res['category_id']] results.append(xywh_res) return results diff --git a/paddlex/cv/models/yolo_v3.py b/paddlex/cv/models/yolo_v3.py index e1cd2d6..9646c81 100644 --- a/paddlex/cv/models/yolo_v3.py +++ b/paddlex/cv/models/yolo_v3.py @@ -306,11 +306,10 @@ class YOLOv3(BaseAPI): images = np.array([d[0] for d in data]) im_sizes = np.array([d[1] for d in data]) feed_data = {'image': images, 'im_size': im_sizes} - outputs = self.exe.run( - self.test_prog, - feed=[feed_data], - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed=[feed_data], + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { 'bbox': (np.array(outputs[0]), outputs[0].recursive_sequence_lengths()) @@ -326,13 +325,13 @@ class YOLOv3(BaseAPI): res['gt_label'] = (res_gt_label, []) res['is_difficult'] = (res_is_difficult, []) results.append(res) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) box_ap_stats, eval_details = eval_results( results, metric, eval_dataset.coco_gt, with_background=False) evaluate_metrics = OrderedDict( - zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'], - box_ap_stats)) + zip(['bbox_mmap' + if metric == 'COCO' else 'bbox_map'], box_ap_stats)) if return_details: return evaluate_metrics, eval_details return evaluate_metrics @@ -346,7 +345,8 @@ class YOLOv3(BaseAPI): Returns: list: 预测结果列表,每个预测结果由预测框类别标签、 - 预测框类别名称、预测框坐标、预测框得分组成。 + 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、 + 预测框得分组成。 """ if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") @@ -359,14 +359,11 @@ class YOLOv3(BaseAPI): im, im_size = self.test_transforms(img_file) im = np.expand_dims(im, axis=0) im_size = np.expand_dims(im_size, axis=0) - outputs = self.exe.run( - self.test_prog, - feed={ - 'image': im, - 'im_size': im_size - }, - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed={'image': im, + 'im_size': im_size}, + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(list(self.test_outputs.keys()), outputs) -- GitLab