From 31a9096c999f7672d1190bec9bac8bd4cd314e76 Mon Sep 17 00:00:00 2001
From: Jianfeng Wang
Date: Sat, 15 Aug 2020 11:21:04 +0800
Subject: [PATCH] fix(detection): pad before norm in preparing (#57)

---
 official/vision/detection/models/faster_rcnn.py | 6 ++++--
 official/vision/detection/models/retinanet.py   | 9 ++++++---
 official/vision/detection/tools/inference.py    | 2 +-
 official/vision/detection/tools/test.py         | 2 +-
 official/vision/detection/tools/utils.py        | 4 ++--
 5 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/official/vision/detection/models/faster_rcnn.py b/official/vision/detection/models/faster_rcnn.py
index bfade46..23dc425 100644
--- a/official/vision/detection/models/faster_rcnn.py
+++ b/official/vision/detection/models/faster_rcnn.py
@@ -67,10 +67,12 @@ class FasterRCNN(M.Module):
         }
 
     def preprocess_image(self, image):
+        padded_image = layers.get_padded_tensor(image, 32, 0.0)
         normed_image = (
-            image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None]
+            padded_image
+            - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None]
         ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None]
-        return layers.get_padded_tensor(normed_image, 32, 0.0)
+        return normed_image
 
     def forward(self, inputs):
         images = inputs["image"]
diff --git a/official/vision/detection/models/retinanet.py b/official/vision/detection/models/retinanet.py
index 696238e..56ad116 100644
--- a/official/vision/detection/models/retinanet.py
+++ b/official/vision/detection/models/retinanet.py
@@ -81,10 +81,12 @@ class RetinaNet(M.Module):
         self.loss_normalizer = mge.tensor(100.0)
 
     def preprocess_image(self, image):
+        padded_image = layers.get_padded_tensor(image, 32, 0.0)
         normed_image = (
-            image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None]
+            padded_image
+            - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None]
         ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None]
-        return layers.get_padded_tensor(normed_image, 32, 0.0)
+        return normed_image
 
     def forward(self, inputs):
         image = self.preprocess_image(inputs["image"])
@@ -98,7 +100,8 @@ class RetinaNet(M.Module):
             for _ in box_logits
         ]
         box_offsets_list = [
-            _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4) for _ in box_offsets
+            _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4)
+            for _ in box_offsets
         ]
 
         anchors_list = [
diff --git a/official/vision/detection/tools/inference.py b/official/vision/detection/tools/inference.py
index 01298c7..f49f4cc 100644
--- a/official/vision/detection/tools/inference.py
+++ b/official/vision/detection/tools/inference.py
@@ -57,8 +57,8 @@ def main():
     data, im_info = DetEvaluator.process_inputs(
         ori_img.copy(), model.cfg.test_image_short_size, model.cfg.test_image_max_size,
     )
+    model.inputs["image"].set_value(data)
     model.inputs["im_info"].set_value(im_info)
-    model.inputs["image"].set_value(data.astype(np.float32))
     pred_res = evaluator.predict(val_func)
     res_img = DetEvaluator.vis_det(
         ori_img, pred_res, is_show_label=True, classes=COCO.class_names,
diff --git a/official/vision/detection/tools/test.py b/official/vision/detection/tools/test.py
index 1ef1bd4..25b4da8 100644
--- a/official/vision/detection/tools/test.py
+++ b/official/vision/detection/tools/test.py
@@ -175,8 +175,8 @@ def worker(
             model.cfg.test_image_short_size,
             model.cfg.test_image_max_size,
         )
+        model.inputs["image"].set_value(data)
         model.inputs["im_info"].set_value(im_info)
-        model.inputs["image"].set_value(data.astype(np.float32))
         pred_res = evaluator.predict(val_func)
 
         result_queue.put_nowait(
diff --git a/official/vision/detection/tools/utils.py b/official/vision/detection/tools/utils.py
index 9add012..d48748a 100644
--- a/official/vision/detection/tools/utils.py
+++ b/official/vision/detection/tools/utils.py
@@ -89,7 +89,7 @@ class DetectionPadCollator(Collator):
         batch_data = defaultdict(list)
 
         for image, boxes, boxes_category, info in inputs:
-            batch_data["data"].append(image)
+            batch_data["data"].append(image.astype(np.float32))
             batch_data["gt_boxes"].append(
                 np.concatenate([boxes, boxes_category[:, np.newaxis]], axis=1).astype(
                     np.float32
@@ -172,7 +172,7 @@ class DetEvaluator:
         )
         resized_img = cv2.flip(resized_img, 1) if flip else resized_img
         trans_img = np.ascontiguousarray(
-            resized_img.transpose(2, 0, 1)[None, :, :, :], dtype=np.uint8
+            resized_img.transpose(2, 0, 1)[None, :, :, :], dtype=np.float32
         )
         im_info = np.array(
             [(resized_height, resized_width, original_height, original_width)],
-- 
GitLab