diff --git a/official/vision/detection/models/faster_rcnn.py b/official/vision/detection/models/faster_rcnn.py index bfade468d958169b23c78c8652731a00450d4cbe..23dc425311ea6c5c3b85703eaff09fea270831f4 100644 --- a/official/vision/detection/models/faster_rcnn.py +++ b/official/vision/detection/models/faster_rcnn.py @@ -67,10 +67,12 @@ class FasterRCNN(M.Module): } def preprocess_image(self, image): + padded_image = layers.get_padded_tensor(image, 32, 0.0) normed_image = ( - image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None] + padded_image + - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None] ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None] - return layers.get_padded_tensor(normed_image, 32, 0.0) + return normed_image def forward(self, inputs): images = inputs["image"] diff --git a/official/vision/detection/models/retinanet.py b/official/vision/detection/models/retinanet.py index 696238ef7871a1ff3fa8fa9d45423b418ea0e825..56ad1164f23cf32a9bea27ac4f4c3edc5d696642 100644 --- a/official/vision/detection/models/retinanet.py +++ b/official/vision/detection/models/retinanet.py @@ -81,10 +81,12 @@ class RetinaNet(M.Module): self.loss_normalizer = mge.tensor(100.0) def preprocess_image(self, image): + padded_image = layers.get_padded_tensor(image, 32, 0.0) normed_image = ( - image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None] + padded_image + - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None] ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None] - return layers.get_padded_tensor(normed_image, 32, 0.0) + return normed_image def forward(self, inputs): image = self.preprocess_image(inputs["image"]) @@ -98,7 +100,8 @@ class RetinaNet(M.Module): for _ in box_logits ] box_offsets_list = [ - _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4) for _ in box_offsets + _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4) + for _ in box_offsets ] anchors_list = [ diff --git a/official/vision/detection/tools/inference.py b/official/vision/detection/tools/inference.py index 01298c7983b1658bc701a64809d32bce2aa4ccc6..f49f4cc6cd4ab43e8eadf7f3b8abed42c9533dd3 100644 --- a/official/vision/detection/tools/inference.py +++ b/official/vision/detection/tools/inference.py @@ -57,8 +57,8 @@ def main(): data, im_info = DetEvaluator.process_inputs( ori_img.copy(), model.cfg.test_image_short_size, model.cfg.test_image_max_size, ) + model.inputs["image"].set_value(data) model.inputs["im_info"].set_value(im_info) - model.inputs["image"].set_value(data.astype(np.float32)) pred_res = evaluator.predict(val_func) res_img = DetEvaluator.vis_det( ori_img, pred_res, is_show_label=True, classes=COCO.class_names, diff --git a/official/vision/detection/tools/test.py b/official/vision/detection/tools/test.py index 1ef1bd473edde76f274974a26a22a941f60609cd..25b4da88949590cebfcd6374f965f0d7c3e4d89f 100644 --- a/official/vision/detection/tools/test.py +++ b/official/vision/detection/tools/test.py @@ -175,8 +175,8 @@ def worker( model.cfg.test_image_short_size, model.cfg.test_image_max_size, ) + model.inputs["image"].set_value(data) model.inputs["im_info"].set_value(im_info) - model.inputs["image"].set_value(data.astype(np.float32)) pred_res = evaluator.predict(val_func) result_queue.put_nowait( diff --git a/official/vision/detection/tools/utils.py b/official/vision/detection/tools/utils.py index 9add01265422406f85c18633fc0a377cfd9d1d36..d48748a4c5f96959630f437c58564ce0850e13ba 100644 --- a/official/vision/detection/tools/utils.py +++ b/official/vision/detection/tools/utils.py @@ -89,7 +89,7 @@ class DetectionPadCollator(Collator): batch_data = defaultdict(list) for image, boxes, boxes_category, info in inputs: - batch_data["data"].append(image) + batch_data["data"].append(image.astype(np.float32)) batch_data["gt_boxes"].append( np.concatenate([boxes, boxes_category[:, np.newaxis]], axis=1).astype( np.float32 @@ -172,7 +172,7 @@ class DetEvaluator: ) resized_img = cv2.flip(resized_img, 1) if flip else resized_img trans_img = np.ascontiguousarray( - resized_img.transpose(2, 0, 1)[None, :, :, :], dtype=np.uint8 + resized_img.transpose(2, 0, 1)[None, :, :, :], dtype=np.float32 ) im_info = np.array( [(resized_height, resized_width, original_height, original_width)],