未验证 提交 31a9096c 作者: Jianfeng Wang 提交者: GitHub

fix(detection): pad before norm in preparing (#57)

上级 a25570d6
...@@ -67,10 +67,12 @@ class FasterRCNN(M.Module): ...@@ -67,10 +67,12 @@ class FasterRCNN(M.Module):
} }
def preprocess_image(self, image): def preprocess_image(self, image):
padded_image = layers.get_padded_tensor(image, 32, 0.0)
normed_image = ( normed_image = (
image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None] padded_image
- np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None]
) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None] ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None]
return layers.get_padded_tensor(normed_image, 32, 0.0) return normed_image
def forward(self, inputs): def forward(self, inputs):
images = inputs["image"] images = inputs["image"]
......
...@@ -81,10 +81,12 @@ class RetinaNet(M.Module): ...@@ -81,10 +81,12 @@ class RetinaNet(M.Module):
self.loss_normalizer = mge.tensor(100.0) self.loss_normalizer = mge.tensor(100.0)
def preprocess_image(self, image): def preprocess_image(self, image):
padded_image = layers.get_padded_tensor(image, 32, 0.0)
normed_image = ( normed_image = (
image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None] padded_image
- np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None]
) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None] ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None]
return layers.get_padded_tensor(normed_image, 32, 0.0) return normed_image
def forward(self, inputs): def forward(self, inputs):
image = self.preprocess_image(inputs["image"]) image = self.preprocess_image(inputs["image"])
...@@ -98,7 +100,8 @@ class RetinaNet(M.Module): ...@@ -98,7 +100,8 @@ class RetinaNet(M.Module):
for _ in box_logits for _ in box_logits
] ]
box_offsets_list = [ box_offsets_list = [
_.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4) for _ in box_offsets _.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4)
for _ in box_offsets
] ]
anchors_list = [ anchors_list = [
......
...@@ -57,8 +57,8 @@ def main(): ...@@ -57,8 +57,8 @@ def main():
data, im_info = DetEvaluator.process_inputs( data, im_info = DetEvaluator.process_inputs(
ori_img.copy(), model.cfg.test_image_short_size, model.cfg.test_image_max_size, ori_img.copy(), model.cfg.test_image_short_size, model.cfg.test_image_max_size,
) )
model.inputs["image"].set_value(data)
model.inputs["im_info"].set_value(im_info) model.inputs["im_info"].set_value(im_info)
model.inputs["image"].set_value(data.astype(np.float32))
pred_res = evaluator.predict(val_func) pred_res = evaluator.predict(val_func)
res_img = DetEvaluator.vis_det( res_img = DetEvaluator.vis_det(
ori_img, pred_res, is_show_label=True, classes=COCO.class_names, ori_img, pred_res, is_show_label=True, classes=COCO.class_names,
......
...@@ -175,8 +175,8 @@ def worker( ...@@ -175,8 +175,8 @@ def worker(
model.cfg.test_image_short_size, model.cfg.test_image_short_size,
model.cfg.test_image_max_size, model.cfg.test_image_max_size,
) )
model.inputs["image"].set_value(data)
model.inputs["im_info"].set_value(im_info) model.inputs["im_info"].set_value(im_info)
model.inputs["image"].set_value(data.astype(np.float32))
pred_res = evaluator.predict(val_func) pred_res = evaluator.predict(val_func)
result_queue.put_nowait( result_queue.put_nowait(
......
...@@ -89,7 +89,7 @@ class DetectionPadCollator(Collator): ...@@ -89,7 +89,7 @@ class DetectionPadCollator(Collator):
batch_data = defaultdict(list) batch_data = defaultdict(list)
for image, boxes, boxes_category, info in inputs: for image, boxes, boxes_category, info in inputs:
batch_data["data"].append(image) batch_data["data"].append(image.astype(np.float32))
batch_data["gt_boxes"].append( batch_data["gt_boxes"].append(
np.concatenate([boxes, boxes_category[:, np.newaxis]], axis=1).astype( np.concatenate([boxes, boxes_category[:, np.newaxis]], axis=1).astype(
np.float32 np.float32
...@@ -172,7 +172,7 @@ class DetEvaluator: ...@@ -172,7 +172,7 @@ class DetEvaluator:
) )
resized_img = cv2.flip(resized_img, 1) if flip else resized_img resized_img = cv2.flip(resized_img, 1) if flip else resized_img
trans_img = np.ascontiguousarray( trans_img = np.ascontiguousarray(
resized_img.transpose(2, 0, 1)[None, :, :, :], dtype=np.uint8 resized_img.transpose(2, 0, 1)[None, :, :, :], dtype=np.float32
) )
im_info = np.array( im_info = np.array(
[(resized_height, resized_width, original_height, original_width)], [(resized_height, resized_width, original_height, original_width)],
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册