fix(detection): optimize dtype (#55)

a25570d6 · Jianfeng Wang · GitHub · 61d87c76 · a25570d6 · a25570d6
6 changed files
--- a/official/vision/detection/layers/basic/functional.py
+++ b/official/vision/detection/layers/basic/functional.py
@@ -13,41 +13,6 @@ import megengine.functional as F
 from megengine.core import Tensor
-def get_padded_array_np(
-    array: np.ndarray, multiple_number: int = 32, pad_value: float = 0
-) -> np.ndarray:
-    """ pad the nd-array to multiple stride of th e
-    Args:
-        array (np.ndarray):
-            the array with the shape of [batch, channel, height, width]
-        multiple_number (int):
-            make the height and width can be divided by multiple_number
-        pad_value (int): the value to be padded
-    Returns:
-        padded_array (np.ndarray)
-    """
-    batch, chl, t_height, t_width = array.shape
-    padded_height = (
-        (t_height + multiple_number - 1) // multiple_number * multiple_number
-    )
-    padded_width = (t_width + multiple_number - 1) // multiple_number * multiple_number
-    padded_array = (
-        np.ones([batch, chl, padded_height, padded_width], dtype=np.float32) * pad_value
-    )
-    ndim = array.ndim
-    if ndim == 4:
-        padded_array[:, :, :t_height, :t_width] = array
-    elif ndim == 3:
-        padded_array[:, :t_height, :t_width] = array
-    else:
-        raise Exception("Not supported tensor dim: %d" % ndim)
-    return padded_array
 def get_padded_tensor(
    array: Tensor, multiple_number: int = 32, pad_value: float = 0
 ) -> Tensor:

--- a/official/vision/detection/layers/det/anchor.py
+++ b/official/vision/detection/layers/det/anchor.py
@@ -48,8 +48,8 @@ class DefaultAnchorGenerator(BaseAnchorGenerator):
    ):
        super().__init__()
        self.base_size = base_size
-        self.anchor_scales = np.array(anchor_scales)
+        self.anchor_scales = np.array(anchor_scales, dtype=np.float32)
-        self.anchor_ratios = np.array(anchor_ratios)
+        self.anchor_ratios = np.array(anchor_ratios, dtype=np.float32)
        self.offset = offset
    def _whctrs(self, anchor):

--- a/official/vision/detection/layers/det/box_utils.py
+++ b/official/vision/detection/layers/det/box_utils.py
@@ -42,8 +42,8 @@ class BoxCoder(BoxCoderBase, metaclass=ABCMeta):
            reg_std(np.ndarray):  [x0_std, x1_std, y0_std, y1_std] or None
        """
-        self.reg_mean = np.array(reg_mean)[None, :]
+        self.reg_mean = np.array(reg_mean, dtype=np.float32)[None, :]
-        self.reg_std = np.array(reg_std)[None, :]
+        self.reg_std = np.array(reg_std, dtype=np.float32)[None, :]
        super().__init__()
    @staticmethod

--- a/official/vision/detection/models/faster_rcnn.py
+++ b/official/vision/detection/models/faster_rcnn.py
@@ -68,8 +68,8 @@ class FasterRCNN(M.Module):
    def preprocess_image(self, image):
        normed_image = (
-            image - np.array(self.cfg.img_mean)[None, :, None, None]
+            image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None]
-        ) / np.array(self.cfg.img_std)[None, :, None, None]
+        ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None]
        return layers.get_padded_tensor(normed_image, 32, 0.0)
    def forward(self, inputs):

--- a/official/vision/detection/models/retinanet.py
+++ b/official/vision/detection/models/retinanet.py
@@ -33,7 +33,7 @@ class RetinaNet(M.Module):
        )
        self.box_coder = layers.BoxCoder(cfg.reg_mean, cfg.reg_std)
-        self.stride_list = np.array(cfg.stride).astype(np.float32)
+        self.stride_list = np.array(cfg.stride, dtype=np.float32)
        self.in_features = ["p3", "p4", "p5", "p6", "p7"]
        # ----------------------- build the backbone ------------------------ #
@@ -82,8 +82,8 @@ class RetinaNet(M.Module):
    def preprocess_image(self, image):
        normed_image = (
-            image - np.array(self.cfg.img_mean)[None, :, None, None]
+            image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None]
-        ) / np.array(self.cfg.img_std)[None, :, None, None]
+        ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None]
        return layers.get_padded_tensor(normed_image, 32, 0.0)
    def forward(self, inputs):

--- a/official/vision/detection/tools/test.py
+++ b/official/vision/detection/tools/test.py
@@ -64,7 +64,7 @@ def main():
            args.start_epoch = cfg.max_epoch - 1
        if args.end_epoch == -1:
            args.end_epoch = args.start_epoch
-    assert 0 <= args.start_epoch <= args.end_epoch < cfg.max_epoch
+        assert 0 <= args.start_epoch <= args.end_epoch < cfg.max_epoch
    for epoch_num in range(args.start_epoch, args.end_epoch + 1):
        if args.weight_file:
@@ -182,7 +182,7 @@ def worker(
        result_queue.put_nowait(
            {
                "det_res": pred_res,
-                "image_id": int(data_dict[1][2][0].split(".")[0].split("_")[-1]),
+                "image_id": int(data_dict[1][2][0].split(".")[0]),
            }
        )