fix(mge/data): fix a NaN bug in the COCO dataset

GitOrigin-RevId: 689fbaafa5563a048d7753a3021494acf093047f

fix(mge/data): fix a NaN bug in the COCO dataset
GitOrigin-RevId: 689fbaafa5563a048d7753a3021494acf093047f
d8d3f405 · Megvii Engine Team · Xinran Xu · 41464e18 · d8d3f405 · d8d3f405
3 changed files
--- a/python_module/megengine/data/dataset/vision/coco.py
+++ b/python_module/megengine/data/dataset/vision/coco.py
@@ -14,7 +14,7 @@
 # ---------------------------------------------------------------------
 import json
 import os
-from collections import OrderedDict, defaultdict
+from collections import defaultdict
 import cv2
 import numpy as np
@@ -28,26 +28,21 @@ def _count_visible_keypoints(anno):
    return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
-def _has_only_empty_bbox(anno):
+def has_valid_annotation(anno, order):
-    return all(any(o <= 0 for o in obj["bbox"][2:]) for obj in anno)
-def has_valid_annotation(anno):
    # if it"s empty, there is no annotation
    if len(anno) == 0:
        return False
-    # if all boxes have close to zero area, there is no annotation
+    if "boxes" in order or "boxes_category" in order:
-    if _has_only_empty_bbox(anno):
+        if "bbox" not in anno[0]:
-        return False
+            return False
-    # keypoints task have a slight different critera for considering
+    if "keypoints" in order:
-    # if an annotation is valid
+        if "keypoints" not in anno[0]:
-    if "keypoints" not in anno[0]:
+            return False
-        return True
+        # for keypoint detection tasks, only consider valid images those
-    # for keypoint detection tasks, only consider valid images those
+        # containing at least min_keypoints_per_image
-    # containing at least min_keypoints_per_image
+        if _count_visible_keypoints(anno) < min_keypoints_per_image:
-    if _count_visible_keypoints(anno) >= min_keypoints_per_image:
+            return False
-        return True
+    return True
-    return False
 class COCO(VisionDataset):
@@ -58,8 +53,8 @@ class COCO(VisionDataset):
        "image",
        "boxes",
        "boxes_category",
+        "keypoints",
        # TODO: need to check
-        # "keypoints",
        # "polygons",
        "info",
    )
@@ -72,7 +67,7 @@ class COCO(VisionDataset):
        with open(ann_file, "r") as f:
            dataset = json.load(f)
-        self.imgs = OrderedDict()
+        self.imgs = dict()
        for img in dataset["images"]:
            # for saving memory
            if "license" in img:
@@ -98,7 +93,7 @@ class COCO(VisionDataset):
                del ann["segmentation"]
            self.img_to_anns[ann["image_id"]].append(ann)
-        self.cats = OrderedDict()
+        self.cats = dict()
        for cat in dataset["categories"]:
            self.cats[cat["id"]] = cat
@@ -109,8 +104,17 @@ class COCO(VisionDataset):
            ids = []
            for img_id in self.ids:
                anno = self.img_to_anns[img_id]
-                if has_valid_annotation(anno):
+                # filter crowd annotations
+                anno = [obj for obj in anno if obj["iscrowd"] == 0]
+                anno = [
+                    obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0
+                ]
+                if has_valid_annotation(anno, order):
                    ids.append(img_id)
+                    self.img_to_anns[img_id] = anno
+                else:
+                    del self.imgs[img_id]
+                    del self.img_to_anns[img_id]
            self.ids = ids
        self.json_category_id_to_contiguous_id = {
@@ -125,11 +129,6 @@ class COCO(VisionDataset):
        img_id = self.ids[index]
        anno = self.img_to_anns[img_id]
-        # filter crowd annotations
-        anno = [obj for obj in anno if obj["iscrowd"] == 0]
-        # filter empty annotations
-        anno = [obj for obj in anno if obj["area"] > 0]
        target = []
        for k in self.order:
            if k == "image":
@@ -181,7 +180,6 @@ class COCO(VisionDataset):
        return img_info
    class_names = (
-        "background",
        "person",
        "bicycle",
        "car",

--- a/python_module/megengine/data/dataset/vision/objects365.py
+++ b/python_module/megengine/data/dataset/vision/objects365.py
@@ -69,7 +69,9 @@ class Objects365(VisionDataset):
                anno = self.img_to_anns[img_id]
                # filter crowd annotations
                anno = [obj for obj in anno if obj["iscrowd"] == 0]
-                anno = [obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0]
+                anno = [
+                    obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0
+                ]
                if len(anno) > 0:
                    ids.append(img_id)
                    self.img_to_anns[img_id] = anno

--- a/python_module/megengine/data/dataset/vision/voc.py
+++ b/python_module/megengine/data/dataset/vision/voc.py
@@ -37,7 +37,9 @@ class PascalVOC(VisionDataset):
    def __init__(self, root, image_set, *, order=None):
        if ("boxes" in order or "boxes_category" in order) and "mask" in order:
-            raise ValueError("PascalVOC only supports boxes & boxes_category or mask, not both.")
+            raise ValueError(
+                "PascalVOC only supports boxes & boxes_category or mask, not both."
+            )
        super().__init__(root, order=order, supported_order=self.supported_order)
@@ -48,13 +50,15 @@ class PascalVOC(VisionDataset):
        image_dir = os.path.join(self.root, "JPEGImages")
        if "boxes" in order or "boxes_category" in order:
-            annotation_dir = os.path.join(self.root, 'Annotations')
+            annotation_dir = os.path.join(self.root, "Annotations")
            splitdet_dir = os.path.join(self.root, "ImageSets/Main")
            split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt")
            with open(os.path.join(split_f), "r") as f:
                self.file_names = [x.strip() for x in f.readlines()]
            self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names]
-            self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in self.file_names]
+            self.annotations = [
+                os.path.join(annotation_dir, x + ".xml") for x in self.file_names
+            ]
            assert len(self.images) == len(self.annotations)
        elif "mask" in order:
            if "aug" in image_set:
@@ -81,13 +85,17 @@ class PascalVOC(VisionDataset):
                anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
                boxes = [obj["bndbox"] for obj in anno["annotation"]["object"]]
                # boxes type xyxy
-                boxes = [(bb['xmin'], bb['ymin'], bb['xmax'], bb['ymax']) for bb in boxes]
+                boxes = [
+                    (bb["xmin"], bb["ymin"], bb["xmax"], bb["ymax"]) for bb in boxes
+                ]
                boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
                target.append(boxes)
            elif k == "boxes_category":
                anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
                boxes_category = [obj["name"] for obj in anno["annotation"]["object"]]
-                boxes_category = [self.class_names.index(bc)-1 for bc in boxes_category]
+                boxes_category = [
+                    self.class_names.index(bc) - 1 for bc in boxes_category
+                ]
                boxes_category = np.array(boxes_category, dtype=np.int32)
                target.append(boxes_category)
            elif k == "mask":