diff --git a/python_module/megengine/data/dataset/vision/coco.py b/python_module/megengine/data/dataset/vision/coco.py
index 7f40b9b591bc68e50904e064a793c62d25c44755..d247e52b4f6567d03dd390864ef5b9c1ee4f600c 100644
--- a/python_module/megengine/data/dataset/vision/coco.py
+++ b/python_module/megengine/data/dataset/vision/coco.py
@@ -14,7 +14,7 @@
 # ---------------------------------------------------------------------
 import json
 import os
-from collections import OrderedDict, defaultdict
+from collections import defaultdict
 
 import cv2
 import numpy as np
@@ -28,26 +28,21 @@ def _count_visible_keypoints(anno):
     return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
 
 
-def _has_only_empty_bbox(anno):
-    return all(any(o <= 0 for o in obj["bbox"][2:]) for obj in anno)
-
-
-def has_valid_annotation(anno):
+def has_valid_annotation(anno, order):
     # if it"s empty, there is no annotation
     if len(anno) == 0:
         return False
-    # if all boxes have close to zero area, there is no annotation
-    if _has_only_empty_bbox(anno):
-        return False
-    # keypoints task have a slight different critera for considering
-    # if an annotation is valid
-    if "keypoints" not in anno[0]:
-        return True
-    # for keypoint detection tasks, only consider valid images those
-    # containing at least min_keypoints_per_image
-    if _count_visible_keypoints(anno) >= min_keypoints_per_image:
-        return True
-    return False
+    if "boxes" in order or "boxes_category" in order:
+        if "bbox" not in anno[0]:
+            return False
+    if "keypoints" in order:
+        if "keypoints" not in anno[0]:
+            return False
+        # for keypoint detection tasks, only consider valid images those
+        # containing at least min_keypoints_per_image
+        if _count_visible_keypoints(anno) < min_keypoints_per_image:
+            return False
+    return True
 
 
 class COCO(VisionDataset):
@@ -58,8 +53,8 @@ class COCO(VisionDataset):
         "image",
         "boxes",
         "boxes_category",
+        "keypoints",
         # TODO: need to check
-        # "keypoints",
         # "polygons",
         "info",
     )
@@ -72,7 +67,7 @@ class COCO(VisionDataset):
         with open(ann_file, "r") as f:
             dataset = json.load(f)
 
-        self.imgs = OrderedDict()
+        self.imgs = dict()
         for img in dataset["images"]:
             # for saving memory
             if "license" in img:
@@ -98,7 +93,7 @@ class COCO(VisionDataset):
                 del ann["segmentation"]
             self.img_to_anns[ann["image_id"]].append(ann)
 
-        self.cats = OrderedDict()
+        self.cats = dict()
         for cat in dataset["categories"]:
             self.cats[cat["id"]] = cat
 
@@ -109,8 +104,17 @@ class COCO(VisionDataset):
             ids = []
             for img_id in self.ids:
                 anno = self.img_to_anns[img_id]
-                if has_valid_annotation(anno):
+                # filter crowd annotations
+                anno = [obj for obj in anno if obj["iscrowd"] == 0]
+                anno = [
+                    obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0
+                ]
+                if has_valid_annotation(anno, order):
                     ids.append(img_id)
+                    self.img_to_anns[img_id] = anno
+                else:
+                    del self.imgs[img_id]
+                    del self.img_to_anns[img_id]
             self.ids = ids
 
         self.json_category_id_to_contiguous_id = {
@@ -125,11 +129,6 @@ class COCO(VisionDataset):
         img_id = self.ids[index]
         anno = self.img_to_anns[img_id]
 
-        # filter crowd annotations
-        anno = [obj for obj in anno if obj["iscrowd"] == 0]
-        # filter empty annotations
-        anno = [obj for obj in anno if obj["area"] > 0]
-
         target = []
         for k in self.order:
             if k == "image":
@@ -181,7 +180,6 @@ class COCO(VisionDataset):
             return img_info
 
     class_names = (
-        "background",
         "person",
         "bicycle",
         "car",
diff --git a/python_module/megengine/data/dataset/vision/objects365.py b/python_module/megengine/data/dataset/vision/objects365.py
index a2a50a4407bf09020529b56fbd4df05b5a7b115a..84556d89e224e5f294a84b413b5369a2eb080c7d 100644
--- a/python_module/megengine/data/dataset/vision/objects365.py
+++ b/python_module/megengine/data/dataset/vision/objects365.py
@@ -69,7 +69,9 @@ class Objects365(VisionDataset):
                 anno = self.img_to_anns[img_id]
                 # filter crowd annotations
                 anno = [obj for obj in anno if obj["iscrowd"] == 0]
-                anno = [obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0]
+                anno = [
+                    obj for obj in anno if obj["bbox"][2] > 0 and obj["bbox"][3] > 0
+                ]
                 if len(anno) > 0:
                     ids.append(img_id)
                     self.img_to_anns[img_id] = anno
diff --git a/python_module/megengine/data/dataset/vision/voc.py b/python_module/megengine/data/dataset/vision/voc.py
index 8d0b43ffaefa57b3158c1cd4e205403ff3511996..c1eebe564b62ec03c46305e83a1d3ca27ddc3661 100644
--- a/python_module/megengine/data/dataset/vision/voc.py
+++ b/python_module/megengine/data/dataset/vision/voc.py
@@ -37,7 +37,9 @@ class PascalVOC(VisionDataset):
 
     def __init__(self, root, image_set, *, order=None):
         if ("boxes" in order or "boxes_category" in order) and "mask" in order:
-            raise ValueError("PascalVOC only supports boxes & boxes_category or mask, not both.")
+            raise ValueError(
+                "PascalVOC only supports boxes & boxes_category or mask, not both."
+            )
 
         super().__init__(root, order=order, supported_order=self.supported_order)
 
@@ -48,13 +50,15 @@ class PascalVOC(VisionDataset):
         image_dir = os.path.join(self.root, "JPEGImages")
 
         if "boxes" in order or "boxes_category" in order:
-            annotation_dir = os.path.join(self.root, 'Annotations')
+            annotation_dir = os.path.join(self.root, "Annotations")
             splitdet_dir = os.path.join(self.root, "ImageSets/Main")
             split_f = os.path.join(splitdet_dir, image_set.rstrip("\n") + ".txt")
             with open(os.path.join(split_f), "r") as f:
                 self.file_names = [x.strip() for x in f.readlines()]
             self.images = [os.path.join(image_dir, x + ".jpg") for x in self.file_names]
-            self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in self.file_names]
+            self.annotations = [
+                os.path.join(annotation_dir, x + ".xml") for x in self.file_names
+            ]
             assert len(self.images) == len(self.annotations)
         elif "mask" in order:
             if "aug" in image_set:
@@ -81,13 +85,17 @@ class PascalVOC(VisionDataset):
                 anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
                 boxes = [obj["bndbox"] for obj in anno["annotation"]["object"]]
                 # boxes type xyxy
-                boxes = [(bb['xmin'], bb['ymin'], bb['xmax'], bb['ymax']) for bb in boxes]
+                boxes = [
+                    (bb["xmin"], bb["ymin"], bb["xmax"], bb["ymax"]) for bb in boxes
+                ]
                 boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
                 target.append(boxes)
             elif k == "boxes_category":
                 anno = self.parse_voc_xml(ET.parse(self.annotations[index]).getroot())
                 boxes_category = [obj["name"] for obj in anno["annotation"]["object"]]
-                boxes_category = [self.class_names.index(bc)-1 for bc in boxes_category]
+                boxes_category = [
+                    self.class_names.index(bc) - 1 for bc in boxes_category
+                ]
                 boxes_category = np.array(boxes_category, dtype=np.int32)
                 target.append(boxes_category)
             elif k == "mask":