diff --git a/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml b/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
index 7329be68ce30e7a6f5de6b26a8710b380921f5f0..9b9abccd63e499bfa9402f3038425470e4a6e953 100644
--- a/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
+++ b/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
@@ -25,7 +25,6 @@ EvalReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false


 TestReader:
diff --git a/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml b/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
index 7329be68ce30e7a6f5de6b26a8710b380921f5f0..9b9abccd63e499bfa9402f3038425470e4a6e953 100644
--- a/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
+++ b/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
@@ -25,7 +25,6 @@ EvalReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false


 TestReader:
diff --git a/configs/dota/_base_/s2anet_reader.yml b/configs/dota/_base_/s2anet_reader.yml
index cbe64ee36afd1ff55ce03561d5731463e8053d48..b28dd5aad92ac6ae9f27ae0cfb759a153c55b2e7 100644
--- a/configs/dota/_base_/s2anet_reader.yml
+++ b/configs/dota/_base_/s2anet_reader.yml
@@ -26,7 +26,6 @@ EvalReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false


 TestReader:
diff --git a/configs/face_detection/_base_/face_reader.yml b/configs/face_detection/_base_/face_reader.yml
index 7b31b49323fb9150f50390f3fc2f64844a385fda..5a25e8aa0f1acdd1b3b235a8c1a3923eb2af4ba6 100644
--- a/configs/face_detection/_base_/face_reader.yml
+++ b/configs/face_detection/_base_/face_reader.yml
@@ -34,7 +34,6 @@ EvalReader:
   - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
   - Permute: {}
   batch_size: 1
-  drop_empty: false


 TestReader:
diff --git a/configs/faster_rcnn/_base_/faster_fpn_reader.yml b/configs/faster_rcnn/_base_/faster_fpn_reader.yml
index 7329be68ce30e7a6f5de6b26a8710b380921f5f0..9b9abccd63e499bfa9402f3038425470e4a6e953 100644
--- a/configs/faster_rcnn/_base_/faster_fpn_reader.yml
+++ b/configs/faster_rcnn/_base_/faster_fpn_reader.yml
@@ -25,7 +25,6 @@ EvalReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false


 TestReader:
diff --git a/configs/faster_rcnn/_base_/faster_reader.yml b/configs/faster_rcnn/_base_/faster_reader.yml
index d0f44f006d4abe5d9c6ded2ef31cd0fc17dbf93b..e1c1bb6bc262e86ea69ae78919064aa2b6834311 100644
--- a/configs/faster_rcnn/_base_/faster_reader.yml
+++ b/configs/faster_rcnn/_base_/faster_reader.yml
@@ -25,7 +25,6 @@ EvalReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false


 TestReader:
diff --git a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
index e79af4ec7f069d5413f6a3e03ac5262065451269..7cea9d4a20c0240d6a5b31b6720af60eb39a36de 100644
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml
@@ -119,7 +119,6 @@ EvalReader:
         is_scale: true
     - Permute: {}
   batch_size: 1
-  drop_empty: false

 TestReader:
   sample_transforms:
diff --git a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
index 599230e5764a5cc3909e1b07004440f564ed8697..2677d20bced425a7933ffa7f0cab2add825e4c2a 100644
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512_swahr.yml
@@ -120,7 +120,6 @@ EvalReader:
         is_scale: true
     - Permute: {}
   batch_size: 1
-  drop_empty: false

 TestReader:
   sample_transforms:
diff --git a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
index a310ce908cc27dc288f1ad1035afd71633e33c1d..7cbeb01d1f5797b7a4b04f0685dac9c69a2e67c6 100644
--- a/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
+++ b/configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_640.yml
@@ -119,7 +119,6 @@ EvalReader:
         is_scale: true
     - Permute: {}
   batch_size: 1
-  drop_empty: false

 TestReader:
   sample_transforms:
diff --git a/configs/keypoint/hrnet/hrnet_w32_256x192.yml b/configs/keypoint/hrnet/hrnet_w32_256x192.yml
index 78661f2c43b6463105ff77fa2e924244d9a543da..4a63617deeae3913d25eff78da8d30316904046c 100644
--- a/configs/keypoint/hrnet/hrnet_w32_256x192.yml
+++ b/configs/keypoint/hrnet/hrnet_w32_256x192.yml
@@ -128,7 +128,6 @@ EvalReader:
         is_scale: true
     - Permute: {}
   batch_size: 16
-  drop_empty: false

 TestReader:
   sample_transforms:
diff --git a/configs/keypoint/hrnet/hrnet_w32_384x288.yml b/configs/keypoint/hrnet/hrnet_w32_384x288.yml
index b6c28585603aedd096f06131839fcd3119516943..b7240ee65d5dfe971a8d902305e83bba342e76d6 100644
--- a/configs/keypoint/hrnet/hrnet_w32_384x288.yml
+++ b/configs/keypoint/hrnet/hrnet_w32_384x288.yml
@@ -129,7 +129,6 @@ EvalReader:
         is_scale: true
     - Permute: {}
   batch_size: 16
-  drop_empty: false

 TestReader:
   sample_transforms:
diff --git a/configs/mask_rcnn/_base_/mask_fpn_reader.yml b/configs/mask_rcnn/_base_/mask_fpn_reader.yml
index a78b4ef15578f9e06342d3b533d3f960e8803631..6d95dc6a7cb2fe8c49a0fba79f9b6b71232d4c20 100644
--- a/configs/mask_rcnn/_base_/mask_fpn_reader.yml
+++ b/configs/mask_rcnn/_base_/mask_fpn_reader.yml
@@ -25,7 +25,6 @@ EvalReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false


 TestReader:
diff --git a/configs/mask_rcnn/_base_/mask_reader.yml b/configs/mask_rcnn/_base_/mask_reader.yml
index 9ae6a0182324a85146193ef64d56ba127766ea38..7001af7ac980eeb8ca688a8e39cca9dfcf950129 100644
--- a/configs/mask_rcnn/_base_/mask_reader.yml
+++ b/configs/mask_rcnn/_base_/mask_reader.yml
@@ -26,7 +26,6 @@ EvalReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false


 TestReader:
@@ -40,4 +39,3 @@ TestReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false
diff --git a/configs/mot/jde/_base_/jde_reader_1088x608.yml b/configs/mot/jde/_base_/jde_reader_1088x608.yml
index a86e9f7ff2a653549691fff3d0e6f602b79cf468..0d05af3aaacbe74905d4ba49c917ae3fa324cddf 100644
--- a/configs/mot/jde/_base_/jde_reader_1088x608.yml
+++ b/configs/mot/jde/_base_/jde_reader_1088x608.yml
@@ -45,7 +45,6 @@ EvalReader:
   - BboxCXCYWH2XYXY: {}
   - Norm2PixelBbox: {}
   batch_size: 1
-  drop_empty: false


 TestReader:
diff --git a/configs/mot/jde/_base_/jde_reader_576x320.yml b/configs/mot/jde/_base_/jde_reader_576x320.yml
index d07208dcb873b406a1ce0a569abe02cf56c1d432..0a137fe04eb1b4b2400554cc3d64b70795048489 100644
--- a/configs/mot/jde/_base_/jde_reader_576x320.yml
+++ b/configs/mot/jde/_base_/jde_reader_576x320.yml
@@ -45,7 +45,6 @@ EvalReader:
   - BboxCXCYWH2XYXY: {}
   - Norm2PixelBbox: {}
   batch_size: 1
-  drop_empty: false


 TestReader:
diff --git a/configs/mot/jde/_base_/jde_reader_864x480.yml b/configs/mot/jde/_base_/jde_reader_864x480.yml
index 869c56648940a2a3789d8e056492f52ffbb9e101..6083a99c57c464f84aeb545bede1a3e81731730c 100644
--- a/configs/mot/jde/_base_/jde_reader_864x480.yml
+++ b/configs/mot/jde/_base_/jde_reader_864x480.yml
@@ -45,7 +45,6 @@ EvalReader:
   - BboxCXCYWH2XYXY: {}
   - Norm2PixelBbox: {}
   batch_size: 1
-  drop_empty: false


 TestReader:
diff --git a/configs/ppyolo/_base_/ppyolo_reader.yml b/configs/ppyolo/_base_/ppyolo_reader.yml
index 0e9e0cc9767c1d902381cad4ebefebfbd639a0f2..1698539afc0b63bf002831a3a6cd0c63a1828db9 100644
--- a/configs/ppyolo/_base_/ppyolo_reader.yml
+++ b/configs/ppyolo/_base_/ppyolo_reader.yml
@@ -30,7 +30,6 @@ EvalReader:
   - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
   - Permute: {}
   batch_size: 8
-  drop_empty: false

 TestReader:
   inputs_def:
diff --git a/configs/ppyolo/_base_/ppyolo_tiny_reader.yml b/configs/ppyolo/_base_/ppyolo_tiny_reader.yml
index 4cbc090c9baeea55af16237867783d84ff63751f..14c8a7f5aab0fca7d9c5dfccce4d8b590c9ab2ef 100644
--- a/configs/ppyolo/_base_/ppyolo_tiny_reader.yml
+++ b/configs/ppyolo/_base_/ppyolo_tiny_reader.yml
@@ -30,7 +30,6 @@ EvalReader:
   - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
   - Permute: {}
   batch_size: 8
-  drop_empty: false

 TestReader:
   inputs_def:
diff --git a/configs/ppyolo/_base_/ppyolov2_reader.yml b/configs/ppyolo/_base_/ppyolov2_reader.yml
index d8a96dae22340a47b8df069fcda5a3ba91296090..f0dfd9f62207676c95988331a2d6ba8a07a0b2b1 100644
--- a/configs/ppyolo/_base_/ppyolov2_reader.yml
+++ b/configs/ppyolo/_base_/ppyolov2_reader.yml
@@ -30,7 +30,6 @@ EvalReader:
   - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
   - Permute: {}
   batch_size: 8
-  drop_empty: false

 TestReader:
   inputs_def:
diff --git a/configs/ppyolo/ppyolo_mbv3_large_coco.yml b/configs/ppyolo/ppyolo_mbv3_large_coco.yml
index d51696d63950d78cb683136fa1126f968d17b899..01558786e5f75658a023883bca9c6accd3ef23a2 100644
--- a/configs/ppyolo/ppyolo_mbv3_large_coco.yml
+++ b/configs/ppyolo/ppyolo_mbv3_large_coco.yml
@@ -47,7 +47,6 @@ EvalReader:
   - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
   - Permute: {}
   batch_size: 8
-  drop_empty: false

 TestReader:
   inputs_def:
diff --git a/configs/ppyolo/ppyolo_mbv3_small_coco.yml b/configs/ppyolo/ppyolo_mbv3_small_coco.yml
index 6dba29769c7717d753b7a9dcc9bbd3c415fbcdca..53554c40ccba90cbb8019b23f2b7a64ce3c35bc7 100644
--- a/configs/ppyolo/ppyolo_mbv3_small_coco.yml
+++ b/configs/ppyolo/ppyolo_mbv3_small_coco.yml
@@ -47,7 +47,6 @@ EvalReader:
   - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
   - Permute: {}
   batch_size: 8
-  drop_empty: false

 TestReader:
   inputs_def:
diff --git a/configs/ppyolo/ppyolo_r18vd_coco.yml b/configs/ppyolo/ppyolo_r18vd_coco.yml
index c15800ec6ba0e5a4230f5986a905a029e7e3870d..311e3f16f7932bf493cddf21bcf05db9e8dd20cc 100644
--- a/configs/ppyolo/ppyolo_r18vd_coco.yml
+++ b/configs/ppyolo/ppyolo_r18vd_coco.yml
@@ -47,7 +47,6 @@ EvalReader:
   - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
   - Permute: {}
   batch_size: 8
-  drop_empty: false

 TestReader:
   inputs_def:
diff --git a/configs/rcnn_enhance/_base_/faster_rcnn_enhance_reader.yml b/configs/rcnn_enhance/_base_/faster_rcnn_enhance_reader.yml
index 9e1560f0e47bbdb1ef470c87379ff4f79a6bf886..33ec222e2715ca4f4819ffc424a0c366f42dc7af 100644
--- a/configs/rcnn_enhance/_base_/faster_rcnn_enhance_reader.yml
+++ b/configs/rcnn_enhance/_base_/faster_rcnn_enhance_reader.yml
@@ -27,7 +27,6 @@ EvalReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false


 TestReader:
diff --git a/configs/solov2/_base_/solov2_reader.yml b/configs/solov2/_base_/solov2_reader.yml
index cd980d7c2c173ac66fe6cf131817a1cada16a77d..3c96396ac214c38f7937b0449c6550f9cbfe06d0 100644
--- a/configs/solov2/_base_/solov2_reader.yml
+++ b/configs/solov2/_base_/solov2_reader.yml
@@ -28,7 +28,6 @@ EvalReader:
   batch_size: 1
   shuffle: false
   drop_last: false
-  drop_empty: false


 TestReader:
diff --git a/configs/ssd/_base_/ssd_mobilenet_reader.yml b/configs/ssd/_base_/ssd_mobilenet_reader.yml
index 2af8da2aa1a273c44011927fae49992bca93baa6..fdf75f960d3296d83c296b89a598984902545ef6 100644
--- a/configs/ssd/_base_/ssd_mobilenet_reader.yml
+++ b/configs/ssd/_base_/ssd_mobilenet_reader.yml
@@ -26,7 +26,6 @@ EvalReader:
   - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
   - Permute: {}
   batch_size: 1
-  drop_empty: false


 TestReader:
diff --git a/configs/ssd/_base_/ssd_reader.yml b/configs/ssd/_base_/ssd_reader.yml
index e25bed6fb02ae7e88ca258b8939ac22331ee1738..22f8cc0a3ad2d2f11ffc1c77e3354f34abc431b6 100644
--- a/configs/ssd/_base_/ssd_reader.yml
+++ b/configs/ssd/_base_/ssd_reader.yml
@@ -29,7 +29,6 @@ EvalReader:
   - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
   - Permute: {}
   batch_size: 1
-  drop_empty: false

 TestReader:
   inputs_def:
diff --git a/configs/ssd/_base_/ssdlite300_reader.yml b/configs/ssd/_base_/ssdlite300_reader.yml
index cd131127516c68c5c5eee63397966418b4cf2da5..86b69737cbe9c5d35893da15fbbfe44579c91f9f 100644
--- a/configs/ssd/_base_/ssdlite300_reader.yml
+++ b/configs/ssd/_base_/ssdlite300_reader.yml
@@ -26,7 +26,6 @@ EvalReader:
   - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
   - Permute: {}
   batch_size: 1
-  drop_empty: false


 TestReader:
diff --git a/configs/ssd/_base_/ssdlite320_reader.yml b/configs/ssd/_base_/ssdlite320_reader.yml
index 51db6143c033af3fbc72eb82c2bf64088b5691c5..57eeadc6ebe751e6657e84359ecfaac8cdb67824 100644
--- a/configs/ssd/_base_/ssdlite320_reader.yml
+++ b/configs/ssd/_base_/ssdlite320_reader.yml
@@ -26,7 +26,6 @@ EvalReader:
   - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
   - Permute: {}
   batch_size: 1
-  drop_empty: false


 TestReader:
diff --git a/configs/ttfnet/_base_/pafnet_lite_reader.yml b/configs/ttfnet/_base_/pafnet_lite_reader.yml
index 446a13a3cfae2e51f5d48c49655e56b8907fe7ad..6afbd0b09cbc9fc454584f8dfdf0aadfff24f6e7 100644
--- a/configs/ttfnet/_base_/pafnet_lite_reader.yml
+++ b/configs/ttfnet/_base_/pafnet_lite_reader.yml
@@ -27,7 +27,6 @@ EvalReader:
   - Permute: {}
   batch_size: 1
   drop_last: false
-  drop_empty: false

 TestReader:
   sample_transforms:
@@ -37,4 +36,3 @@ TestReader:
   - Permute: {}
   batch_size: 1
   drop_last: false
-  drop_empty: false
diff --git a/configs/ttfnet/_base_/pafnet_reader.yml b/configs/ttfnet/_base_/pafnet_reader.yml
index ea90a134f03ab90427e994b6a9991b5e4534c5be..be87a78839bcb7a54c6f81333711e03e7ad7c19b 100644
--- a/configs/ttfnet/_base_/pafnet_reader.yml
+++ b/configs/ttfnet/_base_/pafnet_reader.yml
@@ -27,7 +27,6 @@ EvalReader:
   - Permute: {}
   batch_size: 1
   drop_last: false
-  drop_empty: false

 TestReader:
   sample_transforms:
@@ -37,4 +36,3 @@ TestReader:
   - Permute: {}
   batch_size: 1
   drop_last: false
-  drop_empty: false
diff --git a/configs/ttfnet/_base_/ttfnet_reader.yml b/configs/ttfnet/_base_/ttfnet_reader.yml
index f9ed6cc57d9609afb25d3027fac32570287a17d1..9c12af727db6e7a199b76ccfda2286bc891a5b22 100644
--- a/configs/ttfnet/_base_/ttfnet_reader.yml
+++ b/configs/ttfnet/_base_/ttfnet_reader.yml
@@ -22,7 +22,6 @@ EvalReader:
   - Permute: {}
   batch_size: 1
   drop_last: false
-  drop_empty: false

 TestReader:
   sample_transforms:
@@ -32,4 +31,3 @@ TestReader:
   - Permute: {}
   batch_size: 1
   drop_last: false
-  drop_empty: false
diff --git a/configs/yolov3/_base_/yolov3_reader.yml b/configs/yolov3/_base_/yolov3_reader.yml
index f0130c167bbe6488412d2caf220e8536ea64ee11..5dab6742b120a68ea76599b911567ee753b68253 100644
--- a/configs/yolov3/_base_/yolov3_reader.yml
+++ b/configs/yolov3/_base_/yolov3_reader.yml
@@ -32,7 +32,6 @@ EvalReader:
   - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
   - Permute: {}
   batch_size: 1
-  drop_empty: false

 TestReader:
   inputs_def:
diff --git a/ppdet/data/reader.py b/ppdet/data/reader.py
index 144a84d452901df238d48c1b6f5d1036542d24da..e87b604d4f15dc397f01a5c170ee19e34db57d54 100644
--- a/ppdet/data/reader.py
+++ b/ppdet/data/reader.py
@@ -116,8 +116,6 @@ class BaseDataLoader(object):
         shuffle (bool): whether to shuffle samples
         drop_last (bool): whether to drop the last incomplete,
             default False
-        drop_empty (bool): whether to drop samples with no ground
-            truth labels, default True
         num_classes (int): class number of dataset, default 80
         collate_batch (bool): whether to collate batch in dataloader.
             If set to True, the samples will collate into batch according
@@ -140,7 +138,6 @@ class BaseDataLoader(object):
                  batch_size=1,
                  shuffle=False,
                  drop_last=False,
-                 drop_empty=True,
                  num_classes=80,
                  collate_batch=True,
                  use_shared_memory=False,
@@ -231,13 +228,12 @@ class TrainReader(BaseDataLoader):
                  batch_size=1,
                  shuffle=True,
                  drop_last=True,
-                 drop_empty=True,
                  num_classes=80,
                  collate_batch=True,
                  **kwargs):
-        super(TrainReader, self).__init__(
-            sample_transforms, batch_transforms, batch_size, shuffle, drop_last,
-            drop_empty, num_classes, collate_batch, **kwargs)
+        super(TrainReader, self).__init__(sample_transforms, batch_transforms,
+                                          batch_size, shuffle, drop_last,
+                                          num_classes, collate_batch, **kwargs)


 @register
@@ -250,12 +246,11 @@ class EvalReader(BaseDataLoader):
                  batch_size=1,
                  shuffle=False,
                  drop_last=True,
-                 drop_empty=True,
                  num_classes=80,
                  **kwargs):
         super(EvalReader, self).__init__(sample_transforms, batch_transforms,
                                          batch_size, shuffle, drop_last,
-                                         drop_empty, num_classes, **kwargs)
+                                         num_classes, **kwargs)


 @register
@@ -268,12 +263,11 @@ class TestReader(BaseDataLoader):
                  batch_size=1,
                  shuffle=False,
                  drop_last=False,
-                 drop_empty=True,
                  num_classes=80,
                  **kwargs):
         super(TestReader, self).__init__(sample_transforms, batch_transforms,
                                          batch_size, shuffle, drop_last,
-                                         drop_empty, num_classes, **kwargs)
+                                         num_classes, **kwargs)


 @register
@@ -286,12 +280,11 @@ class EvalMOTReader(BaseDataLoader):
                  batch_size=1,
                  shuffle=False,
                  drop_last=False,
-                 drop_empty=True,
                  num_classes=1,
                  **kwargs):
         super(EvalMOTReader, self).__init__(sample_transforms, batch_transforms,
                                             batch_size, shuffle, drop_last,
-                                            drop_empty, num_classes, **kwargs)
+                                            num_classes, **kwargs)


 @register
@@ -304,9 +297,8 @@ class TestMOTReader(BaseDataLoader):
                  batch_size=1,
                  shuffle=False,
                  drop_last=False,
-                 drop_empty=True,
                  num_classes=1,
                  **kwargs):
         super(TestMOTReader, self).__init__(sample_transforms, batch_transforms,
                                             batch_size, shuffle, drop_last,
-                                            drop_empty, num_classes, **kwargs)
+                                            num_classes, **kwargs)
diff --git a/ppdet/data/source/coco.py b/ppdet/data/source/coco.py
index c9d8d93ba39eebfd284b5965a7870736b9cd39e7..acf6165078e6fb49fbaf43f1e59668d0c7cd9c87 100644
--- a/ppdet/data/source/coco.py
+++ b/ppdet/data/source/coco.py
@@ -33,6 +33,12 @@ class COCODataSet(DetDataset):
         anno_path (str): coco annotation file path.
         data_fields (list): key name of data dictionary, at least have 'image'.
         sample_num (int): number of samples to load, -1 means all.
+        load_crowd (bool): whether to load crowded ground-truth.
+            False as default
+        allow_empty (bool): whether to load empty entry. False as default
+        empty_ratio (float): the ratio of empty record number to total
+            record's, if empty_ratio is out of [0. ,1.), do not sample the
+            records. 1. as default
     """

     def __init__(self,
@@ -40,11 +46,26 @@
                  image_dir=None,
                  anno_path=None,
                  data_fields=['image'],
-                 sample_num=-1):
+                 sample_num=-1,
+                 load_crowd=False,
+                 allow_empty=False,
+                 empty_ratio=1.):
         super(COCODataSet, self).__init__(dataset_dir, image_dir, anno_path,
                                           data_fields, sample_num)
         self.load_image_only = False
         self.load_semantic = False
+        self.load_crowd = load_crowd
+        self.allow_empty = allow_empty
+        self.empty_ratio = empty_ratio
+
+    def _sample_empty(self, records, num):
+        # if empty_ratio is out of [0. ,1.), do not sample the records
+        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
+            return records
+        import random
+        sample_num = int(num * self.empty_ratio / (1 - self.empty_ratio))
+        records = random.sample(records, sample_num)
+        return records

     def parse_dataset(self):
         anno_path = os.path.join(self.dataset_dir, self.anno_path)
@@ -58,6 +79,7 @@ class COCODataSet(DetDataset):
         img_ids.sort()
         cat_ids = coco.getCatIds()
         records = []
+        empty_records = []
         ct = 0

         self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
@@ -79,6 +101,7 @@ class COCODataSet(DetDataset):

             im_path = os.path.join(image_dir,
                                    im_fname) if image_dir else im_fname
+            is_empty = False
             if not os.path.exists(im_path):
                 logger.warning('Illegal image file: {}, and it will be '
                                'ignored'.format(im_path))
@@ -98,12 +121,16 @@ class COCODataSet(DetDataset):
             } if 'image' in self.data_fields else {}

             if not self.load_image_only:
-                ins_anno_ids = coco.getAnnIds(imgIds=[img_id], iscrowd=False)
+                ins_anno_ids = coco.getAnnIds(
+                    imgIds=[img_id], iscrowd=None if self.load_crowd else False)
                 instances = coco.loadAnns(ins_anno_ids)

                 bboxes = []
+                is_rbox_anno = False
                 for inst in instances:
                     # check gt bbox
+                    if inst.get('ignore', False):
+                        continue
                     if 'bbox' not in inst.keys():
                         continue
                     else:
@@ -137,8 +164,10 @@ class COCODataSet(DetDataset):
                                 img_id, float(inst['area']), x1, y1, x2, y2))

                 num_bbox = len(bboxes)
-                if num_bbox <= 0:
+                if num_bbox <= 0 and not self.allow_empty:
                     continue
+                elif num_bbox <= 0:
+                    is_empty = True

                 gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                 if is_rbox_anno:
@@ -165,7 +194,8 @@ class COCODataSet(DetDataset):
                         gt_poly[i] = box['segmentation']
                         has_segmentation = True

-                if has_segmentation and not any(gt_poly):
+                if has_segmentation and not any(
+                        gt_poly) and not self.allow_empty:
                     continue

                 if is_rbox_anno:
@@ -196,10 +226,16 @@ class COCODataSet(DetDataset):
             logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
                 im_path, img_id, im_h, im_w))

-            records.append(coco_rec)
+            if is_empty:
+                empty_records.append(coco_rec)
+            else:
+                records.append(coco_rec)
             ct += 1
             if self.sample_num > 0 and ct >= self.sample_num:
                 break
-        assert len(records) > 0, 'not found any coco record in %s' % (anno_path)
+        assert ct > 0, 'not found any coco record in %s' % (anno_path)
         logger.debug('{} samples in file {}'.format(ct, anno_path))
+        if len(empty_records) > 0:
+            empty_records = self._sample_empty(empty_records, len(records))
+            records += empty_records
         self.roidbs = records
diff --git a/ppdet/modeling/bbox_utils.py b/ppdet/modeling/bbox_utils.py
index 6fa165c4ebd9081f26cc2ce61b3d78b1cf1aface..26d7ae49960cf7433ece4f276c4fc2060237d91e 100644
--- a/ppdet/modeling/bbox_utils.py
+++ b/ppdet/modeling/bbox_utils.py
@@ -124,6 +124,10 @@ def bbox_overlaps(boxes1, boxes2):
     Return:
         overlaps (Tensor): overlaps between boxes1 and boxes2 with shape [M, N]
     """
+    M = boxes1.shape[0]
+    N = boxes2.shape[0]
+    if M * N == 0:
+        return paddle.zeros([M, N], dtype='float32')
     area1 = bbox_area(boxes1)
     area2 = bbox_area(boxes2)

diff --git a/ppdet/modeling/heads/bbox_head.py b/ppdet/modeling/heads/bbox_head.py
index 633fa398b1fcd0d1151889854aca5690ee37447e..cd73dff5b12b752e325bd607545ac785babdb29b 100644
--- a/ppdet/modeling/heads/bbox_head.py
+++ b/ppdet/modeling/heads/bbox_head.py
@@ -265,14 +265,26 @@ class BBoxHead(nn.Layer):
             targets (list[List[Tensor]]): bbox targets containing tgt_labels, tgt_bboxes and tgt_gt_inds
             rois (List[Tensor]): RoIs generated in each batch
         """
+        cls_name = 'loss_bbox_cls'
+        reg_name = 'loss_bbox_reg'
+        loss_bbox = {}
+
         # TODO: better pass args
         tgt_labels, tgt_bboxes, tgt_gt_inds = targets
+
+        # bbox cls
         tgt_labels = paddle.concat(tgt_labels) if len(
             tgt_labels) > 1 else tgt_labels[0]
-        tgt_labels = tgt_labels.cast('int64')
-        tgt_labels.stop_gradient = True
-        loss_bbox_cls = F.cross_entropy(
-            input=scores, label=tgt_labels, reduction='mean')
+        valid_inds = paddle.nonzero(tgt_labels >= 0).flatten()
+        if valid_inds.shape[0] == 0:
+            loss_bbox[cls_name] = paddle.zeros([1], dtype='float32')
+        else:
+            tgt_labels = tgt_labels.cast('int64')
+            tgt_labels.stop_gradient = True
+            loss_bbox_cls = F.cross_entropy(
+                input=scores, label=tgt_labels, reduction='mean')
+            loss_bbox[cls_name] = loss_bbox_cls
+
         # bbox reg

         cls_agnostic_bbox_reg = deltas.shape[1] == 4
@@ -281,14 +293,9 @@
             paddle.logical_and(tgt_labels >= 0, tgt_labels <
                                self.num_classes)).flatten()

-        cls_name = 'loss_bbox_cls'
-        reg_name = 'loss_bbox_reg'
-        loss_bbox = {}
-
-        loss_weight = 1.
         if fg_inds.numel() == 0:
-            fg_inds = paddle.zeros([1], dtype='int32')
-            loss_weight = 0.
+            loss_bbox[reg_name] = paddle.zeros([1], dtype='float32')
+            return loss_bbox

         if cls_agnostic_bbox_reg:
             reg_delta = paddle.gather(deltas, fg_inds)
@@ -323,8 +330,7 @@ class BBoxHead(nn.Layer):
             loss_bbox_reg = paddle.abs(reg_delta - reg_target).sum(
             ) / tgt_labels.shape[0]

-        loss_bbox[cls_name] = loss_bbox_cls * loss_weight
-        loss_bbox[reg_name] = loss_bbox_reg * loss_weight
+        loss_bbox[reg_name] = loss_bbox_reg

         return loss_bbox

diff --git a/ppdet/modeling/proposal_generator/rpn_head.py b/ppdet/modeling/proposal_generator/rpn_head.py
index ea9fb851c7dcf243c09df27788045f93a8079a02..89d569ebf2133056913666064f0ddf33ed743212 100644
--- a/ppdet/modeling/proposal_generator/rpn_head.py
+++ b/ppdet/modeling/proposal_generator/rpn_head.py
@@ -238,18 +238,24 @@
         valid_ind = paddle.nonzero(valid_mask)

         # cls loss
-        score_pred = paddle.gather(scores, valid_ind)
-        score_label = paddle.gather(score_tgt, valid_ind).cast('float32')
-        score_label.stop_gradient = True
-        loss_rpn_cls = F.binary_cross_entropy_with_logits(
-            logit=score_pred, label=score_label, reduction="sum")
+        if valid_ind.shape[0] == 0:
+            loss_rpn_cls = paddle.zeros([1], dtype='float32')
+        else:
+            score_pred = paddle.gather(scores, valid_ind)
+            score_label = paddle.gather(score_tgt, valid_ind).cast('float32')
+            score_label.stop_gradient = True
+            loss_rpn_cls = F.binary_cross_entropy_with_logits(
+                logit=score_pred, label=score_label, reduction="sum")

         # reg loss
-        loc_pred = paddle.gather(deltas, pos_ind)
-        loc_tgt = paddle.concat(loc_tgt)
-        loc_tgt = paddle.gather(loc_tgt, pos_ind)
-        loc_tgt.stop_gradient = True
-        loss_rpn_reg = paddle.abs(loc_pred - loc_tgt).sum()
+        if pos_ind.shape[0] == 0:
+            loss_rpn_reg = paddle.zeros([1], dtype='float32')
+        else:
+            loc_pred = paddle.gather(deltas, pos_ind)
+            loc_tgt = paddle.concat(loc_tgt)
+            loc_tgt = paddle.gather(loc_tgt, pos_ind)
+            loc_tgt.stop_gradient = True
+            loss_rpn_reg = paddle.abs(loc_pred - loc_tgt).sum()
         return {
             'loss_rpn_cls': loss_rpn_cls / norm,
             'loss_rpn_reg': loss_rpn_reg / norm
diff --git a/ppdet/modeling/proposal_generator/target.py b/ppdet/modeling/proposal_generator/target.py
index dddbb5bd942a524f316a7f43291a1ddbf61840c5..edd1386eae31331d12f5639e2873cd415b0a5a75 100644
--- a/ppdet/modeling/proposal_generator/target.py
+++ b/ppdet/modeling/proposal_generator/target.py
@@ -28,31 +28,38 @@ def rpn_anchor_target(anchors,
                       rpn_fg_fraction,
                       use_random=True,
                       batch_size=1,
+                      ignore_thresh=-1,
+                      is_crowd=None,
                       weights=[1., 1., 1., 1.]):
     tgt_labels = []
     tgt_bboxes = []
-
     tgt_deltas = []
     for i in range(batch_size):
         gt_bbox = gt_boxes[i]
-
+        is_crowd_i = is_crowd[i] if is_crowd else None
         # Step1: match anchor and gt_bbox
         matches, match_labels = label_box(
-            anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True)
+            anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True,
+            ignore_thresh, is_crowd_i)
         # Step2: sample anchor
         fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im,
                                             rpn_fg_fraction, 0, use_random)
         # Fill with the ignore label (-1), then set positive and negative labels
         labels = paddle.full(match_labels.shape, -1, dtype='int32')
-        labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
-        labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))
+        if bg_inds.shape[0] > 0:
+            labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))
+        if fg_inds.shape[0] > 0:
+            labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
         # Step3: make output
-        matched_gt_boxes = paddle.gather(gt_bbox, matches)
-
-        tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)
+        if gt_bbox.shape[0] == 0:
+            matched_gt_boxes = paddle.zeros([0, 4])
+            tgt_delta = paddle.zeros([0, 4])
+        else:
+            matched_gt_boxes = paddle.gather(gt_bbox, matches)
+            tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)
+            matched_gt_boxes.stop_gradient = True
+            tgt_delta.stop_gradient = True
         labels.stop_gradient = True
-        matched_gt_boxes.stop_gradient = True
-        tgt_delta.stop_gradient = True
         tgt_labels.append(labels)
         tgt_bboxes.append(matched_gt_boxes)
         tgt_deltas.append(tgt_delta)
@@ -60,16 +67,46 @@
     return tgt_labels, tgt_bboxes, tgt_deltas


-def label_box(anchors, gt_boxes, positive_overlap, negative_overlap,
-              allow_low_quality):
+def label_box(anchors,
+              gt_boxes,
+              positive_overlap,
+              negative_overlap,
+              allow_low_quality,
+              ignore_thresh,
+              is_crowd=None):
     iou = bbox_overlaps(gt_boxes, anchors)
-    if iou.numel() == 0:
+    n_gt = gt_boxes.shape[0]
+    if n_gt == 0 or is_crowd is None:
+        n_gt_crowd = 0
+    else:
+        n_gt_crowd = paddle.nonzero(is_crowd).shape[0]
+    if iou.shape[0] == 0 or n_gt_crowd == n_gt:
+        # No truth, assign everything to background
         default_matches = paddle.full((iou.shape[1], ), 0, dtype='int64')
-        default_match_labels = paddle.full((iou.shape[1], ), -1, dtype='int32')
+        default_match_labels = paddle.full((iou.shape[1], ), 0, dtype='int32')
         return default_matches, default_match_labels
+    # if ignore_thresh > 0, remove anchor if it is closed to
+    # one of the crowded ground-truth
+    if n_gt_crowd > 0:
+        N_a = anchors.shape[0]
+        ones = paddle.ones([N_a])
+        mask = is_crowd * ones
+
+        if ignore_thresh > 0:
+            crowd_iou = iou * mask
+            valid = (paddle.sum((crowd_iou > ignore_thresh).cast('int32'),
+                                axis=0) > 0).cast('float32')
+            iou = iou * (1 - valid) - valid
+
+        # ignore the iou between anchor and crowded ground-truth
+        iou = iou * (1 - mask) - mask
+
     matched_vals, matches = paddle.topk(iou, k=1, axis=0)
     match_labels = paddle.full(matches.shape, -1, dtype='int32')
-    match_labels = paddle.where(matched_vals < negative_overlap,
+    # set ignored anchor with iou = -1
+    neg_cond = paddle.logical_and(matched_vals > -1,
+                                  matched_vals < negative_overlap)
+    match_labels = paddle.where(neg_cond,
                                 paddle.zeros_like(match_labels), match_labels)
     match_labels = paddle.where(matched_vals >= positive_overlap,
                                 paddle.ones_like(match_labels), match_labels)
@@ -84,6 +121,7 @@ def label_box(anchors, gt_boxes, positive_overlap, negative_overlap,

     matches = matches.flatten()
     match_labels = match_labels.flatten()
+
     return matches, match_labels


@@ -96,24 +134,36 @@ def subsample_labels(labels,
         paddle.logical_and(labels != -1, labels != bg_label))
     negative = paddle.nonzero(labels == bg_label)

-    positive = positive.cast('int32').flatten()
-    negative = negative.cast('int32').flatten()
-
     fg_num = int(num_samples * fg_fraction)
     fg_num = min(positive.numel(), fg_num)
     bg_num = num_samples - fg_num
     bg_num = min(negative.numel(), bg_num)
+    if fg_num == 0 and bg_num == 0:
+        fg_inds = paddle.zeros([0], dtype='int32')
+        bg_inds = paddle.zeros([0], dtype='int32')
+        return fg_inds, bg_inds
+
     # randomly select positive and negative examples
-    fg_perm = paddle.randperm(positive.numel(), dtype='int32')
-    fg_perm = paddle.slice(fg_perm, axes=[0], starts=[0], ends=[fg_num])
+
+    negative = negative.cast('int32').flatten()
     bg_perm = paddle.randperm(negative.numel(), dtype='int32')
     bg_perm = paddle.slice(bg_perm, axes=[0], starts=[0], ends=[bg_num])
     if use_random:
-        fg_inds = paddle.gather(positive, fg_perm)
         bg_inds = paddle.gather(negative, bg_perm)
     else:
-        fg_inds = paddle.slice(positive, axes=[0], starts=[0], ends=[fg_num])
         bg_inds = paddle.slice(negative, axes=[0], starts=[0], ends=[bg_num])
+    if fg_num == 0:
+        fg_inds = paddle.zeros([0], dtype='int32')
+        return fg_inds, bg_inds
+
+    positive = positive.cast('int32').flatten()
+    fg_perm = paddle.randperm(positive.numel(), dtype='int32')
+    fg_perm = paddle.slice(fg_perm, axes=[0], starts=[0], ends=[fg_num])
+    if use_random:
+        fg_inds = paddle.gather(positive, fg_perm)
+    else:
+        fg_inds = paddle.slice(positive, axes=[0], starts=[0], ends=[fg_num])
+
     return fg_inds, bg_inds


@@ -125,6 +175,8 @@ def generate_proposal_target(rpn_rois,
                              fg_thresh,
                              bg_thresh,
                              num_classes,
+                             ignore_thresh=-1.,
+                             is_crowd=None,
                              use_random=True,
                              is_cascade=False,
                              cascade_iou=0.5):
@@ -141,17 +193,18 @@ def generate_proposal_target(rpn_rois,
     bg_thresh = cascade_iou if is_cascade else bg_thresh
     for i, rpn_roi in enumerate(rpn_rois):
         gt_bbox = gt_boxes[i]
+        is_crowd_i = is_crowd[i] if is_crowd else None
         gt_class = paddle.squeeze(gt_classes[i], axis=-1)

-        # Concat RoIs and gt boxes except cascade rcnn
-        if not is_cascade:
+        # Concat RoIs and gt boxes except cascade rcnn or none gt
+        if not is_cascade and gt_bbox.shape[0] > 0:
             bbox = paddle.concat([rpn_roi, gt_bbox])
         else:
             bbox = rpn_roi

         # Step1: label bbox
         matches, match_labels = label_box(bbox, gt_bbox, fg_thresh, bg_thresh,
-                                          False)
+                                          False, ignore_thresh, is_crowd_i)
         # Step2: sample bbox
         sampled_inds, sampled_gt_classes = sample_bbox(
             matches, match_labels, gt_class, batch_size_per_im, fg_fraction,
@@ -162,7 +215,10 @@ def generate_proposal_target(rpn_rois,
                                                                sampled_inds)
         sampled_gt_ind = matches if is_cascade else paddle.gather(matches,
                                                                   sampled_inds)
-        sampled_bbox = paddle.gather(gt_bbox, sampled_gt_ind)
+        if gt_bbox.shape[0] > 0:
+            sampled_bbox = paddle.gather(gt_bbox, sampled_gt_ind)
+        else:
+            sampled_bbox = paddle.zeros([0, 4], dtype='float32')

         rois_per_image.stop_gradient = True
         sampled_gt_ind.stop_gradient = True
@@ -184,19 +240,32 @@ def sample_bbox(matches,
                 num_classes,
                 use_random=True,
                 is_cascade=False):
-    gt_classes = paddle.gather(gt_classes, matches)
-    gt_classes = paddle.where(match_labels == 0,
-                              paddle.ones_like(gt_classes) * num_classes,
-                              gt_classes)
-    gt_classes = paddle.where(match_labels == -1,
-                              paddle.ones_like(gt_classes) * -1, gt_classes)
+
+    n_gt = gt_classes.shape[0]
+    if n_gt == 0:
+        # No truth, assign everything to background
+        gt_classes = paddle.ones(matches.shape, dtype='int32') * num_classes
+        #return matches, match_labels + num_classes
+    else:
+        gt_classes = paddle.gather(gt_classes, matches)
+        gt_classes = paddle.where(match_labels == 0,
+                                  paddle.ones_like(gt_classes) * num_classes,
+                                  gt_classes)
+        gt_classes = paddle.where(match_labels == -1,
+                                  paddle.ones_like(gt_classes) * -1, gt_classes)
     if is_cascade:
-        return matches, gt_classes
+        index = paddle.arange(matches.shape[0])
+        return index, gt_classes
     rois_per_image = int(batch_size_per_im)

     fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, fg_fraction,
                                         num_classes, use_random)
-    sampled_inds = paddle.concat([fg_inds, bg_inds])
+    if fg_inds.shape[0] == 0 and bg_inds.shape[0] == 0:
+        # fake output labeled with -1 when all boxes are neither
+        # foreground nor background
+        sampled_inds = paddle.zeros([1], dtype='int32')
+    else:
+        sampled_inds = paddle.concat([fg_inds, bg_inds])
     sampled_gt_classes = paddle.gather(gt_classes, sampled_inds)
     return sampled_inds, sampled_gt_classes

@@ -268,20 +337,29 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
         # to generate mask target with ground-truth
         boxes = fg_rois.numpy()
         gt_segms_per_im = gt_segms[k]
+        new_segm = []
         inds_per_im = inds_per_im.numpy()
-        for i in inds_per_im:
-            new_segm.append(gt_segms_per_im[i])
+        if len(gt_segms_per_im) > 0:
+            for i in inds_per_im:
+                new_segm.append(gt_segms_per_im[i])

         fg_inds_new = fg_inds.reshape([-1]).numpy()
         results = []
-        for j in fg_inds_new:
-            results.append(
-                rasterize_polygons_within_box(new_segm[j], boxes[j],
-                                              resolution))
+        if len(gt_segms_per_im) > 0:
+            for j in fg_inds_new:
+                results.append(
+                    rasterize_polygons_within_box(new_segm[j], boxes[j],
+                                                  resolution))
+        else:
+            results.append(paddle.ones([resolution, resolution], dtype='int32'))

         fg_classes = paddle.gather(labels_per_im, fg_inds)
         weight = paddle.ones([fg_rois.shape[0]], dtype='float32')
         if not has_fg:
+            # now all sampled classes are background
+            # which will cause error in loss calculation,
+            # make fake classes with weight of 0.
+            fg_classes = paddle.zeros([1], dtype='int32')
             weight = weight - 1
         tgt_mask = paddle.stack(results)
         tgt_mask.stop_gradient = True
diff --git a/ppdet/modeling/proposal_generator/target_layer.py b/ppdet/modeling/proposal_generator/target_layer.py
index ff9cc31a47d974d5926abdb5010c41bc787217a8..f4576be4ad8d42e6aff25148383d3bd590b7c07e 100644
--- a/ppdet/modeling/proposal_generator/target_layer.py
+++ b/ppdet/modeling/proposal_generator/target_layer.py
@@ -44,6 +44,8 @@ class RPNTargetAssign(object):
         negative_overlap (float): Maximum overlap allowed between an anchor
             and ground-truth box for the (anchor, gt box) pair to be a
             background sample. default 0.3
+        ignore_thresh(float): Threshold for ignoring the is_crowd ground-truth
+            if the value is larger than zero.
         use_random (bool): Use random sampling to choose foreground and
             background boxes, default true.
     """
@@ -53,12 +55,14 @@ class RPNTargetAssign(object):
                  fg_fraction=0.5,
                  positive_overlap=0.7,
                  negative_overlap=0.3,
+                 ignore_thresh=-1.,
                  use_random=True):
         super(RPNTargetAssign, self).__init__()
         self.batch_size_per_im = batch_size_per_im
         self.fg_fraction = fg_fraction
         self.positive_overlap = positive_overlap
         self.negative_overlap = negative_overlap
+        self.ignore_thresh = ignore_thresh
         self.use_random = use_random


@@ -67,11 +71,12 @@ class RPNTargetAssign(object):
            anchor_box (Tensor): [num_anchors, 4], num_anchors are all anchors
                in all feature maps.
         """
         gt_boxes = inputs['gt_bbox']
+        is_crowd = inputs.get('is_crowd', None)
         batch_size = len(gt_boxes)
         tgt_labels, tgt_bboxes, tgt_deltas = rpn_anchor_target(
             anchors, gt_boxes, self.batch_size_per_im, self.positive_overlap,
             self.negative_overlap, self.fg_fraction, self.use_random,
-            batch_size)
+            batch_size, self.ignore_thresh, is_crowd)
         norm = self.batch_size_per_im * batch_size
         return tgt_labels, tgt_bboxes, tgt_deltas, norm

@@ -101,7 +106,9 @@ class BBoxAssigner(object):
         bg_thresh (float): Maximum overlap allowed between a RoI and
             ground-truth box for the (roi, gt box) pair to be a
             background sample. default 0.5
-        use_random (bool): Use random sampling to choose foreground and
+        ignore_thresh(float): Threshold for ignoring the is_crowd ground-truth
+            if the value is larger than zero.
+        use_random (bool): Use random sampling to choose foreground and
             background boxes, default true
         cascade_iou (list[iou]): The list of overlap to select foreground and
             background of each stage, which is only used In Cascade RCNN.
@@ -113,6 +120,7 @@ class BBoxAssigner(object):
                  fg_fraction=.25,
                  fg_thresh=.5,
                  bg_thresh=.5,
+                 ignore_thresh=-1.,
                  use_random=True,
                  cascade_iou=[0.5, 0.6, 0.7],
                  num_classes=80):
@@ -121,6 +129,7 @@ class BBoxAssigner(object):
         self.fg_fraction = fg_fraction
         self.fg_thresh = fg_thresh
         self.bg_thresh = bg_thresh
+        self.ignore_thresh = ignore_thresh
         self.use_random = use_random
         self.cascade_iou = cascade_iou
         self.num_classes = num_classes
@@ -133,12 +142,14 @@ class BBoxAssigner(object):
                  is_cascade=False):
         gt_classes = inputs['gt_class']
         gt_boxes = inputs['gt_bbox']
+        is_crowd = inputs.get('is_crowd', None)
         # rois, tgt_labels, tgt_bboxes, tgt_gt_inds
         # new_rois_num
         outs = generate_proposal_target(
             rpn_rois, gt_classes, gt_boxes, self.batch_size_per_im,
             self.fg_fraction, self.fg_thresh, self.bg_thresh, self.num_classes,
-            self.use_random, is_cascade, self.cascade_iou[stage])
+            self.ignore_thresh, is_crowd, self.use_random, is_cascade,
+            self.cascade_iou[stage])
         rois = outs[0]
         rois_num = outs[-1]
         # tgt_labels, tgt_bboxes, tgt_gt_inds
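
Usage note (not part of the patch): the sketch below is one way the options introduced above could be exercised from a config. It assumes the usual PaddleDetection layout in which COCODataSet is configured under TrainDataset and the assigners under RPNHead.rpn_target_assign and BBoxAssigner; the paths, thresholds and ratios are placeholders rather than recommended values.

    # hypothetical config excerpt, for illustration only
    TrainDataset:
      !COCODataSet
        dataset_dir: dataset/coco                         # placeholder
        image_dir: train2017                              # placeholder
        anno_path: annotations/instances_train2017.json   # placeholder
        data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
        load_crowd: true     # keep iscrowd ground-truth instead of filtering it out
        allow_empty: true    # keep images that have no ground-truth boxes
        empty_ratio: 0.1     # empty records capped at roughly 10% of the final record list

    RPNHead:
      rpn_target_assign:
        batch_size_per_im: 256
        fg_fraction: 0.5
        negative_overlap: 0.3
        positive_overlap: 0.7
        ignore_thresh: 0.7   # anchors whose IoU with a crowd box exceeds this are ignored
        use_random: True

    BBoxAssigner:
      batch_size_per_im: 512
      bg_thresh: 0.5
      fg_thresh: 0.5
      fg_fraction: 0.25
      ignore_thresh: 0.5     # same idea at the RoI sampling stage
      use_random: True

For the crowd handling to take effect, 'is_crowd' has to be listed in data_fields so the assigners receive inputs['is_crowd']; when it is absent they fall back to the previous behaviour (is_crowd=None), and ignore_thresh <= 0 likewise disables crowd ignoring.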