support no_labeling training on voc (#3668)

9a3bb366 · wangguanzhong · GitHub · ba7e185c · 9a3bb366 · 9a3bb366
隐藏空白更改
内联并排

Showing with 34 addition and 6 deletion

docs/tutorials/FAQ.md docs/tutorials/FAQ.md +4 -0

ppdet/data/source/coco.py ppdet/data/source/coco.py +3 -2

ppdet/data/source/voc.py ppdet/data/source/voc.py +27 -4

未找到文件。
--- a/docs/tutorials/FAQ.md
+++ b/docs/tutorials/FAQ.md
@@ -98,3 +98,7 @@ TestDataset:
 **Q:** 如何打印网络FLOPs？

 **A:** 在`configs/runtime.yml`中设置`print_flops: true`，同时需要安装PaddleSlim(比如：pip install paddleslim)，即可打印模型的FLOPs。
+
+**Q:** 如何使用无标注框进行训练？
+
+**A:** 在`configs/dataset/coco.py` 或者`configs/dataset/voc.py`中的TrainDataset下设置`allow_empty: true`, 此时允许数据集加载无标注框进行训练。该功能支持coco，voc数据格式，RCNN系列和YOLO系列模型验证能够正常训练。另外，如果无标注框数据过多，会影响模型收敛，在TrainDataset下可以设置`empty_ratio: 0.1`对无标注框数据进行随机采样，控制无标注框的数据量占总数据量的比例，默认值为1.，即使用全部无标注框
--- a/ppdet/data/source/coco.py
+++ b/ppdet/data/source/coco.py
@@ -38,7 +38,7 @@ class COCODataSet(DetDataset):
        allow_empty (bool): whether to load empty entry. False as default
        empty_ratio (float): the ratio of empty record number to total 
            record's, if empty_ratio is out of [0. ,1.), do not sample the 
-            records. 1. as default
+            records and use all the empty entries. 1. as default
    """

    def __init__(self,
@@ -63,7 +63,8 @@ class COCODataSet(DetDataset):
        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
            return records
        import random
-        sample_num = int(num * self.empty_ratio / (1 - self.empty_ratio))
+        sample_num = min(
+            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
        records = random.sample(records, sample_num)
        return records


--- a/ppdet/data/source/voc.py
+++ b/ppdet/data/source/voc.py
@@ -42,6 +42,10 @@ class VOCDataSet(DetDataset):
        sample_num (int): number of samples to load, -1 means all.
        label_list (str): if use_default_label is False, will load
            mapping between category and class index.
+        allow_empty (bool): whether to load empty entry. False as default
+        empty_ratio (float): the ratio of empty record number to total 
+            record's, if empty_ratio is out of [0. ,1.), do not sample the 
+            records and use all the empty entries. 1. as default
    """

    def __init__(self,
@@ -50,7 +54,9 @@ class VOCDataSet(DetDataset):
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
-                 label_list=None):
+                 label_list=None,
+                 allow_empty=False,
+                 empty_ratio=1.):
        super(VOCDataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
@@ -58,6 +64,18 @@ class VOCDataSet(DetDataset):
            data_fields=data_fields,
            sample_num=sample_num)
        self.label_list = label_list
+        self.allow_empty = allow_empty
+        self.empty_ratio = empty_ratio
+
+    def _sample_empty(self, records, num):
+        # if empty_ratio is out of [0. ,1.), do not sample the records
+        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
+            return records
+        import random
+        sample_num = min(
+            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
+        records = random.sample(records, sample_num)
+        return records

    def parse_dataset(self, ):
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
@@ -66,6 +84,7 @@ class VOCDataSet(DetDataset):
        # mapping category name to class id
        # first_class:0, second_class:1, ...
        records = []
+        empty_records = []
        ct = 0
        cname2cid = {}
        if self.label_list:
@@ -164,15 +183,19 @@ class VOCDataSet(DetDataset):
                    if k in self.data_fields:
                        voc_rec[k] = v

-                if len(objs) != 0:
+                if len(objs) == 0:
+                    empty_records.append(voc_rec)
+                else:
                    records.append(voc_rec)

                ct += 1
                if self.sample_num > 0 and ct >= self.sample_num:
                    break
-        assert len(records) > 0, 'not found any voc record in %s' % (
-            self.anno_path)
+        assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
+        if len(empty_records) > 0:
+            empty_records = self._sample_empty(empty_records, len(records))
+            records += empty_records
        self.roidbs, self.cname2cid = records, cname2cid

    def get_label_list(self):