Refine reader.py and train.py

e76927fe · Dang Qingqing · 547b3918 · e76927fe · e76927fe
显示空白变更内容
内联并排

Showing 2 changed files with 196 additions and 173 deletions

fluid/object_detection/reader.py +180 -166

fluid/object_detection/train.py +16 -7

未找到文件。
--- a/fluid/object_detection/reader.py
+++ b/fluid/object_detection/reader.py
@@ -102,107 +102,16 @@ class Settings(object):
        return self._img_mean
-def _reader_creator(settings, file_list, mode, shuffle):
+def preprocess(img, bbox_labels, mode, settings):
-    def reader():
-        if settings.dataset == 'coco':
-            # cocoapi 
-            from pycocotools.coco import COCO
-            from pycocotools.cocoeval import COCOeval
-            coco = COCO(file_list)
-            image_ids = coco.getImgIds()
-            images = coco.loadImgs(image_ids)
-            category_ids = coco.getCatIds()
-            category_names = [
-                item['name'] for item in coco.loadCats(category_ids)
-            ]
-        else:
-            flist = open(file_list)
-            images = [line.strip() for line in flist]
-        if not settings.toy == 0:
-            images = images[:settings.toy] if len(
-                images) > settings.toy else images
-        print("{} on {} with {} images".format(mode, settings.dataset,
-                                               len(images)))
-        if shuffle:
-            random.shuffle(images)
-        for image in images:
-            if settings.dataset == 'coco':
-                image_name = image['file_name']
-                image_path = os.path.join(settings.data_dir, image_name)
-            elif settings.dataset == 'pascalvoc':
-                if mode == 'train' or mode == 'test':
-                    image_path, label_path = image.split()
-                    image_path = os.path.join(settings.data_dir, image_path)
-                    label_path = os.path.join(settings.data_dir, label_path)
-                elif mode == 'infer':
-                    image_path = os.path.join(settings.data_dir, image)
-            img = Image.open(image_path)
-            if img.mode == 'L':
-                img = img.convert('RGB')
    img_width, img_height = img.size
+    sampled_labels = bbox_labels
-            if mode == 'train' or mode == 'test':
-                if settings.dataset == 'coco':
-                    # layout: category_id | xmin | ymin | xmax | ymax | iscrowd | origin_coco_bbox | segmentation | area | image_id | annotation_id
-                    bbox_labels = []
-                    annIds = coco.getAnnIds(imgIds=image['id'])
-                    anns = coco.loadAnns(annIds)
-                    for ann in anns:
-                        bbox_sample = []
-                        # start from 1, leave 0 to background
-                        bbox_sample.append(
-                            float(category_ids.index(ann['category_id'])) + 1)
-                        bbox = ann['bbox']
-                        xmin, ymin, w, h = bbox
-                        xmax = xmin + w
-                        ymax = ymin + h
-                        bbox_sample.append(float(xmin) / img_width)
-                        bbox_sample.append(float(ymin) / img_height)
-                        bbox_sample.append(float(xmax) / img_width)
-                        bbox_sample.append(float(ymax) / img_height)
-                        bbox_sample.append(float(ann['iscrowd']))
-                        #bbox_sample.append(ann['bbox'])
-                        #bbox_sample.append(ann['segmentation'])
-                        #bbox_sample.append(ann['area'])
-                        #bbox_sample.append(ann['image_id'])
-                        #bbox_sample.append(ann['id'])
-                        bbox_labels.append(bbox_sample)
-                elif settings.dataset == 'pascalvoc':
-                    # layout: label | xmin | ymin | xmax | ymax | difficult
-                    bbox_labels = []
-                    root = xml.etree.ElementTree.parse(label_path).getroot()
-                    for object in root.findall('object'):
-                        bbox_sample = []
-                        # start from 1
-                        bbox_sample.append(
-                            float(
-                                settings.label_list.index(
-                                    object.find('name').text)))
-                        bbox = object.find('bndbox')
-                        difficult = float(object.find('difficult').text)
-                        bbox_sample.append(
-                            float(bbox.find('xmin').text) / img_width)
-                        bbox_sample.append(
-                            float(bbox.find('ymin').text) / img_height)
-                        bbox_sample.append(
-                            float(bbox.find('xmax').text) / img_width)
-                        bbox_sample.append(
-                            float(bbox.find('ymax').text) / img_height)
-                        bbox_sample.append(difficult)
-                        bbox_labels.append(bbox_sample)
-                sample_labels = bbox_labels
    if mode == 'train':
        if settings._apply_distort:
            img = image_util.distort_image(img, settings)
        if settings._apply_expand:
            img, bbox_labels, img_width, img_height = image_util.expand_image(
                img, bbox_labels, img_width, img_height, settings)
+        # sampling
        batch_sampler = []
        # hard-code here
        batch_sampler.append(
@@ -219,31 +128,27 @@ def _reader_creator(settings, file_list, mode, shuffle):
            image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0))
        batch_sampler.append(
            image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0))
-                    """ random crop """
+        sampled_bbox = image_util.generate_batch_samples(batch_sampler,
-                    sampled_bbox = image_util.generate_batch_samples(
+                                                         bbox_labels)
-                        batch_sampler, bbox_labels)
        img = np.array(img)
        if len(sampled_bbox) > 0:
            idx = int(random.uniform(0, len(sampled_bbox)))
-                        img, sample_labels = image_util.crop_image(
+            img, sampled_labels = image_util.crop_image(
-                            img, bbox_labels, sampled_bbox[idx], img_width,
+                img, bbox_labels, sampled_bbox[idx], img_width, img_height)
-                            img_height)
        img = Image.fromarray(img)
-            img = img.resize((settings.resize_w, settings.resize_h),
+    img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS)
-                             Image.ANTIALIAS)
    img = np.array(img)
    if mode == 'train':
        mirror = int(random.uniform(0, 2))
        if mirror == 1:
            img = img[:, ::-1, :]
-                    for i in xrange(len(sample_labels)):
+            for i in xrange(len(sampled_labels)):
-                        tmp = sample_labels[i][1]
+                tmp = sampled_labels[i][1]
-                        sample_labels[i][1] = 1 - sample_labels[i][3]
+                sampled_labels[i][1] = 1 - sampled_labels[i][3]
-                        sample_labels[i][3] = 1 - tmp
+                sampled_labels[i][3] = 1 - tmp
    # HWC to CHW
    if len(img.shape) == 3:
        img = np.swapaxes(img, 1, 2)
@@ -252,19 +157,114 @@ def _reader_creator(settings, file_list, mode, shuffle):
    img = img[[2, 1, 0], :, :]
    img = img.astype('float32')
    img -= settings.img_mean
-            img = img.flatten()
+    #img = img.flatten()
    img = img * 0.007843
+    return img, sampled_labels
+def coco(settings, file_list, mode, shuffle):
+    # cocoapi
+    from pycocotools.coco import COCO
+    from pycocotools.cocoeval import COCOeval
+    coco = COCO(file_list)
+    image_ids = coco.getImgIds()
+    images = coco.loadImgs(image_ids)
+    category_ids = coco.getCatIds()
+    category_names = [item['name'] for item in coco.loadCats(category_ids)]
+    if not settings.toy == 0:
+        images = images[:settings.toy] if len(images) > settings.toy else images
+    print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
+    def reader():
+        if mode == 'train' and shuffle:
+            random.shuffle(images)
+        for image in images:
+            image_name = image['file_name']
+            image_path = os.path.join(settings.data_dir, image_name)
+            im = Image.open(image_path)
+            if im.mode == 'L':
+                im = im.convert('RGB')
+            im_width, im_height = im.size
+            # layout: category_id | xmin | ymin | xmax | ymax | iscrowd |
+            # origin_coco_bbox | segmentation | area | image_id | annotation_id
+            bbox_labels = []
+            annIds = coco.getAnnIds(imgIds=image['id'])
+            anns = coco.loadAnns(annIds)
+            for ann in anns:
+                bbox_sample = []
+                # start from 1, leave 0 to background
+                bbox_sample.append(
+                    float(category_ids.index(ann['category_id'])) + 1)
+                bbox = ann['bbox']
+                xmin, ymin, w, h = bbox
+                xmax = xmin + w
+                ymax = ymin + h
+                bbox_sample.append(float(xmin) / im_width)
+                bbox_sample.append(float(ymin) / im_height)
+                bbox_sample.append(float(xmax) / im_width)
+                bbox_sample.append(float(ymax) / im_height)
+                bbox_sample.append(float(ann['iscrowd']))
+                bbox_labels.append(bbox_sample)
+            im, sample_labels = preprocess(im, bbox_labels, mode, settings)
            sample_labels = np.array(sample_labels)
-            if mode == 'train' or mode == 'test':
+            if len(sample_labels) == 0: continue
-                if mode == 'train' and len(sample_labels) == 0: continue
+            im = im.astype('float32')
-                if mode == 'test' and len(sample_labels) == 0: continue
+            boxes = sample_labels[:, 1:5]
-                yield img.astype(
+            lbls = sample_labels[:, 0].astype('int32')
-                    'float32'
+            difficults = sample_labels[:, -1].astype('int32')
-                ), sample_labels[:, 1:5], sample_labels[:, 0].astype(
+            yield im, boxes, lbls, difficults
-                    'int32'), sample_labels[:, -1].astype('int32')
-            elif mode == 'infer':
+    return reader
-                yield img.astype('float32')
+def pascalvoc(settings, file_list, mode, shuffle):
+    """Create a sample reader over a Pascal VOC style file list.
+
+    Every line of `file_list` holds an image path and an XML label path,
+    separated by whitespace and relative to `settings.data_dir`.  When
+    `settings.toy` is non-zero only the first `settings.toy` entries are used.
+
+    Returns a generator function yielding, per image that still has at least
+    one box after `preprocess`: (image as float32, boxes, int32 labels,
+    int32 difficult flags).
+    """
+    # Read the list inside a `with` block so the file handle is closed
+    # instead of being leaked for the lifetime of the reader.
+    with open(file_list) as flist:
+        images = [line.strip() for line in flist]
+    if not settings.toy == 0:
+        images = images[:settings.toy] if len(images) > settings.toy else images
+    print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
+    def reader():
+        # Shuffling happens in place, once per pass, and only for training.
+        if mode == 'train' and shuffle:
+            random.shuffle(images)
+        for image in images:
+            image_path, label_path = image.split()
+            image_path = os.path.join(settings.data_dir, image_path)
+            label_path = os.path.join(settings.data_dir, label_path)
+            im = Image.open(image_path)
+            if im.mode == 'L':
+                im = im.convert('RGB')
+            im_width, im_height = im.size
+            # layout: label | xmin | ymin | xmax | ymax | difficult
+            bbox_labels = []
+            root = xml.etree.ElementTree.parse(label_path).getroot()
+            # `obj` rather than `object`, to avoid shadowing the builtin.
+            for obj in root.findall('object'):
+                bbox_sample = []
+                # label id is the index of the class name in settings.label_list
+                bbox_sample.append(
+                    float(settings.label_list.index(obj.find('name').text)))
+                bbox = obj.find('bndbox')
+                difficult = float(obj.find('difficult').text)
+                # box corners are normalized by the image size
+                bbox_sample.append(float(bbox.find('xmin').text) / im_width)
+                bbox_sample.append(float(bbox.find('ymin').text) / im_height)
+                bbox_sample.append(float(bbox.find('xmax').text) / im_width)
+                bbox_sample.append(float(bbox.find('ymax').text) / im_height)
+                bbox_sample.append(difficult)
+                bbox_labels.append(bbox_sample)
+            im, sample_labels = preprocess(im, bbox_labels, mode, settings)
+            sample_labels = np.array(sample_labels)
+            # skip images left without any box
+            if len(sample_labels) == 0: continue
+            im = im.astype('float32')
+            boxes = sample_labels[:, 1:5]
+            lbls = sample_labels[:, 0].astype('int32')
+            difficults = sample_labels[:, -1].astype('int32')
+            yield im, boxes, lbls, difficults
    return reader
@@ -309,9 +309,9 @@ def train(settings, file_list, shuffle=True):
        elif '2017' in file_list:
            sub_dir = "train2017"
        train_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
-        return _reader_creator(train_settings, file_list, 'train', shuffle)
+        return coco(train_settings, file_list, 'train', shuffle)
    else:
-        return _reader_creator(settings, file_list, 'train', shuffle)
+        return pascalvoc(settings, file_list, 'train', shuffle)
 def test(settings, file_list):
@@ -323,10 +323,24 @@ def test(settings, file_list):
        elif '2017' in file_list:
            sub_dir = "val2017"
        test_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
-        return _reader_creator(test_settings, file_list, 'test', False)
+        return coco(test_settings, file_list, 'test', False)
    else:
-        return _reader_creator(settings, file_list, 'test', False)
+        return pascalvoc(settings, file_list, 'test', False)
-def infer(settings, file_list):
+def infer(settings, image_path):
-    return _reader_creator(settings, file_list, 'infer', False)
+    """Load a single image and preprocess it for inference.
+
+    The image is converted to RGB when it is grayscale, resized to
+    (settings.resize_w, settings.resize_h), transposed from HWC to CHW,
+    reordered to BGR, mean-subtracted with settings.img_mean and scaled
+    by 0.007843.
+
+    Returns the preprocessed image as a float32 array.
+    NOTE(review): the previous infer() returned a reader callable; confirm
+    that callers expect the array itself.
+    """
+    # Use one name (`img`) throughout: the image used to be bound to `im`
+    # while the following lines read `img`, which raised NameError.
+    img = Image.open(image_path)
+    if img.mode == 'L':
+        img = img.convert('RGB')
+    img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS)
+    img = np.array(img)
+    # HWC to CHW
+    if len(img.shape) == 3:
+        img = np.swapaxes(img, 1, 2)
+        img = np.swapaxes(img, 1, 0)
+    # RGB to BGR
+    img = img[[2, 1, 0], :, :]
+    img = img.astype('float32')
+    img -= settings.img_mean
+    img = img * 0.007843
+    # Hand the result back; without this the function returned None.
+    return img
--- a/fluid/object_detection/train.py
+++ b/fluid/object_detection/train.py
@@ -3,6 +3,7 @@ import time
 import numpy as np
 import argparse
 import functools
+import shutil
 import paddle
 import paddle.fluid as fluid
@@ -205,7 +206,6 @@ def parallel_exe(args,
            evaluate_difficult=False,
            ap_version=args.ap_version)
-    print('ParallelExecutor, ap_version = ', args.ap_version)
    if data_args.dataset == 'coco':
        # learning rate decay in 12, 19 pass, respectively
        if '2014' in train_file_list:
@@ -243,7 +243,15 @@ def parallel_exe(args,
    feeder = fluid.DataFeeder(
        place=place, feed_list=[image, gt_box, gt_label, difficult])
-    def test(pass_id):
+    def save_model(postfix):
+        model_path = os.path.join(model_save_dir, postfix)
+        if os.path.isdir(model_path):
+            shutil.rmtree(model_path)
+        print 'save models to %s' % (model_path)
+        fluid.io.save_persistables(exe, model_path)
+    best_map = 0.
+    def test(pass_id, best_map):
+        # Evaluate on the test set and save 'best_model' when the mAP beats
+        # `best_map`.  Returns the (possibly updated) best mAP: rebinding the
+        # parameter here does not change the caller's variable, so the caller
+        # must store the returned value.
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        test_map = None
@@ -251,13 +259,15 @@ def parallel_exe(args,
            test_map = exe.run(test_program,
                               feed=feeder.feed(data),
                               fetch_list=[accum_map])
+        if test_map[0] > best_map:
+            best_map = test_map[0]
+            save_model('best_model')
        print("Test {0}, map {1}".format(pass_id, test_map[0]))
+        return best_map
    for pass_id in range(num_passes):
        start_time = time.time()
        prev_start_time = start_time
        end_time = 0
-        test(pass_id)
        for batch_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
@@ -269,11 +279,10 @@ def parallel_exe(args,
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v, start_time - prev_start_time))
+        # Keep the running best so later passes compare against it and the
+        # final "Best test map" report is meaningful.
+        best_map = test(pass_id, best_map)
        if pass_id % 10 == 0 or pass_id == num_passes - 1:
-            model_path = os.path.join(model_save_dir, str(pass_id))
+            save_model(str(pass_id))
-            print 'save models to %s' % (model_path)
+    print("Best test map {0}".format(best_map))
-            fluid.io.save_persistables(exe, model_path)
 if __name__ == '__main__':
    args = parser.parse_args()