multiprocess data reader (#849)

03a0ae1d · Yuan Gao · qingqing01 · d300e5e4 · 03a0ae1d · 03a0ae1d
3 changed files
--- a/fluid/object_detection/image_util.py
+++ b/fluid/object_detection/image_util.py
@@ -216,7 +216,7 @@ def distort_image(img, settings):
 def expand_image(img, bbox_labels, img_width, img_height, settings):
    prob = random.uniform(0, 1)
    if prob < settings._expand_prob:
-        if _expand_max_ratio - 1 >= 0.01:
+        if settings._expand_max_ratio - 1 >= 0.01:
            expand_ratio = random.uniform(1, settings._expand_max_ratio)
            height = int(img_height * expand_ratio)
            width = int(img_width * expand_ratio)

--- a/fluid/object_detection/reader.py
+++ b/fluid/object_detection/reader.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import paddle
 import image_util
 from paddle.utils.image_util import *
 import random
@@ -22,6 +23,7 @@ import xml.etree.ElementTree
 import os
 import time
 import copy
+import functools


 class Settings(object):
@@ -36,6 +38,8 @@ class Settings(object):
            for line in open(label_fpath):
                self._label_list.append(line.strip())

+        self._thread = 2
+        self._buf_size = 2048
        self._apply_distort = apply_distort
        self._apply_expand = apply_expand
        self._resize_height = resize_h
@@ -94,6 +98,123 @@ class Settings(object):
        return self._img_mean


+def process_image(sample, settings, mode):
+    img = Image.open(sample[0])
+    if img.mode == 'L':
+        img = img.convert('RGB')
+    img_width, img_height = img.size
+
+    if mode == 'train' or mode == 'test':
+        if settings.dataset == 'coco':
+            # layout actually built below: category_id | xmin | ymin | xmax | ymax | iscrowd (the remaining COCO fields are left commented out). NOTE(review): coco, image and category_ids are not parameters or locals of process_image -- confirm they exist at module scope, otherwise this branch raises NameError.
+            bbox_labels = []
+            annIds = coco.getAnnIds(imgIds=image['id'])
+            anns = coco.loadAnns(annIds)
+            for ann in anns:
+                bbox_sample = []
+                # class ids start from 1; 0 is left for background
+                bbox_sample.append(
+                    float(category_ids.index(ann['category_id'])) + 1)
+                bbox = ann['bbox']
+                xmin, ymin, w, h = bbox
+                xmax = xmin + w
+                ymax = ymin + h
+                bbox_sample.append(float(xmin) / img_width)
+                bbox_sample.append(float(ymin) / img_height)
+                bbox_sample.append(float(xmax) / img_width)
+                bbox_sample.append(float(ymax) / img_height)
+                bbox_sample.append(float(ann['iscrowd']))
+                #bbox_sample.append(ann['bbox'])
+                #bbox_sample.append(ann['segmentation'])
+                #bbox_sample.append(ann['area'])
+                #bbox_sample.append(ann['image_id'])
+                #bbox_sample.append(ann['id'])
+                bbox_labels.append(bbox_sample)
+        elif settings.dataset == 'pascalvoc':
+            # layout: label | xmin | ymin | xmax | ymax | difficult, with box coordinates divided by the image width/height; sample[1] is parsed as the XML annotation file
+            bbox_labels = []
+            root = xml.etree.ElementTree.parse(sample[1]).getroot()
+            for object in root.findall('object'):
+                bbox_sample = []
+                # label is the class name's index in settings.label_list; no +1 offset is added here (unlike the coco branch) -- NOTE(review): presumably label_list reserves index 0 for background, confirm
+                bbox_sample.append(
+                    float(settings.label_list.index(object.find('name').text)))
+                bbox = object.find('bndbox')
+                difficult = float(object.find('difficult').text)
+                bbox_sample.append(float(bbox.find('xmin').text) / img_width)
+                bbox_sample.append(float(bbox.find('ymin').text) / img_height)
+                bbox_sample.append(float(bbox.find('xmax').text) / img_width)
+                bbox_sample.append(float(bbox.find('ymax').text) / img_height)
+                bbox_sample.append(difficult)
+                bbox_labels.append(bbox_sample)
+
+        sample_labels = bbox_labels
+        if mode == 'train':
+            if settings._apply_distort:
+                img = image_util.distort_image(img, settings)
+            if settings._apply_expand:
+                img, bbox_labels, img_width, img_height = image_util.expand_image(
+                    img, bbox_labels, img_width, img_height, settings)
+            batch_sampler = []
+            # the seven sampler configurations below are hard-coded (not read from settings)
+            batch_sampler.append(
+                image_util.sampler(1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0))
+            batch_sampler.append(
+                image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0))
+            batch_sampler.append(
+                image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0))
+            batch_sampler.append(
+                image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0))
+            batch_sampler.append(
+                image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0))
+            batch_sampler.append(
+                image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0))
+            batch_sampler.append(
+                image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0))
+            """ random crop """
+            sampled_bbox = image_util.generate_batch_samples(batch_sampler,
+                                                             bbox_labels)
+
+            img = np.array(img)
+            if len(sampled_bbox) > 0:
+                idx = int(random.uniform(0, len(sampled_bbox)))
+                img, sample_labels = image_util.crop_image(
+                    img, bbox_labels, sampled_bbox[idx], img_width, img_height)
+
+            img = Image.fromarray(img)
+    img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS)
+    img = np.array(img)
+
+    if mode == 'train':
+        mirror = int(random.uniform(0, 2))
+        if mirror == 1:
+            img = img[:, ::-1, :]
+            for i in xrange(len(sample_labels)):
+                tmp = sample_labels[i][1]
+                sample_labels[i][1] = 1 - sample_labels[i][3]
+                sample_labels[i][3] = 1 - tmp
+
+    # HWC to CHW (only applied when the array has three dimensions)
+    if len(img.shape) == 3:
+        img = np.swapaxes(img, 1, 2)
+        img = np.swapaxes(img, 1, 0)
+    # RGB to BGR: reverse the order of the three channels on axis 0
+    img = img[[2, 1, 0], :, :]
+    img = img.astype('float32')
+    img -= settings.img_mean
+    img = img.flatten()
+    img = img * 0.007843
+
+    sample_labels = np.array(sample_labels)
+    if mode == 'train' or mode == 'test':
+        if len(sample_labels) != 0:
+            return img.astype(
+                'float32'), sample_labels[:, 1:5], sample_labels[:, 0].astype(
+                    'int32'), sample_labels[:, -1].astype('int32')
+    elif mode == 'infer':
+        return img.astype('float32')
+
+
 def _reader_creator(settings, file_list, mode, shuffle):
    def reader():
        if settings.dataset == 'coco':
@@ -117,7 +238,6 @@ def _reader_creator(settings, file_list, mode, shuffle):
                images) > settings.toy else images
        print("{} on {} with {} images".format(mode, settings.dataset,
                                               len(images)))
-
        if shuffle:
            random.shuffle(images)

@@ -125,140 +245,20 @@ def _reader_creator(settings, file_list, mode, shuffle):
            if settings.dataset == 'coco':
                image_name = image['file_name']
                image_path = os.path.join(settings.data_dir, image_name)
+                yield [image_path]
            elif settings.dataset == 'pascalvoc':
                if mode == 'train' or mode == 'test':
                    image_path, label_path = image.split()
                    image_path = os.path.join(settings.data_dir, image_path)
                    label_path = os.path.join(settings.data_dir, label_path)
+                    yield image_path, label_path
                elif mode == 'infer':
                    image_path = os.path.join(settings.data_dir, image)
+                    yield [image_path]

-            img = Image.open(image_path)
-            if img.mode == 'L':
-                img = img.convert('RGB')
-            img_width, img_height = img.size
-
-            if mode == 'train' or mode == 'test':
-                if settings.dataset == 'coco':
-                    # layout: category_id | xmin | ymin | xmax | ymax | iscrowd | origin_coco_bbox | segmentation | area | image_id | annotation_id
-                    bbox_labels = []
-                    annIds = coco.getAnnIds(imgIds=image['id'])
-                    anns = coco.loadAnns(annIds)
-                    for ann in anns:
-                        bbox_sample = []
-                        # start from 1, leave 0 to background
-                        bbox_sample.append(
-                            float(category_ids.index(ann['category_id'])) + 1)
-                        bbox = ann['bbox']
-                        xmin, ymin, w, h = bbox
-                        xmax = xmin + w
-                        ymax = ymin + h
-                        bbox_sample.append(float(xmin) / img_width)
-                        bbox_sample.append(float(ymin) / img_height)
-                        bbox_sample.append(float(xmax) / img_width)
-                        bbox_sample.append(float(ymax) / img_height)
-                        bbox_sample.append(float(ann['iscrowd']))
-                        #bbox_sample.append(ann['bbox'])
-                        #bbox_sample.append(ann['segmentation'])
-                        #bbox_sample.append(ann['area'])
-                        #bbox_sample.append(ann['image_id'])
-                        #bbox_sample.append(ann['id'])
-                        bbox_labels.append(bbox_sample)
-                elif settings.dataset == 'pascalvoc':
-                    # layout: label | xmin | ymin | xmax | ymax | difficult
-                    bbox_labels = []
-                    root = xml.etree.ElementTree.parse(label_path).getroot()
-                    for object in root.findall('object'):
-                        bbox_sample = []
-                        # start from 1
-                        bbox_sample.append(
-                            float(
-                                settings.label_list.index(
-                                    object.find('name').text)))
-                        bbox = object.find('bndbox')
-                        difficult = float(object.find('difficult').text)
-                        bbox_sample.append(
-                            float(bbox.find('xmin').text) / img_width)
-                        bbox_sample.append(
-                            float(bbox.find('ymin').text) / img_height)
-                        bbox_sample.append(
-                            float(bbox.find('xmax').text) / img_width)
-                        bbox_sample.append(
-                            float(bbox.find('ymax').text) / img_height)
-                        bbox_sample.append(difficult)
-                        bbox_labels.append(bbox_sample)
-
-                sample_labels = bbox_labels
-                if mode == 'train':
-                    if settings._apply_distort:
-                        img = image_util.distort_image(img, settings)
-                    if settings._apply_expand:
-                        img, bbox_labels, img_width, img_height = image_util.expand_image(
-                            img, bbox_labels, img_width, img_height, settings)
-                    batch_sampler = []
-                    # hard-code here
-                    batch_sampler.append(
-                        image_util.sampler(1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0))
-                    batch_sampler.append(
-                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0))
-                    batch_sampler.append(
-                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0))
-                    batch_sampler.append(
-                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0))
-                    batch_sampler.append(
-                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0))
-                    batch_sampler.append(
-                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0))
-                    batch_sampler.append(
-                        image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0))
-                    """ random crop """
-                    sampled_bbox = image_util.generate_batch_samples(
-                        batch_sampler, bbox_labels, img_width, img_height)
-
-                    img = np.array(img)
-                    if len(sampled_bbox) > 0:
-                        idx = int(random.uniform(0, len(sampled_bbox)))
-                        img, sample_labels = image_util.crop_image(
-                            img, bbox_labels, sampled_bbox[idx], img_width,
-                            img_height)
-
-                    img = Image.fromarray(img)
-            img = img.resize((settings.resize_w, settings.resize_h),
-                             Image.ANTIALIAS)
-            img = np.array(img)
-
-            if mode == 'train':
-                mirror = int(random.uniform(0, 2))
-                if mirror == 1:
-                    img = img[:, ::-1, :]
-                    for i in xrange(len(sample_labels)):
-                        tmp = sample_labels[i][1]
-                        sample_labels[i][1] = 1 - sample_labels[i][3]
-                        sample_labels[i][3] = 1 - tmp
-
-            # HWC to CHW
-            if len(img.shape) == 3:
-                img = np.swapaxes(img, 1, 2)
-                img = np.swapaxes(img, 1, 0)
-            # RBG to BGR
-            img = img[[2, 1, 0], :, :]
-            img = img.astype('float32')
-            img -= settings.img_mean
-            img = img.flatten()
-            img = img * 0.007843
-
-            sample_labels = np.array(sample_labels)
-            if mode == 'train' or mode == 'test':
-                if mode == 'train' and len(sample_labels) == 0: continue
-                if mode == 'test' and len(sample_labels) == 0: continue
-                yield img.astype(
-                    'float32'
-                ), sample_labels[:, 1:5], sample_labels[:, 0].astype(
-                    'int32'), sample_labels[:, -1].astype('int32')
-            elif mode == 'infer':
-                yield img.astype('float32')
-
-    return reader
+    mapper = functools.partial(process_image, mode=mode, settings=settings)
+    return paddle.reader.xmap_readers(mapper, reader, settings._thread,
+                                      settings._buf_size)


 def draw_bounding_box_on_image(image,

--- a/fluid/object_detection/train.py
+++ b/fluid/object_detection/train.py
 import paddle
 import paddle.fluid as fluid
 import reader
-import load_model as load_model
 from mobilenet_ssd import mobile_net
 from utility import add_arguments, print_arguments
 import os