Commit 679fa655 authored by Guanghua Yu, committed by qingqing01

Add global shuffle for data reader in object_detection.

Parent 9afe4f67
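The change in a nutshell, as a standalone sketch (the helper name shard_with_global_shuffle is illustrative, not from the repository): shuffle the full image list before cutting it into per-worker shards, so each reader process gets a different slice of the dataset every epoch instead of a fixed one.

import math
import numpy as np

def shard_with_global_shuffle(images, num_workers, seed=None):
    # Shuffle the whole list first (the "global" shuffle added by this commit),
    # then cut it into one roughly equal shard per reader process.
    images = list(images)
    np.random.RandomState(seed).shuffle(images)
    n = int(math.ceil(len(images) / float(num_workers)))
    return [images[i:i + n] for i in range(0, len(images), n)]

# Two epochs over 10 samples with 4 workers: the shard contents differ per epoch.
for epoch in range(2):
    print(epoch, shard_with_global_shuffle(range(10), num_workers=4, seed=epoch))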
The model definition in mobilenet_ssd.py is refactored from module-level functions (conv_bn, depthwise_separable, extra_block, mobile_net) into a MobileNetSSD class, exposed through a new build_mobilenet_ssd() entry point. The rewritten hunk, new version:

@@ -3,111 +3,124 @@
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr


class MobileNetSSD:
    def __init__(self, img, num_classes, img_shape):
        self.img = img
        self.num_classes = num_classes
        self.img_shape = img_shape

    def ssd_net(self, scale=1.0):
        # 300x300
        tmp = self.conv_bn(self.img, 3, int(32 * scale), 2, 1, 3)
        # 150x150
        tmp = self.depthwise_separable(tmp, 32, 64, 32, 1, scale)
        tmp = self.depthwise_separable(tmp, 64, 128, 64, 2, scale)
        # 75x75
        tmp = self.depthwise_separable(tmp, 128, 128, 128, 1, scale)
        tmp = self.depthwise_separable(tmp, 128, 256, 128, 2, scale)
        # 38x38
        tmp = self.depthwise_separable(tmp, 256, 256, 256, 1, scale)
        tmp = self.depthwise_separable(tmp, 256, 512, 256, 2, scale)
        # 19x19
        for i in range(5):
            tmp = self.depthwise_separable(tmp, 512, 512, 512, 1, scale)
        module11 = tmp
        tmp = self.depthwise_separable(tmp, 512, 1024, 512, 2, scale)

        # 10x10
        module13 = self.depthwise_separable(tmp, 1024, 1024, 1024, 1, scale)
        module14 = self.extra_block(module13, 256, 512, 1, 2, scale)
        # 5x5
        module15 = self.extra_block(module14, 128, 256, 1, 2, scale)
        # 3x3
        module16 = self.extra_block(module15, 128, 256, 1, 2, scale)
        # 2x2
        module17 = self.extra_block(module16, 64, 128, 1, 2, scale)

        mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head(
            inputs=[
                module11, module13, module14, module15, module16, module17
            ],
            image=self.img,
            num_classes=self.num_classes,
            min_ratio=20,
            max_ratio=90,
            min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
            max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
            aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
                           [2., 3.]],
            base_size=self.img_shape[2],
            offset=0.5,
            flip=True)

        return mbox_locs, mbox_confs, box, box_var

    def conv_bn(self,
                input,
                filter_size,
                num_filters,
                stride,
                padding,
                channels=None,
                num_groups=1,
                act='relu',
                use_cudnn=True):
        parameter_attr = ParamAttr(learning_rate=0.1, initializer=MSRA())
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=parameter_attr,
            bias_attr=False)
        return fluid.layers.batch_norm(input=conv, act=act)

    def depthwise_separable(self, input, num_filters1, num_filters2, num_groups,
                            stride, scale):
        depthwise_conv = self.conv_bn(
            input=input,
            filter_size=3,
            num_filters=int(num_filters1 * scale),
            stride=stride,
            padding=1,
            num_groups=int(num_groups * scale),
            use_cudnn=False)

        pointwise_conv = self.conv_bn(
            input=depthwise_conv,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0)
        return pointwise_conv

    def extra_block(self, input, num_filters1, num_filters2, num_groups, stride,
                    scale):
        # 1x1 conv
        pointwise_conv = self.conv_bn(
            input=input,
            filter_size=1,
            num_filters=int(num_filters1 * scale),
            stride=1,
            num_groups=int(num_groups * scale),
            padding=0)

        # 3x3 conv
        normal_conv = self.conv_bn(
            input=pointwise_conv,
            filter_size=3,
            num_filters=int(num_filters2 * scale),
            stride=2,
            num_groups=int(num_groups * scale),
            padding=1)
        return normal_conv


def build_mobilenet_ssd(img, num_classes, img_shape):
    ssd_model = MobileNetSSD(img, num_classes, img_shape)
    return ssd_model.ssd_net()
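A minimal usage sketch of the new entry point, for orientation only (the [3, 300, 300] input layout matches the 300x300 comments above; class_num=21 is an assumed VOC-style value, not taken from this diff):

import paddle.fluid as fluid
from mobilenet_ssd import build_mobilenet_ssd

image_shape = [3, 300, 300]   # channels-first; base_size is image_shape[2]
class_num = 21                # assumption: 20 VOC classes plus background

image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
locs, confs, box, box_var = build_mobilenet_ssd(image, class_num, image_shape)
# locs/confs feed fluid.layers.ssd_loss during training, as in the script below.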
In reader.py, both the COCO branch and the PASCAL VOC branch of train() now shuffle the full image list before it is split into per-worker lists:

@@ -293,6 +293,7 @@ def train(settings,
         coco_api = COCO(file_path)
         image_ids = coco_api.getImgIds()
         images = coco_api.loadImgs(image_ids)
+        np.random.shuffle(images)
         n = int(math.ceil(len(images) // num_workers))
         image_lists = [images[i:i + n] for i in range(0, len(images), n)]
@@ -307,11 +308,11 @@ def train(settings,
                              data_dir))
     else:
         images = [line.strip() for line in open(file_path)]
+        np.random.shuffle(images)
         n = int(math.ceil(len(images) // num_workers))
         image_lists = [images[i:i + n] for i in range(0, len(images), n)]
         for l in image_lists:
             readers.append(pascalvoc(settings, l, 'train', batch_size, shuffle))

     return paddle.reader.multiprocess_reader(readers, False)
@@ -341,7 +342,7 @@ def infer(settings, image_path):
                          "data path correctly." % image_path)
     img = Image.open(image_path)
     if img.mode == 'L':
-        img = im.convert('RGB')
+        img = img.convert('RGB')
     im_width, im_height = img.size
     img = img.resize((settings.resize_w, settings.resize_h),
                      Image.ANTIALIAS)
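This hunk also fixes what looks like a simple typo in infer(): the grayscale branch called im.convert instead of img.convert, which would raise a NameError on 'L'-mode images (assuming im is not defined earlier in the function). A minimal sketch of the corrected behavior, with an illustrative file name:

from PIL import Image

img = Image.open('some_grayscale_image.jpg')  # illustrative path
if img.mode == 'L':
    img = img.convert('RGB')  # previously `im.convert('RGB')`
im_width, im_height = img.size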
The training script switches to the new build_mobilenet_ssd entry point and re-creates the train reader at the top of every epoch, so the global shuffle reorders the data (and the per-worker shards) each epoch:

@@ -10,7 +10,7 @@ import multiprocessing
 import paddle
 import paddle.fluid as fluid
 import reader
-from mobilenet_ssd import mobile_net
+from mobilenet_ssd import build_mobilenet_ssd
 from utility import add_arguments, print_arguments

 parser = argparse.ArgumentParser(description=__doc__)
@@ -92,7 +92,7 @@ def build_program(main_prog, startup_prog, train_params, is_train):
             use_double_buffer=True)
         with fluid.unique_name.guard():
             image, gt_box, gt_label, difficult = fluid.layers.read_file(py_reader)
-            locs, confs, box, box_var = mobile_net(class_num, image, image_shape)
+            locs, confs, box, box_var = build_mobilenet_ssd(image, class_num, image_shape)
             if is_train:
                 with fluid.unique_name.guard("train"):
                     loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
@@ -228,6 +228,13 @@ def train(args,
     total_time = 0.0
     for epoc_id in range(epoc_num):
+        train_reader = reader.train(data_args,
+                                    train_file_list,
+                                    batch_size_per_device,
+                                    shuffle=is_shuffle,
+                                    num_workers=num_workers,
+                                    enable_ce=enable_ce)
+        train_py_reader.decorate_paddle_reader(train_reader)
         epoch_idx = epoc_id + 1
         start_time = time.time()
         prev_start_time = start_time
@@ -255,9 +262,10 @@ def train(args,
         end_time = time.time()
         total_time += end_time - start_time
-        best_map, mean_map = test(epoc_id, best_map)
-        print("Best test map {0}".format(best_map))
         if epoc_id % 10 == 0 or epoc_id == epoc_num - 1:
+            best_map, mean_map = test(epoc_id, best_map)
+            print("Best test map {0}".format(best_map))
+            # save model
             save_model(str(epoc_id), train_prog)

     if enable_ce:
@@ -275,7 +283,7 @@ def train(args,
           (devices_num, total_time / epoch_idx))


-if __name__ == '__main__':
+def main():
     args = parser.parse_args()
     print_arguments(args)
@@ -318,3 +326,7 @@ if __name__ == '__main__':
         train_parameters[dataset],
         train_file_list=train_file_list,
         val_file_list=val_file_list)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file