提交 9b7d32d8 编写于 作者: D dangqingqing

Refine network.

上级 e9778f27
The minimum PaddlePaddle version needed for the code sample in this directory is the latest develop branch. If you are on a version of PaddlePaddle earlier than this, [please update your installation](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html).
---
# MobileNet-SSD
This model, built with paddle fluid, is still under active development and is not
the final version. We welcome feedback.
...@@ -37,7 +37,7 @@ def load_and_set_vars(place): ...@@ -37,7 +37,7 @@ def load_and_set_vars(place):
# From Paddle V1 # From Paddle V1
def load_paddlev1_vars(): def load_paddlev1_vars(place):
vars = {} vars = {}
name_map = {} name_map = {}
with open('./caffe2paddle/names.map', 'r') as map_file: with open('./caffe2paddle/names.map', 'r') as map_file:
...@@ -51,6 +51,7 @@ def load_paddlev1_vars(): ...@@ -51,6 +51,7 @@ def load_paddlev1_vars():
with open(file_name, 'rb') as f: with open(file_name, 'rb') as f:
f.read(16) f.read(16)
arr = np.fromfile(f, dtype=np.float32) arr = np.fromfile(f, dtype=np.float32)
#print(arr.size, reduce(mul, shape), file_name)
assert arr.size == reduce(mul, shape) assert arr.size == reduce(mul, shape)
return arr.reshape(shape) return arr.reshape(shape)
......
import os
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
import reader
import numpy as np
import load_model as load_model
import argparse
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('pass_num', int, 100, "# of training epochs.")
add_arg('log_period', int, 1000, "Log period.")
add_arg('learning_rate', float, 1.0e-3, "Learning rate.")
add_arg('l2', float, 0.0004, "L2 regularizer.")
add_arg('max_clip', float, 10.0, "Max clip threshold.")
add_arg('min_clip', float, -10.0, "Min clip threshold.")
add_arg('momentum', float, 0.9, "Momentum.")
add_arg('rnn_hidden_size',int, 200, "Hidden size of rnn layers.")
add_arg('device', int, 0, "Device id.'-1' means running on CPU"
"while '0' means GPU-0.")
parameter_attr = ParamAttr(initializer=MSRA())
def conv_bn(input, def conv_bn(input,
...@@ -37,6 +13,7 @@ def conv_bn(input, ...@@ -37,6 +13,7 @@ def conv_bn(input,
num_groups=1, num_groups=1,
act='relu', act='relu',
use_cudnn=True): use_cudnn=True):
parameter_attr = ParamAttr(initializer=MSRA())
conv = fluid.layers.conv2d( conv = fluid.layers.conv2d(
input=input, input=input,
num_filters=num_filters, num_filters=num_filters,
...@@ -48,13 +25,12 @@ def conv_bn(input, ...@@ -48,13 +25,12 @@ def conv_bn(input,
use_cudnn=use_cudnn, use_cudnn=use_cudnn,
param_attr=parameter_attr, param_attr=parameter_attr,
bias_attr=False) bias_attr=False)
return fluid.layers.batch_norm(input=conv, act=act) bn = fluid.layers.batch_norm(input=conv, act=act)
return bn
def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride, def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride,
scale): scale):
"""
"""
depthwise_conv = conv_bn( depthwise_conv = conv_bn(
input=input, input=input,
filter_size=3, filter_size=3,
...@@ -128,6 +104,8 @@ def mobile_net(img, img_shape, scale=1.0): ...@@ -128,6 +104,8 @@ def mobile_net(img, img_shape, scale=1.0):
num_classes=21, num_classes=21,
min_ratio=20, min_ratio=20,
max_ratio=90, max_ratio=90,
min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]], aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]],
base_size=img_shape[2], base_size=img_shape[2],
offset=0.5, offset=0.5,
...@@ -135,109 +113,3 @@ def mobile_net(img, img_shape, scale=1.0): ...@@ -135,109 +113,3 @@ def mobile_net(img, img_shape, scale=1.0):
clip=True) clip=True)
return mbox_locs, mbox_confs, box, box_var return mbox_locs, mbox_confs, box, box_var
def train(train_file_list,
          val_file_list,
          data_args,
          learning_rate,
          batch_size,
          num_passes,
          model_save_dir='model',
          init_model_path=None):
    """Train the MobileNet-SSD detection network with Fluid.

    Args:
        train_file_list (str): path to the training image-list file.
        val_file_list (str): path to the validation image-list file.
        data_args: reader.Settings holding data dir, resize sizes and mean.
        learning_rate (float): base learning rate for exponential decay.
        batch_size (int): minibatch size for both train and test readers.
        num_passes (int): number of training epochs.
        model_save_dir (str): directory where inference models are saved
            every 10 passes.
        init_model_path (str): unused here; kept for interface compatibility.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]

    # Input layers: image plus variable-length (lod_level=1) ground truth.
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(
        name='gt_box', shape=[4], dtype='float32', lod_level=1)
    gt_label = fluid.layers.data(
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    difficult = fluid.layers.data(
        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)

    # Backbone + SSD heads; NMS-ed detections feed the mAP evaluator.
    mbox_locs, mbox_confs, box, box_var = mobile_net(image, image_shape)
    nmsed_out = fluid.layers.detection_output(mbox_locs, mbox_confs, box,
                                              box_var)
    loss_vec = fluid.layers.ssd_loss(mbox_locs, mbox_confs, gt_box, gt_label,
                                     box, box_var)
    loss = fluid.layers.nn.reduce_sum(loss_vec)

    # PASCAL VOC has 21 classes (20 + background); 11-point AP as in the paper.
    map_eval = fluid.evaluator.DetectionMAP(
        nmsed_out,
        gt_label,
        gt_box,
        difficult,
        21,
        overlap_threshold=0.5,
        evaluate_difficult=False,
        ap_version='11point')

    # Clone for evaluation before the optimizer mutates the main program.
    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Momentum(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=learning_rate,
            decay_steps=40000,
            decay_rate=0.1,
            staircase=True),
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(0.0005), )
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Initialize weights from the pre-converted model files.
    load_model.load_and_set_vars(place)

    train_reader = paddle.batch(
        reader.train(data_args, train_file_list), batch_size=batch_size)
    test_reader = paddle.batch(
        reader.test(data_args, val_file_list), batch_size=batch_size)
    feeder = fluid.DataFeeder(
        place=place, feed_list=[image, gt_box, gt_label, difficult])

    def test(pass_id):
        # Run a full validation pass and report the accumulated mAP.
        map_eval.reset(exe)
        test_map = None
        for _, data in enumerate(test_reader()):
            test_map = exe.run(test_program,
                               feed=feeder.feed(data),
                               fetch_list=[accum_map])
        print("Test {0}, map {1}".format(pass_id, test_map[0]))

    map, accum_map = map_eval.get_map_var()
    for pass_id in range(num_passes):
        map_eval.reset(exe)
        for batch_id, data in enumerate(train_reader()):
            loss_v, map_v, accum_map_v = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[loss, map, accum_map])
            print(
                "Pass {0}, batch {1}, loss {2}, cur_map {3}, map {4}"
                .format(pass_id, batch_id, loss_v[0], map_v[0], accum_map_v[0]))
        test(pass_id)

        # Checkpoint an inference model every 10 passes.
        if pass_id % 10 == 0:
            model_path = os.path.join(model_save_dir, str(pass_id))
            # Fixed: was a Python 2 print statement, inconsistent with the
            # print() calls used everywhere else in this file.
            print('save models to %s' % model_path)
            fluid.io.save_inference_model(model_path, ['image'], [nmsed_out],
                                          exe)
if __name__ == '__main__':
    # Dataset configuration: 300x300 inputs with per-channel mean subtraction.
    data_args = reader.Settings(
        data_dir='./data',
        label_file='label_list',
        resize_h=300,
        resize_w=300,
        mean_value=[104, 117, 124])
    # Kick off training with the default hyper-parameters.
    train(
        train_file_list='./data/trainval.txt',
        val_file_list='./data/test.txt',
        data_args=data_args,
        learning_rate=0.001,
        batch_size=32,
        num_passes=300)
...@@ -153,6 +153,7 @@ def _reader_creator(settings, file_list, mode, shuffle): ...@@ -153,6 +153,7 @@ def _reader_creator(settings, file_list, mode, shuffle):
img = img.astype('float32') img = img.astype('float32')
img -= settings.img_mean img -= settings.img_mean
img = img.flatten() img = img.flatten()
img = img * 0.007843
sample_labels = np.array(sample_labels) sample_labels = np.array(sample_labels)
if mode == 'train' or mode == 'test': if mode == 'train' or mode == 'test':
...@@ -160,7 +161,7 @@ def _reader_creator(settings, file_list, mode, shuffle): ...@@ -160,7 +161,7 @@ def _reader_creator(settings, file_list, mode, shuffle):
yield img.astype( yield img.astype(
'float32' 'float32'
), sample_labels[:, 1:5], sample_labels[:, 0].astype( ), sample_labels[:, 1:5], sample_labels[:, 0].astype(
'int32'), sample_labels[:, 5].astype('int32') 'int32'), sample_labels[:, -1].astype('int32')
elif mode == 'infer': elif mode == 'infer':
yield img.astype('float32') yield img.astype('float32')
......
import paddle.v2 as paddle
import paddle.fluid as fluid
import os
import reader
import numpy as np
import load_model as load_model
from mobilenet_ssd import mobile_net
def train(train_file_list,
          val_file_list,
          data_args,
          learning_rate,
          batch_size,
          num_passes,
          model_save_dir='model',
          init_model_path=None):
    """Train the MobileNet-SSD detection network with Fluid.

    Args:
        train_file_list (str): path to the training image-list file.
        val_file_list (str): path to the validation image-list file.
        data_args: reader.Settings holding data dir, resize sizes and mean.
        learning_rate (float): base learning rate for exponential decay.
        batch_size (int): minibatch size for both train and test readers.
        num_passes (int): number of training epochs.
        model_save_dir (str): directory where inference models are saved
            every 10 passes.
        init_model_path (str): unused here; kept for interface compatibility.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]

    # Input layers: image plus variable-length (lod_level=1) ground truth.
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(
        name='gt_box', shape=[4], dtype='float32', lod_level=1)
    gt_label = fluid.layers.data(
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    difficult = fluid.layers.data(
        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)

    # Backbone + SSD heads; NMS-ed detections feed the mAP evaluator.
    mbox_locs, mbox_confs, box, box_var = mobile_net(image, image_shape)
    nmsed_out = fluid.layers.detection_output(
        mbox_locs, mbox_confs, box, box_var, nms_threshold=0.45)
    loss_vec = fluid.layers.ssd_loss(mbox_locs, mbox_confs, gt_box, gt_label,
                                     box, box_var)
    loss = fluid.layers.nn.reduce_sum(loss_vec)

    # PASCAL VOC has 21 classes (20 + background); 11-point AP as in the paper.
    map_eval = fluid.evaluator.DetectionMAP(
        nmsed_out,
        gt_label,
        gt_box,
        difficult,
        21,
        overlap_threshold=0.5,
        evaluate_difficult=False,
        ap_version='11point')
    map, accum_map = map_eval.get_map_var()

    # Clone for evaluation before the optimizer mutates the main program,
    # then prune it down to just the evaluation targets.
    test_program = fluid.default_main_program().clone(for_test=True)
    with fluid.program_guard(test_program):
        test_program = fluid.io.get_inference_program([loss, map, accum_map])

    optimizer = fluid.optimizer.DecayedAdagrad(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=learning_rate,
            decay_steps=40000,
            decay_rate=0.1,
            staircase=True),
        regularization=fluid.regularizer.L2Decay(0.0005), )
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    # Pretrained-weight loading deliberately disabled in this version.
    #load_model.load_and_set_vars(place)

    train_reader = paddle.batch(
        reader.train(data_args, train_file_list), batch_size=batch_size)
    test_reader = paddle.batch(
        reader.test(data_args, val_file_list), batch_size=batch_size)
    feeder = fluid.DataFeeder(
        place=place, feed_list=[image, gt_box, gt_label, difficult])

    def test(pass_id):
        # Run a full validation pass and report the accumulated mAP.
        map_eval.reset(exe)
        test_map = None
        for _, data in enumerate(test_reader()):
            test_map = exe.run(test_program,
                               feed=feeder.feed(data),
                               fetch_list=[accum_map])
        print("Test {0}, map {1}".format(pass_id, test_map[0]))

    for pass_id in range(num_passes):
        map_eval.reset(exe)
        for batch_id, data in enumerate(train_reader()):
            loss_v, map_v, accum_map_v = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[loss, map, accum_map])
            print(
                "Pass {0}, batch {1}, loss {2}, cur_map {3}, map {4}"
                .format(pass_id, batch_id, loss_v[0], map_v[0], accum_map_v[0]))
        test(pass_id)

        # Checkpoint an inference model every 10 passes.
        if pass_id % 10 == 0:
            model_path = os.path.join(model_save_dir, str(pass_id))
            # Fixed: was a Python 2 print statement, inconsistent with the
            # print() calls used everywhere else in this file.
            print('save models to %s' % model_path)
            fluid.io.save_inference_model(model_path, ['image'], [nmsed_out],
                                          exe)
if __name__ == '__main__':
    # Dataset configuration: 300x300 inputs, symmetric 127.5 mean so pixels
    # are roughly centered before the reader's rescaling.
    data_args = reader.Settings(
        data_dir='./data',
        label_file='label_list',
        resize_h=300,
        resize_w=300,
        mean_value=[127.5, 127.5, 127.5])
    # Kick off training with the default hyper-parameters.
    train(
        train_file_list='./data/trainval.txt',
        val_file_list='./data/test.txt',
        data_args=data_args,
        learning_rate=0.001,
        batch_size=32,
        num_passes=300)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册