diff --git a/fluid/object_detection/README.md b/fluid/object_detection/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4aa2c32865932bb949e20e32b63fc5cec2669dd0
--- /dev/null
+++ b/fluid/object_detection/README.md
@@ -0,0 +1,8 @@
+The code sample in this directory requires the latest develop branch of PaddlePaddle. If you are on an earlier version of PaddlePaddle, [please update your installation](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html).
+
+---
+
+# MobileNet-SSD
+
+This model, built with PaddlePaddle Fluid, is still under active development
+and is not the final version. We welcome feedback.
diff --git a/fluid/object_detection/load_model.py b/fluid/object_detection/load_model.py
index aa2839bc7df1b19f65e4a218cbf0d0d7b8954a5e..8c7389efea33699b2f90243311ff89747f831d06 100644
--- a/fluid/object_detection/load_model.py
+++ b/fluid/object_detection/load_model.py
@@ -3,6 +3,7 @@ import paddle.fluid as fluid
 import numpy as np
 
 
+# Load parameters converted from .npy files.
 def load_vars():
     vars = {}
     name_map = {}
@@ -35,5 +36,32 @@ def load_and_set_vars(place):
     t.set(v, place)
 
 
+# Load parameters converted from a PaddlePaddle v1 model (caffe2paddle).
+def load_paddlev1_vars(place):
+    vars = {}
+    name_map = {}
+    with open('./caffe2paddle/names.map', 'r') as map_file:
+        for param in map_file:
+            fd_name, v1_name = param.strip().split('\t')
+            name_map[fd_name] = v1_name
+
+    from operator import mul
+
+    def load(file_name, shape):
+        with open(file_name, 'rb') as f:
+            f.read(16)  # skip the 16-byte header of the v1 parameter file
+            arr = np.fromfile(f, dtype=np.float32)
+        # The flattened array must match the shape of the fluid variable.
+        assert arr.size == reduce(mul, shape)
+        return arr.reshape(shape)
+
+    for fd_name in name_map:
+        v1_name = name_map[fd_name]
+        t = fluid.global_scope().find_var(fd_name).get_tensor()
+        shape = np.array(t).shape
+        v1_var = load('./caffe2paddle/' + v1_name, shape)
+        t.set(v1_var, place)
+
+
 if __name__ == "__main__":
     load_vars()
diff --git a/fluid/object_detection/mobilenet_ssd.py b/fluid/object_detection/mobilenet_ssd.py
index eacb7e7af35c6b4013bee0d6041ed6acd9788531..1999df8aa0c5d2bf2e024f382f4ccc5e653dae9f 100644
--- a/fluid/object_detection/mobilenet_ssd.py
+++ b/fluid/object_detection/mobilenet_ssd.py
@@ -1,13 +1,7 @@
-import os
 import paddle.v2 as paddle
 import paddle.fluid as fluid
 from paddle.fluid.initializer import MSRA
 from paddle.fluid.param_attr import ParamAttr
-import reader
-import numpy as np
-import load_model as load_model
-
-parameter_attr = ParamAttr(initializer=MSRA())
 
 
 def conv_bn(input,
@@ -19,6 +13,7 @@ def conv_bn(input,
             num_groups=1,
             act='relu',
             use_cudnn=True):
+    parameter_attr = ParamAttr(initializer=MSRA())
     conv = fluid.layers.conv2d(
         input=input,
         num_filters=num_filters,
@@ -30,13 +25,12 @@ def conv_bn(input,
         use_cudnn=use_cudnn,
         param_attr=parameter_attr,
         bias_attr=False)
-    return fluid.layers.batch_norm(input=conv, act=act)
+    bn = fluid.layers.batch_norm(input=conv, act=act)
+    return bn
 
 
 def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride,
                         scale):
-    """
-    """
     depthwise_conv = conv_bn(
         input=input,
         filter_size=3,
@@ -110,6 +104,8 @@ def mobile_net(img, img_shape, scale=1.0):
             num_classes=21,
             min_ratio=20,
             max_ratio=90,
+            min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
+            max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
            aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]],
             base_size=img_shape[2],
             offset=0.5,
@@ -117,106 +113,3 @@ def mobile_net(img, img_shape, scale=1.0):
             clip=True)
 
     return mbox_locs, mbox_confs, box, box_var
-
-
-def train(train_file_list,
-          val_file_list,
-          data_args,
-          learning_rate,
-          batch_size,
-          num_passes,
-          model_save_dir='model',
-          init_model_path=None):
-    image_shape = [3, data_args.resize_h, data_args.resize_w]
-
-    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
-    gt_box = fluid.layers.data(
-        name='gt_box', shape=[4], dtype='float32', lod_level=1)
-    gt_label = fluid.layers.data(
-        name='gt_label', shape=[1], dtype='int32', lod_level=1)
-    difficult = fluid.layers.data(
-        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
-
-    mbox_locs, mbox_confs, box, box_var = mobile_net(image, image_shape)
-    nmsed_out = fluid.layers.detection_output(mbox_locs, mbox_confs, box,
-                                              box_var)
-    loss_vec = fluid.layers.ssd_loss(mbox_locs, mbox_confs, gt_box, gt_label,
-                                     box, box_var)
-    loss = fluid.layers.nn.reduce_sum(loss_vec)
-
-    map_eval = fluid.evaluator.DetectionMAP(
-        nmsed_out,
-        gt_label,
-        gt_box,
-        difficult,
-        21,
-        overlap_threshold=0.5,
-        evaluate_difficult=False,
-        ap_version='11point')
-
-    test_program = fluid.default_main_program().clone(for_test=True)
-    optimizer = fluid.optimizer.Momentum(
-        learning_rate=fluid.layers.exponential_decay(
-            learning_rate=learning_rate,
-            decay_steps=40000,
-            decay_rate=0.1,
-            staircase=True),
-        momentum=0.9,
-        regularization=fluid.regularizer.L2Decay(0.0005), )
-    opts = optimizer.minimize(loss)
-
-    place = fluid.CUDAPlace(0)
-    exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
-
-    load_model.load_and_set_vars(place)
-
-    train_reader = paddle.batch(
-        reader.train(data_args, train_file_list), batch_size=batch_size)
-    test_reader = paddle.batch(
-        reader.test(data_args, val_file_list), batch_size=batch_size)
-    feeder = fluid.DataFeeder(
-        place=place, feed_list=[image, gt_box, gt_label, difficult])
-
-    #print fluid.default_main_program()
-    map, accum_map = map_eval.get_map_var()
-    for pass_id in range(num_passes):
-        map_eval.reset(exe)
-        for batch_id, data in enumerate(train_reader()):
-            loss_v, map_v, accum_map_v = exe.run(
-                fluid.default_main_program(),
-                feed=feeder.feed(data),
-                fetch_list=[loss, map, accum_map])
-            print(
-                "Pass {0}, batch {1}, loss {2}, cur_map {3}, map {4}"
-                .format(pass_id, batch_id, loss_v[0], map_v[0], accum_map_v[0]))
-
-        map_eval.reset(exe)
-        test_map = None
-        for _, data in enumerate(test_reader()):
-            test_map = exe.run(test_program,
-                               feed=feeder.feed(data),
-                               fetch_list=[accum_map])
-        print("Test {0}, map {1}".format(pass_id, test_map[0]))
-
-        if pass_id % 10 == 0:
-            model_path = os.path.join(model_save_dir, str(pass_id))
-            print 'save models to %s' % (model_path)
-            fluid.io.save_inference_model(model_path, ['image'], [nmsed_out],
-                                          exe)
-
-
-if __name__ == '__main__':
-    data_args = reader.Settings(
-        data_dir='./data',
-        label_file='label_list',
-        resize_h=300,
-        resize_w=300,
-        mean_value=[104, 117, 124])
-    train(
-        train_file_list='./data/trainval.txt',
-        val_file_list='./data/test.txt',
-        data_args=data_args,
-        learning_rate=0.001,
-        batch_size=32,
-        num_passes=300)
diff --git a/fluid/object_detection/reader.py b/fluid/object_detection/reader.py
index 6564384118c55db13d88d36c85ec1212f1be2ce5..29f218941029eee34acf4492911b58902f7f0c9c 100644
--- a/fluid/object_detection/reader.py
+++ b/fluid/object_detection/reader.py
@@ -153,6 +153,7 @@ def _reader_creator(settings, file_list, mode, shuffle):
             img = img.astype('float32')
             img -= settings.img_mean
             img = img.flatten()
+            img = img * 0.007843  # ~= 1 / 127.5; with a 127.5 mean this maps pixels to about [-1, 1]
             sample_labels = np.array(sample_labels)
 
             if mode == 'train' or mode == 'test':
@@ -160,7 +161,7 @@ def _reader_creator(settings, file_list, mode, shuffle):
                 yield img.astype(
                     'float32'
                 ), sample_labels[:, 1:5], sample_labels[:, 0].astype(
-                    'int32'), sample_labels[:, 5].astype('int32')
+                    'int32'), sample_labels[:, -1].astype('int32')
             elif mode == 'infer':
                 yield img.astype('float32')
 
diff --git a/fluid/object_detection/train.py b/fluid/object_detection/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcfd520d9051a75b1d8bfb82305f5ea216839872
--- /dev/null
+++ b/fluid/object_detection/train.py
@@ -0,0 +1,114 @@
+import paddle.v2 as paddle
+import paddle.fluid as fluid
+import os
+import reader
+import numpy as np
+import load_model
+from mobilenet_ssd import mobile_net
+
+
+def train(train_file_list,
+          val_file_list,
+          data_args,
+          learning_rate,
+          batch_size,
+          num_passes,
+          model_save_dir='model',
+          init_model_path=None):
+    image_shape = [3, data_args.resize_h, data_args.resize_w]
+
+    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+    gt_box = fluid.layers.data(
+        name='gt_box', shape=[4], dtype='float32', lod_level=1)
+    gt_label = fluid.layers.data(
+        name='gt_label', shape=[1], dtype='int32', lod_level=1)
+    difficult = fluid.layers.data(
+        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
+
+    mbox_locs, mbox_confs, box, box_var = mobile_net(image, image_shape)
+    nmsed_out = fluid.layers.detection_output(
+        mbox_locs, mbox_confs, box, box_var, nms_threshold=0.45)
+    loss_vec = fluid.layers.ssd_loss(mbox_locs, mbox_confs, gt_box, gt_label,
+                                     box, box_var)
+    loss = fluid.layers.nn.reduce_sum(loss_vec)
+
+    map_eval = fluid.evaluator.DetectionMAP(
+        nmsed_out,
+        gt_label,
+        gt_box,
+        difficult,
+        21,
+        overlap_threshold=0.5,
+        evaluate_difficult=False,
+        ap_version='11point')
+    map, accum_map = map_eval.get_map_var()
+
+    test_program = fluid.default_main_program().clone(for_test=True)
+    with fluid.program_guard(test_program):
+        test_program = fluid.io.get_inference_program([loss, map, accum_map])
+
+    optimizer = fluid.optimizer.DecayedAdagrad(
+        learning_rate=fluid.layers.exponential_decay(
+            learning_rate=learning_rate,
+            decay_steps=40000,
+            decay_rate=0.1,
+            staircase=True),
+        regularization=fluid.regularizer.L2Decay(0.0005), )
+
+    opts = optimizer.minimize(loss)
+
+    place = fluid.CUDAPlace(0)
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+
+    #load_model.load_and_set_vars(place)
+    train_reader = paddle.batch(
+        reader.train(data_args, train_file_list), batch_size=batch_size)
+    test_reader = paddle.batch(
+        reader.test(data_args, val_file_list), batch_size=batch_size)
+    feeder = fluid.DataFeeder(
+        place=place, feed_list=[image, gt_box, gt_label, difficult])
+
+    def test(pass_id):
+        map_eval.reset(exe)
+        test_map = None
+        for _, data in enumerate(test_reader()):
+            test_map = exe.run(test_program,
+                               feed=feeder.feed(data),
+                               fetch_list=[accum_map])
+        print("Test {0}, map {1}".format(pass_id, test_map[0]))
+
+    # Main training loop over passes (epochs).
+    for pass_id in range(num_passes):
+        map_eval.reset(exe)
+        for batch_id, data in enumerate(train_reader()):
+            loss_v, map_v, accum_map_v = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(data),
+                fetch_list=[loss, map, accum_map])
+            print(
+                "Pass {0}, batch {1}, loss {2}, cur_map {3}, map {4}"
+                .format(pass_id, batch_id, loss_v[0], map_v[0], accum_map_v[0]))
+        test(pass_id)
+
+        if pass_id % 10 == 0:
+            model_path = os.path.join(model_save_dir, str(pass_id))
+            print 'save models to %s' % (model_path)
+            fluid.io.save_inference_model(model_path, ['image'], [nmsed_out],
+                                          exe)
+
+
+if __name__ == '__main__':
+    data_args = reader.Settings(
+        data_dir='./data',
+        label_file='label_list',
+        resize_h=300,
+        resize_w=300,
+        mean_value=[127.5, 127.5, 127.5])
+    train(
+        train_file_list='./data/trainval.txt',
+        val_file_list='./data/test.txt',
+        data_args=data_args,
+        learning_rate=0.001,
+        batch_size=32,
+        num_passes=300)
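
For reference, a pass directory written by `fluid.io.save_inference_model` above can be loaded back and run on a single image roughly as follows. This is a minimal sketch: the `model/0` directory and `example.jpg` file are placeholders, and the preprocessing only mirrors the resize, mean subtraction, and 0.007843 scaling used in `reader.py`.

```python
import numpy as np
import paddle.fluid as fluid
from PIL import Image

place = fluid.CUDAPlace(0)  # or fluid.CPUPlace()
exe = fluid.Executor(place)

# 'model/0' is a placeholder for a directory saved by save_inference_model.
[infer_program, feed_names, fetch_targets] = fluid.io.load_inference_model(
    'model/0', exe)

# Preprocess one image the way the training reader does: resize to 300x300,
# HWC -> CHW, subtract the 127.5 mean, scale by 0.007843.
img = Image.open('example.jpg').resize((300, 300), Image.ANTIALIAS)
img = np.array(img).astype('float32').transpose((2, 0, 1))
img = (img - 127.5) * 0.007843
img = img[np.newaxis, :]  # add the batch dimension -> [1, 3, 300, 300]

# detection_output yields a variable number of boxes per image, so fetch the
# raw LoDTensor instead of a fixed-shape numpy array.
nmsed_out = exe.run(infer_program,
                    feed={feed_names[0]: img},
                    fetch_list=fetch_targets,
                    return_numpy=False)[0]
# Each row is [label, confidence, xmin, ymin, xmax, ymax].
print(np.array(nmsed_out))
```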