diff --git a/fluid/object_detection/data/prepare_voc_data.py b/fluid/object_detection/data/pascalvoc/create_list.py
similarity index 96%
rename from fluid/object_detection/data/prepare_voc_data.py
rename to fluid/object_detection/data/pascalvoc/create_list.py
index a652956e91ab8277bc6670d4dc85905fc52a3203..1f53b182fdab937c250945fdb8ee1da8cd85f46e 100644
--- a/fluid/object_detection/data/prepare_voc_data.py
+++ b/fluid/object_detection/data/pascalvoc/create_list.py
@@ -60,4 +60,5 @@ def prepare_filelist(devkit_dir, years, output_dir):
             ftest.write(item[0] + ' ' + item[1] + '\n')
 
 
-prepare_filelist(devkit_dir, years, '.')
+if __name__ == '__main__':
+    prepare_filelist(devkit_dir, years, '.')
diff --git a/fluid/object_detection/data/pascalvoc/download.sh b/fluid/object_detection/data/pascalvoc/download.sh
new file mode 100644
index 0000000000000000000000000000000000000000..55bbb0e5a43f937ee478c9502444b22c493890ae
--- /dev/null
+++ b/fluid/object_detection/data/pascalvoc/download.sh
@@ -0,0 +1,16 @@
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+cd "$DIR"
+
+# Download the data.
+echo "Downloading..."
+wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
+wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
+wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
+# Extract the data.
+echo "Extracting..."
+tar -xf VOCtrainval_11-May-2012.tar
+tar -xf VOCtrainval_06-Nov-2007.tar
+tar -xf VOCtest_06-Nov-2007.tar
+
+echo "Creating data lists..."
+python create_list.py
diff --git a/fluid/object_detection/data/label_list b/fluid/object_detection/data/pascalvoc/label_list
similarity index 100%
rename from fluid/object_detection/data/label_list
rename to fluid/object_detection/data/pascalvoc/label_list
diff --git a/fluid/object_detection/load_model.py b/fluid/object_detection/load_model.py
deleted file mode 100644
index 8c7389efea33699b2f90243311ff89747f831d06..0000000000000000000000000000000000000000
--- a/fluid/object_detection/load_model.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import paddle.v2 as paddle
-import paddle.fluid as fluid
-import numpy as np
-
-
-# From npy
-def load_vars():
-    vars = {}
-    name_map = {}
-    with open('./ssd_mobilenet_v1_coco/names.map', 'r') as map_file:
-        for param in map_file:
-            fd_name, tf_name = param.strip().split('\t')
-            name_map[fd_name] = tf_name
-
-    tf_vars = np.load(
-        './ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco_2017_11_17.npy').item()
-    for fd_name in name_map:
-        tf_name = name_map[fd_name]
-        tf_var = tf_vars[tf_name]
-        if len(tf_var.shape) == 4 and 'depthwise' in tf_name:
-            vars[fd_name] = np.transpose(tf_var, (2, 3, 0, 1))
-        elif len(tf_var.shape) == 4:
-            vars[fd_name] = np.transpose(tf_var, (3, 2, 0, 1))
-        else:
-            vars[fd_name] = tf_var
-
-    return vars
-
-
-def load_and_set_vars(place):
-    vars = load_vars()
-    for k, v in vars.items():
-        t = fluid.global_scope().find_var(k).get_tensor()
-        #print(np.array(t).shape, v.shape, k)
-        assert np.array(t).shape == v.shape
-        t.set(v, place)
-
-
-# From Paddle V1
-def load_paddlev1_vars(place):
-    vars = {}
-    name_map = {}
-    with open('./caffe2paddle/names.map', 'r') as map_file:
-        for param in map_file:
-            fd_name, tf_name = param.strip().split('\t')
-            name_map[fd_name] = tf_name
-
-    from operator import mul
-
-    def load(file_name, shape):
-        with open(file_name, 'rb') as f:
-            f.read(16)
-            arr = np.fromfile(f, dtype=np.float32)
-        #print(arr.size, reduce(mul, shape), file_name)
-        assert arr.size == reduce(mul, shape)
-        return arr.reshape(shape)
-
-    for fd_name in name_map:
-        v1_name = name_map[fd_name]
-        t = fluid.global_scope().find_var(fd_name).get_tensor()
-        shape = np.array(t).shape
-        v1_var = load('./caffe2paddle/' + v1_name, shape)
-        t.set(v1_var, place)
-
-
-if __name__ == "__main__":
-    load_vars()
diff --git a/fluid/object_detection/mobilenet_ssd.py b/fluid/object_detection/mobilenet_ssd.py
index 01fa32cfb87d9a572acb207bd546c4ed00bf2690..c39883196056aede5d410554e14a0198e540d754 100644
--- a/fluid/object_detection/mobilenet_ssd.py
+++ b/fluid/object_detection/mobilenet_ssd.py
@@ -13,7 +13,7 @@ def conv_bn(input,
             num_groups=1,
             act='relu',
             use_cudnn=True):
-    parameter_attr = ParamAttr(initializer=MSRA())
+    parameter_attr = ParamAttr(learning_rate=0.1, initializer=MSRA())
     conv = fluid.layers.conv2d(
         input=input,
         num_filters=num_filters,
@@ -25,11 +25,9 @@ def conv_bn(input,
         use_cudnn=use_cudnn,
         param_attr=parameter_attr,
         bias_attr=False)
-    #parameter_attr = ParamAttr(learning_rate=0.1, initializer=MSRA())
-    #bias_attr = ParamAttr(learning_rate=0.2)
-    return fluid.layers.batch_norm(input=conv, act=act, epsilon=0.00001)
-    #param_attr=parameter_attr,
-    #bias_attr=bias_attr)
+    parameter_attr = ParamAttr(learning_rate=0.1, initializer=MSRA())
+    bias_attr = ParamAttr(learning_rate=0.2)
+    return fluid.layers.batch_norm(input=conv, act=act)
 
 
 def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride,
diff --git a/fluid/object_detection/pretrained/download_coco.sh b/fluid/object_detection/pretrained/download_coco.sh
new file mode 100644
index 0000000000000000000000000000000000000000..41684b116e327a2e052414f0b933f79b61acc1a2
--- /dev/null
+++ b/fluid/object_detection/pretrained/download_coco.sh
@@ -0,0 +1,8 @@
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+cd "$DIR"
+
+# Download the model.
+echo "Downloading..."
+wget http://paddlemodels.bj.bcebos.com/ssd_mobilenet_coco.tar.gz
+echo "Extracting..."
+tar -xf ssd_mobilenet_coco.tar.gz
diff --git a/fluid/object_detection/pretrained/download_imagenet.sh b/fluid/object_detection/pretrained/download_imagenet.sh
new file mode 100644
index 0000000000000000000000000000000000000000..597cc8a44c55b29f63f3fc374dc252b515f310c9
--- /dev/null
+++ b/fluid/object_detection/pretrained/download_imagenet.sh
@@ -0,0 +1,8 @@
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+cd "$DIR"
+
+# Download the model.
+echo "Downloading..."
+wget http://paddlemodels.bj.bcebos.com/mobilenet_imagenet.tar.gz
+echo "Extracting..."
+tar -xf mobilenet_imagenet.tar.gz
diff --git a/fluid/object_detection/reader.py b/fluid/object_detection/reader.py
index 29ac65a5002dc3c90eacfd7a2d1ccf104bb2b564..47f78f6343c155c2d5b034bafed3a78426f19fdc 100644
--- a/fluid/object_detection/reader.py
+++ b/fluid/object_detection/reader.py
@@ -23,10 +23,6 @@ import os
 import time
 import copy
 
-# cocoapi
-from pycocotools.coco import COCO
-from pycocotools.cocoeval import COCOeval
-
 
 class Settings(object):
     def __init__(self, dataset, toy, data_dir, label_file, resize_h, resize_w,
@@ -101,6 +97,10 @@ class Settings(object):
 def _reader_creator(settings, file_list, mode, shuffle):
     def reader():
         if settings.dataset == 'coco':
+            # cocoapi
+            from pycocotools.coco import COCO
+            from pycocotools.cocoeval import COCOeval
+
             coco = COCO(file_list)
             image_ids = coco.getImgIds()
             images = coco.loadImgs(image_ids)
@@ -295,6 +295,7 @@ def draw_bounding_box_on_image(image,
 
 
 def train(settings, file_list, shuffle=True):
+    file_list = os.path.join(settings.data_dir, file_list)
     if settings.dataset == 'coco':
         train_settings = copy.copy(settings)
         if '2014' in file_list:
@@ -302,13 +303,13 @@ def train(settings, file_list, shuffle=True):
         elif '2017' in file_list:
             sub_dir = "train2017"
         train_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
-        file_list = os.path.join(settings.data_dir, file_list)
         return _reader_creator(train_settings, file_list, 'train', shuffle)
     elif settings.dataset == 'pascalvoc':
         return _reader_creator(settings, file_list, 'train', shuffle)
 
 
 def test(settings, file_list):
+    file_list = os.path.join(settings.data_dir, file_list)
     if settings.dataset == 'coco':
         test_settings = copy.copy(settings)
         if '2014' in file_list:
@@ -316,7 +317,6 @@ def test(settings, file_list):
         elif '2017' in file_list:
             sub_dir = "val2017"
         test_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
-        file_list = os.path.join(settings.data_dir, file_list)
         return _reader_creator(test_settings, file_list, 'test', False)
     elif settings.dataset == 'pascalvoc':
         return _reader_creator(settings, file_list, 'test', False)
diff --git a/fluid/object_detection/train.py b/fluid/object_detection/train.py
index 1691955d5f5ac19c590c52140b73e0e62a8de435..9f89a832b17a9b2b6bec193cf5cacddcd95e1a53 100644
--- a/fluid/object_detection/train.py
+++ b/fluid/object_detection/train.py
@@ -12,46 +12,35 @@ import functools
 
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
-add_arg('learning_rate', float, 0.001, "Learning rate.")
-add_arg('batch_size', int, 32, "Minibatch size.")
-add_arg('num_passes', int, 25, "Epoch number.")
-add_arg('parallel', bool, True, "Whether use parallel training.")
-add_arg('use_gpu', bool, True, "Whether use GPU.")
-add_arg('data_dir', str, './data/COCO17', "Root path of data")
-add_arg('train_file_list', str, 'annotations/instances_train2017.json',
-        "train file list")
-add_arg('val_file_list', str, 'annotations/instances_val2017.json',
-        "vaild file list")
-add_arg('model_save_dir', str, 'model_COCO17', "where to save model")
-
-add_arg('dataset', str, 'coco', "coco or pascalvoc")
-add_arg(
-    'is_toy', int, 0,
-    "Is Toy for quick debug, 0 means using all data, while n means using only n sample"
-)
-add_arg('label_file', str, 'label_list',
-        "Lable file which lists all label name")
-add_arg('apply_distort', bool, True, "Whether apply distort")
-add_arg('apply_expand', bool, False, "Whether appley expand")
-add_arg('resize_h', int, 300, "resize image size")
-add_arg('resize_w', int, 300, "resize image size")
-add_arg('mean_value_B', float, 127.5,
-        "mean value which will be subtracted")  #123.68
-add_arg('mean_value_G', float, 127.5,
-        "mean value which will be subtracted")  #116.78
-add_arg('mean_value_R', float, 127.5,
-        "mean value which will be subtracted")  #103.94
-
-
-def train(args,
-          train_file_list,
-          val_file_list,
-          data_args,
-          learning_rate,
-          batch_size,
-          num_passes,
-          model_save_dir='model',
-          init_model_path=None):
+# yapf: disable
+add_arg('learning_rate',    float, 0.001,   "Learning rate.")
+add_arg('batch_size',       int,   32,      "Minibatch size.")
+add_arg('num_passes',       int,   25,      "Epoch number.")
+add_arg('parallel',         bool,  True,    "Whether use parallel training.")
+add_arg('use_gpu',          bool,  True,    "Whether use GPU.")
+add_arg('dataset',          str,   'pascalvoc', "coco or pascalvoc.")
+add_arg('model_save_dir',   str,   'model', "The path to save model.")
+add_arg('pretrained_model', str,   'pretrained/ssd_mobilenet_coco/', "The init model path.")
+add_arg('apply_distort',    bool,  True,    "Whether apply distort")
+add_arg('apply_expand',     bool,  False,   "Whether apply expand")
+add_arg('resize_h',         int,   300,     "resize image size")
+add_arg('resize_w',         int,   300,     "resize image size")
+add_arg('mean_value_B',     float, 127.5,   "mean value which will be subtracted")  #123.68
+add_arg('mean_value_G',     float, 127.5,   "mean value which will be subtracted")  #116.78
+add_arg('mean_value_R',     float, 127.5,   "mean value which will be subtracted")  #103.94
+add_arg('is_toy',           int,   0,       "Toy for quick debug, 0 means using all data, while n means using only n samples")
+# yapf: enable
+
+
+def parallel_do(args,
+                train_file_list,
+                val_file_list,
+                data_args,
+                learning_rate,
+                batch_size,
+                num_passes,
+                model_save_dir,
+                pretrained_model=None):
     image_shape = [3, data_args.resize_h, data_args.resize_w]
     if data_args.dataset == 'coco':
         num_classes = 81
@@ -125,8 +114,11 @@ def train(args,
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
 
-    #load_model.load_and_set_vars(place)
-    load_model.load_paddlev1_vars(place)
+    if pretrained_model:
+        def if_exist(var):
+            return os.path.exists(os.path.join(pretrained_model, var.name))
+        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
+
     train_reader = paddle.batch(
         reader.train(data_args, train_file_list), batch_size=batch_size)
     test_reader = paddle.batch(
@@ -151,7 +143,6 @@ def train(args,
         for batch_id, data in enumerate(train_reader()):
             prev_start_time = start_time
             start_time = time.time()
-            #print("Batch {} start at {:.2f}".format(batch_id, start_time))
             loss_v = exe.run(fluid.default_main_program(),
                              feed=feeder.feed(data),
                              fetch_list=[loss])
@@ -164,29 +155,148 @@ def train(args,
         if pass_id % 10 == 0 or pass_id == num_passes - 1:
             model_path = os.path.join(model_save_dir, str(pass_id))
             print 'save models to %s' % (model_path)
-            fluid.io.save_inference_model(model_path, ['image'], [nmsed_out],
-                                          exe)
+            fluid.io.save_persistables(exe, model_path)
+
+
+def parallel_exe(args,
+                 train_file_list,
+                 val_file_list,
+                 data_args,
+                 learning_rate,
+                 batch_size,
+                 num_passes,
+                 model_save_dir='model',
+                 pretrained_model=None):
+    image_shape = [3, data_args.resize_h, data_args.resize_w]
+    if data_args.dataset == 'coco':
+        num_classes = 81
+    elif data_args.dataset == 'pascalvoc':
+        num_classes = 21
+
+    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+    gt_box = fluid.layers.data(
+        name='gt_box', shape=[4], dtype='float32', lod_level=1)
+    gt_label = fluid.layers.data(
+        name='gt_label', shape=[1], dtype='int32', lod_level=1)
+    difficult = fluid.layers.data(
+        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
+
+    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
+    nmsed_out = fluid.layers.detection_output(
+        locs, confs, box, box_var, nms_threshold=0.45)
+    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
+                                 box_var)
+    loss = fluid.layers.reduce_sum(loss)
+
+    test_program = fluid.default_main_program().clone(for_test=True)
+    with fluid.program_guard(test_program):
+        map_eval = fluid.evaluator.DetectionMAP(
+            nmsed_out,
+            gt_label,
+            gt_box,
+            difficult,
+            num_classes,
+            overlap_threshold=0.5,
+            evaluate_difficult=False,
+            ap_version='integral')
+
+    if data_args.dataset == 'coco':
+        # learning rate decay in 12, 19 pass, respectively
+        if '2014' in train_file_list:
+            boundaries = [82783 / batch_size * 12, 82783 / batch_size * 19]
+        elif '2017' in train_file_list:
+            boundaries = [118287 / batch_size * 12, 118287 / batch_size * 19]
+    elif data_args.dataset == 'pascalvoc':
+        boundaries = [40000, 60000]
+    values = [learning_rate, learning_rate * 0.5, learning_rate * 0.25]
+    optimizer = fluid.optimizer.RMSProp(
+        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
+        regularization=fluid.regularizer.L2Decay(0.00005), )
+
+    optimizer.minimize(loss)
+    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+
+    if pretrained_model:
+        def if_exist(var):
+            return os.path.exists(os.path.join(pretrained_model, var.name))
+        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
+
+    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
+
+    train_reader = paddle.batch(
+        reader.train(data_args, train_file_list), batch_size=batch_size)
+    test_reader = paddle.batch(
+        reader.test(data_args, val_file_list), batch_size=batch_size)
+    feeder = fluid.DataFeeder(
+        place=place, feed_list=[image, gt_box, gt_label, difficult])
+
+    def test(pass_id):
+        _, accum_map = map_eval.get_map_var()
+        map_eval.reset(exe)
+        test_map = None
+        for _, data in enumerate(test_reader()):
+            test_map = exe.run(test_program,
+                               feed=feeder.feed(data),
+                               fetch_list=[accum_map])
+        print("Test {0}, map {1}".format(pass_id, test_map[0]))
+
+    for pass_id in range(num_passes):
+        start_time = time.time()
+        prev_start_time = start_time
+        end_time = 0
+        test(pass_id)
+        for batch_id, data in enumerate(train_reader()):
+            prev_start_time = start_time
+            start_time = time.time()
+            loss_v, = train_exe.run(fetch_list=[loss.name],
+                                    feed_dict=feeder.feed(data))
+            end_time = time.time()
+            loss_v = np.mean(np.array(loss_v))
+            if batch_id % 20 == 0:
+                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
+                    pass_id, batch_id, loss_v, start_time - prev_start_time))
+
+        if pass_id % 10 == 0 or pass_id == num_passes - 1:
+            model_path = os.path.join(model_save_dir, str(pass_id))
+            print 'save models to %s' % (model_path)
+            fluid.io.save_persistables(exe, model_path)
 
 
 if __name__ == '__main__':
     args = parser.parse_args()
     print_arguments(args)
+
+    data_dir = 'data/pascalvoc'
+    train_file_list = 'trainval.txt'
+    val_file_list = 'test.txt'
+    label_file = 'label_list'
+    model_save_dir = args.model_save_dir
+    if args.dataset == 'coco':
+        data_dir = './data/COCO17'
+        train_file_list = 'annotations/instances_train2017.json'
+        val_file_list = 'annotations/instances_val2017.json'
+        label_file = 'label_list'
+
     data_args = reader.Settings(
-        dataset=args.dataset,  # coco or pascalvoc
+        dataset=args.dataset,
         toy=args.is_toy,
-        data_dir=args.data_dir,
-        label_file=args.label_file,
+        data_dir=data_dir,
+        label_file=label_file,
         apply_distort=args.apply_distort,
         apply_expand=args.apply_expand,
         resize_h=args.resize_h,
         resize_w=args.resize_w,
         mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R])
-    train(
-        args,
-        train_file_list=args.train_file_list,
-        val_file_list=args.val_file_list,
-        data_args=data_args,
-        learning_rate=args.learning_rate,
-        batch_size=args.batch_size,
-        num_passes=args.num_passes,
-        model_save_dir=args.model_save_dir)
+    #method = parallel_do
+    method = parallel_exe
+    method(args,
+           train_file_list=train_file_list,
+           val_file_list=val_file_list,
+           data_args=data_args,
+           learning_rate=args.learning_rate,
+           batch_size=args.batch_size,
+           num_passes=args.num_passes,
+           model_save_dir=model_save_dir,
+           pretrained_model=args.pretrained_model)
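
A minimal end-to-end usage sketch (not part of the patch above), assuming the commands are run from the repository root and that ssd_mobilenet_coco.tar.gz unpacks into pretrained/ssd_mobilenet_coco/; the flag names are taken from the new add_arg block in train.py:

    # Hypothetical walkthrough of the scripts introduced in this diff.
    cd fluid/object_detection
    sh data/pascalvoc/download.sh        # fetch VOC2007/2012 and build trainval.txt / test.txt
    sh pretrained/download_coco.sh       # fetch SSD-MobileNet COCO weights for warm start
    python train.py --dataset pascalvoc \
        --pretrained_model pretrained/ssd_mobilenet_coco/ \
        --batch_size 32 --num_passes 25 --model_save_dir model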