profile.py 7.1 KB
Newer Older
B
baiyfbupt 已提交
1 2 3 4
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
import os
import shutil
import numpy as np
import time
import argparse
import functools

import reader
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from pyramidbox import PyramidBox
from utility import add_arguments, print_arguments

parser = argparse.ArgumentParser(description=__doc__)
# Bind the shared parser once so each option below is a single short call.
add_arg = functools.partial(add_arguments, argparser=parser)

# Command-line options for the profiling run.
# NOTE(review): positional arguments appear to be (name, type, default, help);
# confirm against utility.add_arguments.
# yapf: disable
add_arg('parallel',         bool,  True,            "parallel")
add_arg('learning_rate',    float, 0.001,           "Learning rate.")
add_arg('batch_size',       int,   20,              "Minibatch size.")
add_arg('num_iteration',    int,   10,              "Number of iterations to profile.")
add_arg('use_gpu',          bool,  True,            "Whether use GPU.")
add_arg('use_pyramidbox',   bool,  True,            "Whether use PyramidBox model.")
add_arg('model_save_dir',   str,   'output',        "The path to save model.")
add_arg('pretrained_model', str,   './vgg_ilsvrc_16_fc_reduced', "The init model path.")
add_arg('resize_h',         int,   640,             "The resized image height.")
add_arg('resize_w',         int,   640,             "The resized image width.")
add_arg('data_dir',         str,   'data',          "The base dir of dataset")
#yapf: enable


def train(args, config, train_file_list, optimizer_method):
    """Build the training program and time a fixed number of iterations.

    The function constructs the network and optimizer inside a dedicated
    program, optionally loads pre-trained variables, runs two warm-up
    iterations and then runs ``args.num_iteration`` timed iterations. In
    non-parallel mode the timed iterations run under the Fluid profiler,
    which writes to ``/tmp/profile_file``. Per-batch losses and a final
    timing summary are printed; nothing is returned.

    Args:
        args: Parsed command-line namespace. Reads ``learning_rate``,
            ``batch_size``, ``resize_h``, ``resize_w``, ``use_gpu``,
            ``use_pyramidbox``, ``model_save_dir``, ``pretrained_model``,
            ``num_iteration`` and ``parallel``.
        config: Reader settings object, forwarded unchanged to
            ``reader.train``.
        train_file_list: Annotation list path, forwarded unchanged to
            ``reader.train``.
        optimizer_method: ``"momentum"`` selects the Momentum optimizer;
            any other value selects RMSProp.

    Raises:
        ValueError: If ``pretrained_model`` is set but the path does not
            exist.
    """
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    height = args.resize_h
    width = args.resize_w
    use_gpu = args.use_gpu
    use_pyramidbox = args.use_pyramidbox
    model_save_dir = args.model_save_dir
    pretrained_model = args.pretrained_model
    num_iterations = args.num_iteration
    parallel = args.parallel

    image_shape = [3, height, width]

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        # Inputs in order: image, face boxes, head boxes, ground-truth labels.
        py_reader = fluid.layers.py_reader(
            capacity=8,
            shapes=[[-1] + image_shape, [-1, 4], [-1, 4], [-1, 1]],
            lod_levels=[0, 1, 1, 1],
            dtypes=["float32", "float32", "float32", "int32"],
            use_double_buffer=True)
        with fluid.unique_name.guard():
            image, face_box, head_box, gt_label = fluid.layers.read_file(
                py_reader)
            fetches = []
            network = PyramidBox(image=image,
                                 face_box=face_box,
                                 head_box=head_box,
                                 gt_label=gt_label,
                                 sub_network=use_pyramidbox)
            if use_pyramidbox:
                face_loss, head_loss, loss = network.train()
                fetches = [face_loss, head_loss]
            else:
                loss = network.vgg_ssd_loss()
                fetches = [loss]

            # An unset/empty CUDA_VISIBLE_DEVICES still yields one device,
            # because "".split(",") returns [""].
            devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
            devices_num = len(devices.split(","))
            batch_size_per_device = batch_size // devices_num

            # NOTE(review): 12880 is presumably the number of training
            # images in the dataset -- confirm.
            steps_per_pass = 12880 // batch_size
            boundaries = [steps_per_pass * 50, steps_per_pass * 80,
                          steps_per_pass * 120, steps_per_pass * 140]
            values = [
                learning_rate, learning_rate * 0.5, learning_rate * 0.25,
                learning_rate * 0.1, learning_rate * 0.01]
            if optimizer_method == "momentum":
                optimizer = fluid.optimizer.Momentum(
                    learning_rate=fluid.layers.piecewise_decay(
                        boundaries=boundaries, values=values),
                    momentum=0.9,
                    regularization=fluid.regularizer.L2Decay(0.0005),
                )
            else:
                optimizer = fluid.optimizer.RMSProp(
                    learning_rate=fluid.layers.piecewise_decay(
                        boundaries, values),
                    regularization=fluid.regularizer.L2Decay(0.0005),
                )
            optimizer.minimize(loss)
    fluid.memory_optimize(train_prog)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if pretrained_model:
        # A purely numeric value names a checkpoint sub-directory of
        # model_save_dir rather than a path of its own.
        if pretrained_model.isdigit():
            pretrained_model = os.path.join(model_save_dir, pretrained_model)
            print("Resume from %s " %(pretrained_model))

        if not os.path.exists(pretrained_model):
            raise ValueError("The pre-trained model path [%s] does not exist." %
                             (pretrained_model))

        def if_exist(var):
            """Return True when a saved file named after ``var`` exists."""
            return os.path.exists(os.path.join(pretrained_model, var.name))

        # The network lives in train_prog (built under program_guard), so the
        # program must be passed explicitly; otherwise load_vars inspects the
        # default main program, which contains none of these variables.
        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    if parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=use_gpu, loss_name=loss.name, main_program=train_prog)
    train_reader = reader.train(config,
                                train_file_list,
                                batch_size_per_device,
                                shuffle=False,
                                use_multiprocessing=True,
                                num_workers=8,
                                max_queue=24)
    py_reader.decorate_paddle_reader(train_reader)

    def run(iterations):
        """Run ``iterations`` batches and return each batch's wall time."""
        py_reader.start()
        run_time = []
        for batch_id in range(iterations):
            start_time = time.time()
            if parallel:
                fetch_vars = train_exe.run(
                    fetch_list=[v.name for v in fetches])
            else:
                fetch_vars = exe.run(train_prog,
                                     fetch_list=fetches)
            end_time = time.time()
            # Only the executor call is timed; printing is excluded.
            run_time.append(end_time - start_time)
            fetch_vars = [np.mean(np.array(v)) for v in fetch_vars]
            if not args.use_pyramidbox:
                print("Batch {0}, loss {1}".format(batch_id, fetch_vars[0]))
            else:
                print("Batch {0}, face loss {1}, head loss {2}".format(
                       batch_id, fetch_vars[0], fetch_vars[1]))
        return run_time

    # start-up: two untimed warm-up batches
    run(2)

    # profiling
    start = time.time()
    if not parallel:
        with profiler.profiler('All', 'total', '/tmp/profile_file'):
            run_time = run(num_iterations)
    else:
        run_time = run(num_iterations)
    end = time.time()
    total_time = end - start
    # "reader time" is everything in the wall-clock window that was not spent
    # inside the executor calls.
    print("Total time: {0}, reader time: {1} s, run time: {2} s".format(
        total_time, total_time - np.sum(run_time), np.sum(run_time)))
166 167 168 169 170 171


if __name__ == '__main__':
    # Parse the command line and echo the effective configuration.
    args = parser.parse_args()
    print_arguments(args)

    # Both dataset locations are resolved relative to --data_dir.
    dataset_root = args.data_dir
    data_dir = os.path.join(dataset_root, 'WIDER_train/images/')
    train_file_list = os.path.join(
        dataset_root, 'wider_face_split/wider_face_train_bbx_gt.txt')

    # Collect the reader settings first, then build the config object.
    reader_settings = dict(
        data_dir=data_dir,
        resize_h=args.resize_h,
        resize_w=args.resize_w,
        apply_expand=False,
        mean_value=[104., 117., 123.],
        ap_version='11point',
    )
    config = reader.Settings(**reader_settings)

    train(args, config, train_file_list, optimizer_method="momentum")