train.py 10.1 KB
Newer Older
B
baiyfbupt 已提交
1 2 3 4
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

5
import os
6
import shutil
7
import numpy as np
8
import time
9 10 11
import argparse
import functools

12
import paddle
13 14
import paddle.fluid as fluid
from pyramidbox import PyramidBox
Q
qingqing01 已提交
15
import reader
16 17 18 19
from utility import add_arguments, print_arguments

# Command-line interface.  Most options go through ``add_arguments`` (imported
# from ``utility``) bound to this parser; the three continuous-evaluation (CE)
# options at the end are registered with plain argparse calls.
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)

# yapf: disable
# Each row is (name, type, default, help) and is forwarded positionally to
# ``add_arg`` in the order listed.
_ADD_ARG_ROWS = (
    ('parallel',         bool,  True,            "Whether use multi-GPU/threads or not."),
    ('learning_rate',    float, 0.001,           "The start learning rate."),
    ('batch_size',       int,   16,              "Minibatch size."),
    ('epoc_num',         int,   160,             "Epoch number."),
    ('use_gpu',          bool,  True,            "Whether use GPU."),
    ('use_pyramidbox',   bool,  True,            "Whether use PyramidBox model."),
    ('model_save_dir',   str,   'output',        "The path to save model."),
    ('resize_h',         int,   640,             "The resized image height."),
    ('resize_w',         int,   640,             "The resized image width."),
    ('mean_BGR',         str,   '104., 117., 123.', "Mean value for B,G,R channel which will be subtracted."),
    ('with_mem_opt',     bool,  True,            "Whether to use memory optimization or not."),
    ('pretrained_model', str,   './vgg_ilsvrc_16_fc_reduced/', "The init model path."),
    ('data_dir',         str,   'data',          "The base dir of dataset"),
)
for _add_arg_row in _ADD_ARG_ROWS:
    add_arg(*_add_arg_row)

# Options used only by the continuous-evaluation runs.
parser.add_argument(
    '--enable_ce',
    action='store_true',
    help='If set, run the task with continuous evaluation logs.')
parser.add_argument(
    '--batch_num',
    type=int,
    help="batch num for ce")
parser.add_argument(
    '--num_devices',
    type=int,
    default=1,
    help='Number of GPU devices')
# yapf: enable

40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
# Default training hyper-parameters.  The ``__main__`` block overwrites
# image_shape, use_pyramidbox, batch_size, lr and epoc_num from the command
# line before training starts.
train_parameters = {
    "train_images": 12880,  # divided by batch_size to get steps per epoch
    "image_shape": [3, 640, 640],  # appended to [-1] for the image feed slot
    "class_num": 2,
    "batch_size": 16,
    "lr": 0.001,  # base learning rate, scaled by each lr_decay factor
    "lr_epochs": [99, 124, 149],  # epochs at which the learning rate steps
    "lr_decay": [1, 0.1, 0.01, 0.001],  # one factor per interval
    "epoc_num": 160,
    "optimizer_method": "momentum",  # any other value selects RMSProp
    "use_pyramidbox": True,
}

def optimizer_setting(train_params):
    """Create the optimizer with a piecewise-constant learning-rate schedule.

    Args:
        train_params: dict providing "batch_size", "train_images", "lr",
            "optimizer_method", "lr_epochs" and "lr_decay".

    Returns:
        A ``fluid.optimizer.Momentum`` (momentum 0.9) when
        ``optimizer_method`` is "momentum", otherwise a
        ``fluid.optimizer.RMSProp``.  Both use L2 decay of 0.0005.
    """
    steps_per_epoch = train_params["train_images"] // train_params["batch_size"]
    base_lr = train_params["lr"]
    method = train_params["optimizer_method"]

    # Epoch boundaries are converted to global step counts; every interval
    # gets the base learning rate scaled by its decay factor.
    boundaries = [epoch * steps_per_epoch for epoch in train_params["lr_epochs"]]
    values = [factor * base_lr for factor in train_params["lr_decay"]]

    schedule = fluid.layers.piecewise_decay(boundaries, values)
    weight_decay = fluid.regularizer.L2Decay(0.0005)

    if method == "momentum":
        return fluid.optimizer.Momentum(
            learning_rate=schedule,
            momentum=0.9,
            regularization=weight_decay)
    return fluid.optimizer.RMSProp(
        learning_rate=schedule,
        regularization=weight_decay)


def build_program(train_params, main_prog, startup_prog, args):
    """Populate ``main_prog``/``startup_prog`` with the training graph.

    Args:
        train_params: dict providing "use_pyramidbox", "image_shape" and
            "class_num", plus the keys read by ``optimizer_setting``.
        main_prog: program that receives the reader, network and optimizer ops.
        startup_prog: program that receives the matching initialisation ops.
        args: accepted for interface compatibility; not read here.

    Returns:
        Tuple ``(py_reader, fetches, loss)``.  ``fetches`` is
        ``[face_loss, head_loss]`` for PyramidBox and ``[loss]`` otherwise;
        ``loss`` is the variable passed to ``optimizer.minimize``.
    """
    with_pyramidbox = train_params["use_pyramidbox"]
    img_shape = train_params["image_shape"]
    # NOTE(review): class_num is looked up but not used in this function.
    class_num = train_params["class_num"]

    # (shape, lod_level, dtype) per feed slot, in the order the slots are
    # unpacked below: image, face_box, head_box, gt_label.
    feed_slots = [
        ([-1] + img_shape, 0, "float32"),
        ([-1, 4], 1, "float32"),
        ([-1, 4], 1, "float32"),
        ([-1, 1], 1, "int32"),
    ]

    with fluid.program_guard(main_prog, startup_prog):
        py_reader = fluid.layers.py_reader(
            capacity=8,
            shapes=[slot[0] for slot in feed_slots],
            lod_levels=[slot[1] for slot in feed_slots],
            dtypes=[slot[2] for slot in feed_slots],
            use_double_buffer=True)
        with fluid.unique_name.guard():
            image, face_box, head_box, gt_label = fluid.layers.read_file(
                py_reader)
            model = PyramidBox(
                image=image,
                face_box=face_box,
                head_box=head_box,
                gt_label=gt_label,
                sub_network=with_pyramidbox)
            if not with_pyramidbox:
                loss = model.vgg_ssd_loss()
                fetches = [loss]
            else:
                face_loss, head_loss, loss = model.train()
                fetches = [face_loss, head_loss]
            optimizer_setting(train_params).minimize(loss)
    return py_reader, fetches, loss

def train(args, config, train_params, train_file_list):
    """Train the detector and save the persistable variables after each epoch.

    Args:
        args: parsed command-line namespace.  Reads ``use_gpu``,
            ``model_save_dir``, ``pretrained_model``, ``with_mem_opt``,
            ``parallel``, ``use_pyramidbox``, ``enable_ce`` and ``batch_num``.
        config: reader settings object, forwarded to ``reader.train``.
        train_params: dict providing "batch_size", "epoc_num" and
            "train_images", plus the keys read by ``build_program``.
        train_file_list: annotation file path, forwarded to ``reader.train``.

    Raises:
        ValueError: if a pre-trained model path is given but does not exist.
    """
    batch_size = train_params["batch_size"]
    epoc_num = train_params["epoc_num"]

    use_gpu = args.use_gpu
    model_save_dir = args.model_save_dir
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt

    # One device per comma-separated entry; an unset or empty variable
    # yields a single entry and therefore counts as one device.
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    batch_size_per_device = batch_size // devices_num
    # NOTE(review): iters_per_epoc (including the --batch_num override in the
    # CE branch below) is computed but never consumed by the loop in this
    # function -- confirm whether CE runs were meant to stop after this many
    # batches.
    iters_per_epoc = train_params["train_images"] // batch_size
    num_workers = 8
    is_shuffle = True

    startup_prog = fluid.Program()
    train_prog = fluid.Program()

    # only for ce: fixed seeds, a single reader worker and no pre-trained
    # weights.
    if args.enable_ce:
        SEED = 102
        startup_prog.random_seed = SEED
        train_prog.random_seed = SEED
        num_workers = 1
        pretrained_model = ""
        if args.batch_num is not None:
            iters_per_epoc = args.batch_num

    train_py_reader, fetches, loss = build_program(
        train_params=train_params,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)

    if with_memory_optimization:
        fluid.memory_optimize(train_prog)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    start_epoc = 0
    if pretrained_model:
        # A purely numeric value means "resume from the checkpoint saved for
        # that epoch under model_save_dir" and continue with the next epoch.
        if pretrained_model.isdigit():
            start_epoc = int(pretrained_model) + 1
            pretrained_model = os.path.join(model_save_dir, pretrained_model)
            print("Resume from %s " % (pretrained_model))

        if not os.path.exists(pretrained_model):
            raise ValueError("The pre-trained model path [%s] does not exist." %
                             (pretrained_model))

        def if_exist(var):
            # Only load variables that have a file in the model directory.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    train_reader = reader.train(config,
                                train_file_list,
                                batch_size_per_device,
                                shuffle=is_shuffle,
                                num_workers=num_workers)
    train_py_reader.decorate_paddle_reader(train_reader)

    if args.parallel:
        train_exe = fluid.ParallelExecutor(
            main_program=train_prog,
            use_cuda=use_gpu,
            loss_name=loss.name)

    def save_model(postfix, program):
        # Replace any existing checkpoint directory of the same name.
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)

        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path, main_program=program)

    total_time = 0.0
    epoch_idx = 0
    face_loss = 0
    head_loss = 0
    for pass_id in range(start_epoc, epoc_num):
        epoch_idx += 1
        # Kept separate from the per-batch start_time, which is overwritten
        # on every iteration, so the epoch duration spans the whole epoch.
        epoch_start_time = time.time()
        start_time = epoch_start_time
        batch_id = 0
        train_py_reader.start()
        while True:
            try:
                prev_start_time = start_time
                start_time = time.time()
                if args.parallel:
                    fetch_vars = train_exe.run(
                        fetch_list=[v.name for v in fetches])
                else:
                    fetch_vars = exe.run(train_prog, fetch_list=fetches)
                fetch_vars = [np.mean(np.array(v)) for v in fetch_vars]
                face_loss = fetch_vars[0]
                # The non-PyramidBox graph fetches a single loss, so a second
                # entry is only present for PyramidBox.
                if len(fetch_vars) > 1:
                    head_loss = fetch_vars[1]
                if batch_id % 10 == 0:
                    # The reported time is the gap between the starts of the
                    # previous and the current iteration.
                    if not args.use_pyramidbox:
                        print("Pass {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
                            pass_id, batch_id, face_loss,
                            start_time - prev_start_time))
                    else:
                        print("Pass {:d}, batch {:d}, face loss {:.6f}, " \
                              "head loss {:.6f}, " \
                              "time {:.5f}".format(pass_id,
                               batch_id, face_loss, head_loss,
                               start_time - prev_start_time))
                batch_id += 1
            except (fluid.core.EOFException, StopIteration):
                # Reader exhausted: reset it so the next epoch can restart it.
                train_py_reader.reset()
                break
        epoch_end_time = time.time()
        total_time += epoch_end_time - epoch_start_time
        save_model(str(pass_id), train_prog)

    # only for ce
    if args.enable_ce:
        gpu_num = get_cards(args)
        # max(..., 1) avoids dividing by zero when no epoch was run
        # (start_epoc >= epoc_num).
        print("kpis\teach_pass_duration_card%s\t%s" %
                (gpu_num, total_time / max(epoch_idx, 1)))
        print("kpis\ttrain_face_loss_card%s\t%s" %
                (gpu_num, face_loss))
        print("kpis\ttrain_head_loss_card%s\t%s" %
                (gpu_num, head_loss))

236

Z
zhengya01 已提交
237 238 239 240 241 242 243 244 245 246

def get_cards(args):
    """Return the device count used to label the CE KPI lines.

    Args:
        args: namespace with ``enable_ce`` (bool) and ``num_devices`` (int).

    Returns:
        With ``enable_ce`` set, the number of comma-separated entries in the
        ``CUDA_VISIBLE_DEVICES`` environment variable, or ``args.num_devices``
        when that variable is not set at all.  Without ``enable_ce``, always
        ``args.num_devices``.
    """
    if not args.enable_ce:
        return args.num_devices

    cards = os.environ.get('CUDA_VISIBLE_DEVICES')
    if cards is None:
        # Unset variable: nothing to count, so use the explicit CLI value
        # instead of failing on None.split().
        return args.num_devices
    return len(cards.split(","))


247 248 249 250
if __name__ == '__main__':
    # Script entry point: parse the command line, fold the overrides into
    # train_parameters, build the reader settings and start training.
    args = parser.parse_args()
    print_arguments(args)

    # Dataset locations are derived from --data_dir.
    images_dir = os.path.join(args.data_dir, 'WIDER_train/images/')
    train_file_list = os.path.join(
        args.data_dir, 'wider_face_split/wider_face_train_bbx_gt.txt')

    # --mean_BGR is a comma-separated string of per-channel means.
    channel_means = list(map(float, args.mean_BGR.split(",")))
    input_shape = [3, int(args.resize_h), int(args.resize_w)]

    # Command-line values take precedence over the module-level defaults.
    train_parameters.update({
        "image_shape": input_shape,
        "use_pyramidbox": args.use_pyramidbox,
        "batch_size": args.batch_size,
        "lr": args.learning_rate,
        "epoc_num": args.epoc_num,
    })

    reader_settings = reader.Settings(
        data_dir=images_dir,
        resize_h=input_shape[1],
        resize_w=input_shape[2],
        apply_distort=True,
        apply_expand=False,
        mean_value=channel_means,
        ap_version='11point')
    train(args, reader_settings, train_parameters, train_file_list)