train.py 8.3 KB
Newer Older
1 2 3
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
D
Dun 已提交
4
import os
D
Dun 已提交
5 6
if 'FLAGS_fraction_of_gpu_memory_to_use' not in os.environ:
    os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = '0.98'
D
Dun 已提交
7 8 9 10 11 12 13 14

import paddle
import paddle.fluid as fluid
import numpy as np
import argparse
from reader import CityscapeDataset
import reader
import models
C
ccmeteorljh 已提交
15
import time
D
Dun 已提交
16 17 18
import contextlib
import paddle.fluid.profiler as profiler
import utility
D
Dun 已提交
19

D
Dun 已提交
20 21 22 23
parser = argparse.ArgumentParser()


def add_arg(*args):
    """Register one command-line option on the module-level ``parser``.

    The positional arguments are forwarded unchanged to
    ``utility.add_arguments``; every call below passes
    (name, type, default, help text).
    """
    return utility.add_arguments(*args, argparser=parser)


# yapf: disable
add_arg('batch_size',           int,    4,      "The number of images in each batch during training.")
add_arg('train_crop_size',      int,    769,    "Image crop size during training.")
add_arg('base_lr',              float,  0.001,  "The base learning rate for model training.")
add_arg('total_step',           int,    500000, "Number of the training step.")
add_arg('init_weights_path',    str,    None,   "Path of the initial weights in paddlepaddle format.")
add_arg('save_weights_path',    str,    None,   "Path of the saved weights during training.")
add_arg('dataset_path',         str,    None,   "Cityscape dataset path.")
add_arg('parallel',             bool,   True,   "using ParallelExecutor.")
add_arg('use_gpu',              bool,   True,   "Whether use GPU or CPU.")
add_arg('num_classes',          int,    19,     "Number of classes.")
add_arg('load_logit_layer',     bool,   True,   "Load last logit fc layer or not. If you are training with different number of classes, you should set to False.")
add_arg('memory_optimize',      bool,   True,   "Using memory optimizer.")
add_arg('norm_type',            str,    'bn',   "Normalization type, should be 'bn' or 'gn'.")
add_arg('profile',              bool,    False, "Enable profiler.")
add_arg('use_py_reader',        bool,    True,  "Use py reader.")
# --enable_ce is a plain flag, so it is registered directly on the parser
# instead of going through add_arg.
parser.add_argument(
    '--enable_ce',
    action='store_true',
    help='If set, run the task with continuous evaluation logs. Users can ignore this argument.')
# yapf: enable

@contextlib.contextmanager
def profile_context(profile=True):
    """Context manager that optionally runs its body under the profiler.

    Args:
        profile: when truthy, the body executes inside
            ``profiler.profiler('All', 'total', '/tmp/profile_file2')``;
            when falsy, the body executes with no profiling at all.
    """
    if not profile:
        # Profiling disabled: hand control straight to the with-body.
        yield
        return
    with profiler.profiler('All', 'total', '/tmp/profile_file2'):
        yield
D
Dun 已提交
52 53

def load_model():
    """Load initial weights from ``args.init_weights_path`` into ``tp``.

    Uses the module-level ``args``, ``exe`` and ``tp``.

    * Path is not a directory: it is passed as ``filename`` to
      ``fluid.io.load_params`` (with an empty ``dirname``). Note that
      ``args.load_logit_layer`` is not consulted in this case.
    * Path is a directory and ``args.load_logit_layer`` is true: all
      parameters of ``tp`` are loaded with ``fluid.io.load_params``.
    * Path is a directory and ``args.load_logit_layer`` is false: only
      parameters whose name does not contain ``'logit'`` are loaded, via
      ``fluid.io.load_vars``.
    """
    weights_path = args.init_weights_path

    if not os.path.isdir(weights_path):
        fluid.io.load_params(
            exe, dirname="", filename=weights_path, main_program=tp)
        return

    if args.load_logit_layer:
        fluid.io.load_params(exe, dirname=weights_path, main_program=tp)
        return

    # Skip every parameter that belongs to the logit layer.
    non_logit_params = [
        var for var in tp.list_vars()
        if isinstance(var, fluid.framework.Parameter)
        and 'logit' not in var.name
    ]
    fluid.io.load_vars(exe, dirname=weights_path, vars=non_logit_params)

D
Dun 已提交
72 73 74


def save_model():
    """Save the parameters of ``tp`` under ``args.save_weights_path``.

    Uses the module-level ``args``, ``exe`` and ``tp``. The target is handed
    to ``fluid.io.save_params`` as ``dirname``, so it must not be an existing
    regular file; this is asserted before saving.
    """
    target_dir = args.save_weights_path
    assert not os.path.isfile(target_dir)
    fluid.io.save_params(exe, dirname=target_dir, main_program=tp)
D
Dun 已提交
78 79 80


def loss(logit, label):
    """Build the softmax cross-entropy ops and the valid-label mask.

    Reads the module-level ``num_classes``.

    Args:
        logit: network output variable. It is transposed with perm
            [0, 2, 3, 1] and flattened to [-1, num_classes]
            (presumably NCHW on input -- TODO confirm against the model).
        label: label variable; flattened to [-1, 1] and cast to int64.

    Returns:
        A ``(loss, label_nignore)`` tuple: the output of
        ``fluid.layers.cross_entropy`` computed with ``ignore_index=255``,
        and a float32 mask of shape [-1, 1] that is 1 where
        ``label < num_classes`` and 0 elsewhere. Gradients are stopped on
        both the mask and the reshaped label.
    """
    # Mask of labels that denote a real class (label < num_classes).
    label_as_float = label.astype('float32')
    class_limit = fluid.layers.assign(np.array([num_classes], 'float32'))
    valid_mask = fluid.layers.less_than(
        label_as_float, class_limit, force_cpu=False).astype('float32')

    # Flatten so that every row is one position with num_classes scores.
    flat_logit = fluid.layers.reshape(
        fluid.layers.transpose(logit, [0, 2, 3, 1]), [-1, num_classes])
    flat_label = fluid.layers.cast(
        fluid.layers.reshape(label, [-1, 1]), 'int64')
    valid_mask = fluid.layers.reshape(valid_mask, [-1, 1])

    probs = fluid.layers.softmax(flat_logit, use_cudnn=False)
    pixel_loss = fluid.layers.cross_entropy(
        probs, flat_label, ignore_index=255)

    valid_mask.stop_gradient = True
    flat_label.stop_gradient = True
    return pixel_loss, valid_mask


# Parse the command line and echo the resulting configuration.
args = parser.parse_args()
utility.print_arguments(args)

# Configure the `models` module before the network is built.
models.clean()
models.bn_momentum = 0.9997
models.dropout_keep_prop = 0.9
models.label_number = args.num_classes
models.default_norm_type = args.norm_type
deeplabv3p = models.deeplabv3p

# sp is used as the startup program and tp as the main (training) program.
sp, tp = fluid.Program(), fluid.Program()

# only for ce: fix the random seed of both programs
if args.enable_ce:
    SEED = 102
    sp.random_seed = tp.random_seed = SEED

# Data-related settings.
crop_size = args.train_crop_size
batch_size = args.batch_size
image_shape = [crop_size] * 2
reader.default_config['crop_size'] = crop_size
reader.default_config['shuffle'] = True
num_classes = args.num_classes

# Optimisation hyper-parameters.
weight_decay = 0.00004
base_lr = args.base_lr
total_step = args.total_step

# Build the training graph in tp (parameter initialisation goes to sp).
with fluid.program_guard(tp, sp):
    if args.use_py_reader:
        # With py_reader each device receives its own slice of the batch.
        batch_size_each = batch_size // fluid.core.get_cuda_device_count()
        py_reader = fluid.layers.py_reader(
            capacity=64,
            shapes=[[batch_size_each, 3] + image_shape,
                    [batch_size_each] + image_shape],
            dtypes=['float32', 'int32'])
        img, label = fluid.layers.read_file(py_reader)
    else:
        img = fluid.layers.data(
            name='img', shape=[3] + image_shape, dtype='float32')
        label = fluid.layers.data(
            name='label', shape=image_shape, dtype='int32')

    logit = deeplabv3p(img)
    pred = fluid.layers.argmax(logit, axis=1).astype('int32')
    # Use a distinct name for the result so the module-level function
    # `loss` is not overwritten by the variable it returns.
    pixel_loss, mask = loss(logit, label)
    lr = fluid.layers.polynomial_decay(
        base_lr, total_step, end_learning_rate=0, power=0.9)
    # Fraction of valid (non-ignored) labels, floored at 0.1 so the
    # normalisation below never divides by a very small number.
    area = fluid.layers.elementwise_max(
        fluid.layers.reduce_mean(mask),
        fluid.layers.assign(np.array(
            [0.1], dtype=np.float32)))
    loss_mean = fluid.layers.reduce_mean(pixel_loss) / area

    opt = fluid.optimizer.Momentum(
        lr,
        momentum=0.9,
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=weight_decay))
    optimize_ops, params_grads = opt.minimize(loss_mean, startup_program=sp)
    # ir memory optimizer has some issues, we need to set grad persistable to
    # avoid this issue
    for _, grad in params_grads:
        grad.persistable = True


# Execution strategy: one thread per visible CUDA device, and local scopes
# are dropped every 100 iterations.
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = fluid.core.get_cuda_device_count()
exec_strategy.num_iteration_per_drop_scope = 100

# Build strategy: the memory-saving passes are enabled only on request.
build_strategy = fluid.BuildStrategy()
if args.memory_optimize:
    build_strategy.fuse_relu_depthwise_conv = True
    build_strategy.enable_inplace = True
    build_strategy.memory_optimize = True

# Run the startup program once on the selected device.
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(sp)

# Optionally start from pre-trained weights.
if args.init_weights_path:
    print("load from:", args.init_weights_path)
    load_model()

D
Dun 已提交
179
dataset = reader.CityscapeDataset(args.dataset_path, 'train')

# Compile the training program, data-parallel when requested.
binary = fluid.compiler.CompiledProgram(tp)
if args.parallel:
    binary = binary.with_data_parallel(
        loss_name=loss_mean.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

if not args.use_py_reader:
    # Feed-dict mode: the training loop pulls whole batches itself.
    batches = dataset.get_batch_generator(batch_size, total_step)
else:
    device_count = fluid.core.get_cuda_device_count()
    assert batch_size % device_count == 0

    def data_gen():
        """Yield (b[1], b[2]) of every per-device batch for py_reader."""
        per_device_batches = dataset.get_batch_generator(
            batch_size // device_count,
            total_step * device_count)
        for b in per_device_batches:
            yield b[1], b[2]

    py_reader.decorate_tensor_provider(data_gen)
    py_reader.start()
Z
add ce  
zhengya01 已提交
201 202 203 204
# Running totals; also read by the continuous-evaluation report at the
# end of the script.
total_time = 0.0
epoch_idx = 0
train_loss = 0

with profile_context(args.profile):
    for i in range(total_step):
        epoch_idx += 1
        begin_time = time.time()
        if args.use_py_reader:
            # Input comes from py_reader, so nothing is fed explicitly.
            train_loss, = exe.run(binary, fetch_list=[loss_mean])
        else:
            _, imgs, labels, _ = next(batches)
            train_loss, = exe.run(binary,
                                  feed={'img': imgs,
                                        'label': labels},
                                  fetch_list=[loss_mean])
        train_loss = np.mean(train_loss)
        end_time = time.time()
        step_time = end_time - begin_time
        total_time += step_time
        if i % 100 == 0:
            # Report the checkpoint only after it has actually been written.
            save_model()
            print("Model is saved to", args.save_weights_path)
        print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format(
            i, train_loss, step_time))

save_model()
print("Training done. Model is saved to", args.save_weights_path)
Z
add ce  
zhengya01 已提交
228 229

# Continuous-evaluation KPIs: mean step duration and the last training loss.
if args.enable_ce:
    gpu_num = fluid.core.get_cuda_device_count()
    # epoch_idx stays 0 when total_step is 0; report 0.0 instead of raising
    # ZeroDivisionError in that case.
    mean_step_time = total_time / epoch_idx if epoch_idx else 0.0
    print("kpis\teach_pass_duration_card%s\t%s" % (gpu_num, mean_step_time))
    print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, train_loss))