train.py 8.3 KB
Newer Older
1 2 3
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
D
Dun 已提交
4
import os
D
Dun 已提交
5 6 7
if 'FLAGS_fraction_of_gpu_memory_to_use' not in os.environ:
    os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = '0.98'
os.environ['FLAGS_enable_parallel_graph'] = '1'
D
Dun 已提交
8 9 10 11 12 13 14 15

import paddle
import paddle.fluid as fluid
import numpy as np
import argparse
from reader import CityscapeDataset
import reader
import models
C
ccmeteorljh 已提交
16
import time
D
Dun 已提交
17 18 19
import contextlib
import paddle.fluid.profiler as profiler
import utility
D
Dun 已提交
20

D
Dun 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
parser = argparse.ArgumentParser()
add_arg = lambda *args: utility.add_arguments(*args, argparser=parser)

# yapf: disable
add_arg('batch_size',           int,    2,      "The number of images in each batch during training.")
add_arg('train_crop_size',      int,    769,    "Image crop size during training.")
add_arg('base_lr',              float,  0.0001, "The base learning rate for model training.")
add_arg('total_step',           int,    90000,  "Number of the training step.")
add_arg('init_weights_path',    str,    None,   "Path of the initial weights in paddlepaddle format.")
add_arg('save_weights_path',    str,    None,   "Path of the saved weights during training.")
add_arg('dataset_path',         str,    None,   "Cityscape dataset path.")
add_arg('parallel',             bool,   True,   "using ParallelExecutor.")
add_arg('use_gpu',              bool,   True,   "Whether use GPU or CPU.")
add_arg('num_classes',          int,    19,     "Number of classes.")
add_arg('load_logit_layer',     bool,   True,   "Load last logit fc layer or not. If you are training with different number of classes, you should set to False.")
add_arg('memory_optimize',      bool,   True,   "Using memory optimizer.")
add_arg('norm_type',            str,    'bn',   "Normalization type, should be bn or gn.")
add_arg('profile',              bool,    False, "Enable profiler.")
add_arg('use_py_reader',        bool,    True,  "Use py reader.")
parser.add_argument(
    '--enable_ce',
    action='store_true',
    help='If set, run the task with continuous evaluation logs.')
#yapf: enable

@contextlib.contextmanager
def profile_context(profile=True):
    if profile:
        with profiler.profiler('All', 'total', '/tmp/profile_file2'):
            yield
    else:
        yield
D
Dun 已提交
53 54

def load_model():
D
Dun 已提交
55 56 57 58 59 60 61
    if os.path.isdir(args.init_weights_path):
        load_vars = [
            x for x in tp.list_vars()
            if isinstance(x, fluid.framework.Parameter) and x.name.find('logit') ==
            -1
        ]
        if args.load_logit_layer:
D
Dun 已提交
62 63 64
            fluid.io.load_params(
                exe, dirname=args.init_weights_path, main_program=tp)
        else:
D
Dun 已提交
65
            fluid.io.load_vars(exe, dirname=args.init_weights_path, vars=load_vars)
D
Dun 已提交
66
    else:
D
Dun 已提交
67 68 69 70 71 72
        fluid.io.load_params(
            exe,
            dirname="",
            filename=args.init_weights_path,
            main_program=tp)

D
Dun 已提交
73 74 75


def save_model():
D
Dun 已提交
76 77 78
    assert not os.path.isfile(args.save_weights_path)
    fluid.io.save_params(
        exe, dirname=args.save_weights_path, main_program=tp)
D
Dun 已提交
79 80 81


def loss(logit, label):
D
Dun 已提交
82 83 84 85
    label_nignore = fluid.layers.less_than(
        label.astype('float32'),
        fluid.layers.assign(np.array([num_classes], 'float32')),
        force_cpu=False).astype('float32')
D
Dun 已提交
86 87 88 89 90
    logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
    logit = fluid.layers.reshape(logit, [-1, num_classes])
    label = fluid.layers.reshape(label, [-1, 1])
    label = fluid.layers.cast(label, 'int64')
    label_nignore = fluid.layers.reshape(label_nignore, [-1, 1])
D
Dun 已提交
91 92 93
    loss = fluid.layers.softmax_with_cross_entropy(logit, label, ignore_index=255, numeric_stable_mode=True)
    label_nignore.stop_gradient = True
    label.stop_gradient = True
D
Dun 已提交
94 95 96 97
    return loss, label_nignore


args = parser.parse_args()
D
Dun 已提交
98
utility.print_arguments(args)
D
Dun 已提交
99 100 101 102

models.clean()
models.bn_momentum = 0.9997
models.dropout_keep_prop = 0.9
D
Dun 已提交
103
models.label_number = args.num_classes
D
Dun 已提交
104
models.default_norm_type = args.norm_type
D
Dun 已提交
105 106 107 108
deeplabv3p = models.deeplabv3p

sp = fluid.Program()
tp = fluid.Program()
Z
add ce  
zhengya01 已提交
109 110 111 112 113 114 115

# only for ce
if args.enable_ce:
    SEED = 102
    sp.random_seed = SEED
    tp.random_seed = SEED

D
Dun 已提交
116 117 118 119 120
crop_size = args.train_crop_size
batch_size = args.batch_size
image_shape = [crop_size, crop_size]
reader.default_config['crop_size'] = crop_size
reader.default_config['shuffle'] = True
D
Dun 已提交
121
num_classes = args.num_classes
D
Dun 已提交
122 123 124 125 126 127
weight_decay = 0.00004

base_lr = args.base_lr
total_step = args.total_step

with fluid.program_guard(tp, sp):
D
Dun 已提交
128 129 130 131 132 133 134 135 136 137
    if args.use_py_reader:
        batch_size_each = batch_size // fluid.core.get_cuda_device_count()
        py_reader = fluid.layers.py_reader(capacity=64,
                                        shapes=[[batch_size_each, 3] + image_shape, [batch_size_each] + image_shape],
                                        dtypes=['float32', 'int32'])
        img, label = fluid.layers.read_file(py_reader)
    else:
        img = fluid.layers.data(
            name='img', shape=[3] + image_shape, dtype='float32')
        label = fluid.layers.data(name='label', shape=image_shape, dtype='int32')
D
Dun 已提交
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
    logit = deeplabv3p(img)
    pred = fluid.layers.argmax(logit, axis=1).astype('int32')
    loss, mask = loss(logit, label)
    lr = fluid.layers.polynomial_decay(
        base_lr, total_step, end_learning_rate=0, power=0.9)
    area = fluid.layers.elementwise_max(
        fluid.layers.reduce_mean(mask),
        fluid.layers.assign(np.array(
            [0.1], dtype=np.float32)))
    loss_mean = fluid.layers.reduce_mean(loss) / area

    opt = fluid.optimizer.Momentum(
        lr,
        momentum=0.9,
        regularization=fluid.regularizer.L2DecayRegularizer(
D
Dun 已提交
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
            regularization_coeff=weight_decay))
    optimize_ops, params_grads = opt.minimize(loss_mean, startup_program=sp)
    # ir memory optimizer has some issues, we need to seed grad persistable to
    # avoid this issue
    for p,g in params_grads: g.persistable = True


exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = fluid.core.get_cuda_device_count()
exec_strategy.num_iteration_per_drop_scope = 100
build_strategy = fluid.BuildStrategy()
if args.memory_optimize:
    build_strategy.fuse_relu_depthwise_conv = True
    build_strategy.enable_inplace = True
    build_strategy.memory_optimize = True
D
Dun 已提交
168 169 170 171 172 173 174 175

place = fluid.CPUPlace()
if args.use_gpu:
    place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(sp)

if args.init_weights_path:
176
    print("load from:", args.init_weights_path)
D
Dun 已提交
177 178
    load_model()

D
Dun 已提交
179
dataset = reader.CityscapeDataset(args.dataset_path, 'train')
D
Dun 已提交
180 181

if args.parallel:
D
Dun 已提交
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
    binary = fluid.compiler.CompiledProgram(tp).with_data_parallel(
        loss_name=loss_mean.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
else:
    binary = fluid.compiler.CompiledProgram(main)

if args.use_py_reader:
    assert(batch_size % fluid.core.get_cuda_device_count() == 0)
    def data_gen():
        batches = dataset.get_batch_generator(
            batch_size // fluid.core.get_cuda_device_count(),
            total_step * fluid.core.get_cuda_device_count())
        for b in batches:
            yield b[1], b[2]
    py_reader.decorate_tensor_provider(data_gen)
    py_reader.start()
else:
    batches = dataset.get_batch_generator(batch_size, total_step)
Z
add ce  
zhengya01 已提交
201 202 203 204
total_time = 0.0
epoch_idx = 0
train_loss = 0

D
Dun 已提交
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
with profile_context(args.profile):
    for i in range(total_step):
        epoch_idx += 1
        begin_time = time.time()
        prev_start_time = time.time()
        if not args.use_py_reader:
            _, imgs, labels, names = next(batches)
            train_loss, = exe.run(binary,
                             feed={'img': imgs,
                                   'label': labels}, fetch_list=[loss_mean])
        else:
            train_loss, = exe.run(binary, fetch_list=[loss_mean])
        train_loss = np.mean(train_loss)
        end_time = time.time()
        total_time += end_time - begin_time
        if i % 100 == 0:
            print("Model is saved to", args.save_weights_path)
            save_model()
        print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format(
            i, train_loss, end_time - prev_start_time))

print("Training done. Model is saved to", args.save_weights_path)
save_model()
py_reader.stop()
Z
add ce  
zhengya01 已提交
229 230

if args.enable_ce:
D
Dun 已提交
231
    gpu_num = fluid.core.get_cuda_device_count()
Z
add ce  
zhengya01 已提交
232
    print("kpis\teach_pass_duration_card%s\t%s" %
D
Dun 已提交
233 234
          (gpu_num, total_time / epoch_idx))
    print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, train_loss))