#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

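"""Profiling script for Faster R-CNN training with paddle.fluid.

Builds a ResNet-50 based Faster R-CNN model, runs a short warm-up, then times
cfg.max_iter training iterations and reports reader time, executor run time,
and overall images per second.
"""
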
import os
import time
import numpy as np
import argparse
from utility import parse_args, add_arguments, print_arguments

import paddle
import paddle.fluid as fluid
import reader
import paddle.fluid.profiler as profiler

import models.model_builder as model_builder
import models.resnet as resnet
from learning_rate import exponential_with_warmup_decay


def train(cfg):
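    """Build the Faster R-CNN training program and profile cfg.max_iter
    training iterations."""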
    batch_size = cfg.batch_size
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.max_size, cfg.max_size]
    num_iterations = cfg.max_iter

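    # The number of devices is inferred from CUDA_VISIBLE_DEVICES; the global
    # batch is split evenly across them.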
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))

    model = model_builder.FasterRCNN(
        cfg=cfg,
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=False)
    model.build_model(image_shape)
    loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss = model.loss()
    loss_cls.persistable = True
    loss_bbox.persistable = True
    rpn_cls_loss.persistable = True
    rpn_reg_loss.persistable = True
    loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss

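    # Step the learning rate down by 10x at iterations 120000 and 160000, with
    # a 500-iteration warmup handled by exponential_with_warmup_decay.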
    boundaries = [120000, 160000]
    values = [learning_rate, learning_rate * 0.1, learning_rate * 0.01]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=500,
            warmup_factor=1.0 / 3.0),
        regularization=fluid.regularizer.L2Decay(0.0001),
        momentum=0.9)
    optimizer.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

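    # Load pretrained weights, restoring only the variables that actually
    # exist in the checkpoint directory.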
    if cfg.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

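    # Multi-device data parallelism is handled by a ParallelExecutor built on
    # top of the single-device program.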
    if cfg.parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=bool(cfg.use_gpu), loss_name=loss.name)

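    # Each device receives an equal share of the global batch.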
    assert cfg.batch_size % devices_num == 0, \
        "batch_size = %d, devices_num = %d" % (cfg.batch_size, devices_num)

    batch_size_per_dev = cfg.batch_size // devices_num
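    # Two input pipelines are supported: an asynchronous PyReader or a
    # synchronous DataFeeder feeding the executor directly.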
    if cfg.use_pyreader:
        train_reader = reader.train(
            cfg,
            batch_size=batch_size_per_dev,
            total_batch_size=cfg.batch_size,
            padding_total=cfg.padding_minibatch,
            shuffle=False)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        train_reader = reader.train(
            cfg, batch_size=cfg.batch_size, shuffle=False)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

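    # Fetch the total loss and its individual components at every iteration.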
    fetch_list = [loss, loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss]

    def run(iterations):
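        """Profile the DataFeeder path, timing the reader and the executor
        separately."""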
        reader_time = []
        run_time = []
        total_images = 0

        # Create the batch iterator once so each iteration pulls a fresh batch.
        train_data = train_reader()
        for batch_id in range(iterations):
            start_time = time.time()
            data = next(train_data)
            end_time = time.time()
            reader_time.append(end_time - start_time)
            start_time = time.time()
            if cfg.parallel:
                losses = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                       feed=feeder.feed(data))
            else:
                losses = exe.run(fluid.default_main_program(),
                                 fetch_list=[v.name for v in fetch_list],
                                 feed=feeder.feed(data))
            end_time = time.time()
            run_time.append(end_time - start_time)
            total_images += len(data)

            lr = np.array(fluid.global_scope().find_var('learning_rate')
                          .get_tensor())
            print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format(batch_id, lr[0],
                                                               losses[0][0]))
        return reader_time, run_time, total_images

    def run_pyreader(iterations):
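        """Profile the PyReader path; data loading is asynchronous, so only
        executor run time is measured."""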
        reader_time = [0]
        run_time = []
        total_images = 0

        py_reader.start()
        try:
            for batch_id in range(iterations):
                start_time = time.time()
                if cfg.parallel:
                    losses = train_exe.run(
                        fetch_list=[v.name for v in fetch_list])
                else:
                    losses = exe.run(fluid.default_main_program(),
                                     fetch_list=[v.name for v in fetch_list])
                end_time = time.time()
                run_time.append(end_time - start_time)
                total_images += devices_num
                lr = np.array(fluid.global_scope().find_var('learning_rate')
                              .get_tensor())
                print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format(
                    batch_id, lr[0], losses[0][0]))
        except fluid.core.EOFException:
            py_reader.reset()

        return reader_time, run_time, total_images

    run_func = run if not cfg.use_pyreader else run_pyreader

    # warm-up
    run_func(2)
    # profiling
    start = time.time()
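    # Set use_profile = True to collect GPU profiling data with fluid.profiler
    # and dump it to /tmp/profile_file.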
    use_profile = False
    if use_profile:
        with profiler.profiler('GPU', 'total', '/tmp/profile_file'):
            reader_time, run_time, total_images = run_func(num_iterations)
    else:
        reader_time, run_time, total_images = run_func(num_iterations)

    end = time.time()
    total_time = end - start
    print("Total time: {0} s, reader time: {1} s, run time: {2} s, images/s: {3}".
          format(total_time,
                 np.sum(reader_time),
                 np.sum(run_time), total_images / total_time))


if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)

    data_args = reader.Settings(args)
    train(data_args)