# profile.py -- profiling script for the stacked LSTMP model.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import numpy as np
import argparse
import time

# Paddle Fluid and its profiler.
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
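# Imported for its side effects; presumably sets up the import paths for the
# local packages below -- TODO confirm.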
import _init_paths
# Feature transformers handed to the data reader.
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
# Data reader that yields the training batches.
import data_utils.async_data_reader as reader
# Builder of the stacked LSTMP network.
from model_utils.model import stacked_lstmp_model
# Helper used to convert fetched LoDTensor results before printing.
from data_utils.util import lodtensor_to_ndarray


def parse_args(argv=None):
    """Parse the command-line options of the profiling script.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse falls back to ``sys.argv[1:]``, which is the
            behavior callers that pass no argument already rely on.

    Returns:
        The ``argparse.Namespace`` holding one attribute per option below.
    """
    # NOTE: the string is passed positionally, so argparse uses it as the
    # program name (``prog``) in usage output; kept as-is for compatibility.
    parser = argparse.ArgumentParser("Profiling for the stacked LSTMP model.")

    # Batching.
    parser.add_argument(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    parser.add_argument(
        '--minimum_batch_size',
        type=int,
        default=1,
        help='The minimum sequence number of a batch data. '
        '(default: %(default)d)')

    # Network shape.
    parser.add_argument(
        '--frame_dim',
        type=int,
        default=120 * 11,
        help='Frame dimension of feature data. (default: %(default)d)')
    parser.add_argument(
        '--stacked_num',
        type=int,
        default=5,
        help='Number of lstmp layers to stack. (default: %(default)d)')
    parser.add_argument(
        '--proj_dim',
        type=int,
        default=512,
        help='Project size of lstmp unit. (default: %(default)d)')
    parser.add_argument(
        '--hidden_dim',
        type=int,
        default=1024,
        help='Hidden size of lstmp unit. (default: %(default)d)')
    parser.add_argument(
        '--class_num',
        type=int,
        default=1749,
        help='Number of classes in label. (default: %(default)d)')

    # Optimization and execution.
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.00016,
        help='Learning rate used to train. (default: %(default)f)')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type. (default: %(default)s)')
    parser.add_argument(
        '--parallel', action='store_true', help='If set, run in parallel.')

    # Input data locations.
    parser.add_argument(
        '--mean_var',
        type=str,
        default='data/global_mean_var_search26kHr',
        help='mean var path')
    parser.add_argument(
        '--feature_lst',
        type=str,
        default='data/feature.lst',
        help='feature list path.')
    parser.add_argument(
        '--label_lst',
        type=str,
        default='data/label.lst',
        help='label list path.')

    # Profiling controls.
    parser.add_argument(
        '--max_batch_num',
        type=int,
        default=10,
        help='Maximum number of batches for profiling. (default: %(default)d)')
    parser.add_argument(
        '--first_batches_to_skip',
        type=int,
        default=1,
        help='Number of first batches to skip for profiling. '
        '(default: %(default)d)')
    parser.add_argument(
        '--print_train_acc',
        action='store_true',
        help='If set, output training accuracy.')
    parser.add_argument(
        '--sorted_key',
        type=str,
        default='total',
        choices=['None', 'total', 'calls', 'min', 'max', 'ave'],
        help='Different types of time to sort the profiling report. '
        '(default: %(default)s)')

    return parser.parse_args(argv)


def print_arguments(args):
    """Print every attribute of ``args`` as a ``name: value`` line.

    The lines are sorted by attribute name and framed by a header and a
    footer rule.

    Args:
        args: Object whose attributes are read through ``vars()``, e.g. the
            namespace produced by argparse.
    """
    print('-----------  Configuration Arguments -----------')
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


def profile(args):
    """Profile the training process of the stacked LSTMP model.

    Builds the network and an Adam optimizer, then runs up to
    ``args.max_batch_num`` batches from the asynchronous data reader inside a
    ``profiler.profiler`` context. When the batch with index
    ``args.first_batches_to_skip`` is reached, the profiler, the timer and the
    frame counter are reset so that the earlier batches are excluded from the
    reported numbers. Finally prints the elapsed time and the throughput in
    frames per second.

    Args:
        args: Parsed command-line arguments (see the options declared in
            parse_args).

    Raises:
        ValueError: If ``first_batches_to_skip`` is negative or is not
            smaller than ``max_batch_num``.
    """
    if args.first_batches_to_skip >= args.max_batch_num:
        raise ValueError("arg 'first_batches_to_skip' must be smaller than "
                         "'max_batch_num'.")
    if args.first_batches_to_skip < 0:
        raise ValueError(
            "arg 'first_batches_to_skip' must not be smaller than 0.")

    _, avg_cost, accuracy = stacked_lstmp_model(
        frame_dim=args.frame_dim,
        hidden_dim=args.hidden_dim,
        proj_dim=args.proj_dim,
        stacked_num=args.stacked_num,
        class_num=args.class_num,
        parallel=args.parallel)

    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice()
    ]

    data_reader = reader.AsyncDataReader(args.feature_lst, args.label_lst)
    data_reader.set_transformers(ltrans)

    feature_t = fluid.LoDTensor()
    label_t = fluid.LoDTensor()

    # Compare by value: a string parsed from the command line is not
    # guaranteed to be the same object as the literal 'None'.
    sorted_key = None if args.sorted_key == 'None' else args.sorted_key
    # Loop-invariant: results are only fetched when accuracy is printed.
    fetch_list = [avg_cost, accuracy] if args.print_train_acc else []

    with profiler.profiler(args.device, sorted_key):
        # Start the clock now so the report stays meaningful even when the
        # reader yields fewer batches than 'first_batches_to_skip'.
        frames_seen, start_time = 0, time.time()
        for batch_id, batch_data in enumerate(
                data_reader.batch_iterator(args.batch_size,
                                           args.minimum_batch_size)):
            if batch_id >= args.max_batch_num:
                break
            if args.first_batches_to_skip == batch_id:
                # Discard everything measured during the skipped batches.
                profiler.reset_profiler()
                start_time = time.time()
                frames_seen = 0

            # load_data
            (features, labels, lod, _) = batch_data
            feature_t.set(features, place)
            feature_t.set_lod([lod])
            label_t.set(labels, place)
            label_t.set_lod([lod])

            # The last LoD offset is used as the frame count of this batch.
            frames_seen += lod[-1]

            outs = exe.run(fluid.default_main_program(),
                           feed={"feature": feature_t,
                                 "label": label_t},
                           fetch_list=fetch_list,
                           return_numpy=False)

            if args.print_train_acc:
                print("Batch %d acc: %f" %
                      (batch_id, lodtensor_to_ndarray(outs[1])[0]))
            else:
                # Progress indicator, one dot per batch.
                sys.stdout.write('.')
                sys.stdout.flush()
        time_consumed = time.time() - start_time
        frames_per_sec = frames_seen / time_consumed
        print("\nTime consumed: %f s, performance: %f frames/s." %
              (time_consumed, frames_per_sec))


# Script entry point: parse the options, echo them, then run the profiling.
if __name__ == "__main__":
    args = parse_args()
    print_arguments(args)
    profile(args)