# profile.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import numpy as np
import argparse
import time

L
Luo Tao 已提交
10 11
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
12
import _init_paths
13 14 15
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
16
import data_utils.data_reader as reader
17
from model_utils.model import stacked_lstmp_model
18
from data_utils.util import lodtensor_to_ndarray
19 20 21


def parse_args(argv=None):
    """Build the command-line parser and parse the profiling options.

    Args:
        argv: Optional list of argument strings to parse. When left as
            ``None`` argparse falls back to ``sys.argv[1:]``, which is the
            behavior callers that pass no argument get.

    Returns:
        The ``argparse.Namespace`` holding every option declared below.
    """
    parser = argparse.ArgumentParser("Profiling for the stacked LSTMP model.")

    # Batching options.
    parser.add_argument(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    parser.add_argument(
        '--minimum_batch_size',
        type=int,
        default=1,
        help='The minimum sequence number of a batch data. '
        '(default: %(default)d)')

    # Model and optimizer options.
    parser.add_argument(
        '--stacked_num',
        type=int,
        default=5,
        help='Number of lstmp layers to stack. (default: %(default)d)')
    parser.add_argument(
        '--proj_dim',
        type=int,
        default=512,
        help='Project size of lstmp unit. (default: %(default)d)')
    parser.add_argument(
        '--hidden_dim',
        type=int,
        default=1024,
        help='Hidden size of lstmp unit. (default: %(default)d)')
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.002,
        help='Learning rate used to train. (default: %(default)f)')

    # Execution options.
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type. (default: %(default)s)')
    parser.add_argument(
        '--parallel', action='store_true', help='If set, run in parallel.')

    # Input data locations.
    parser.add_argument(
        '--mean_var',
        type=str,
        default='data/global_mean_var_search26kHr',
        help='mean var path')
    parser.add_argument(
        '--feature_lst',
        type=str,
        default='data/feature.lst',
        help='feature list path.')
    parser.add_argument(
        '--label_lst',
        type=str,
        default='data/label.lst',
        help='label list path.')

    # Profiling window and report options.
    parser.add_argument(
        '--max_batch_num',
        type=int,
        default=10,
        help='Maximum number of batches for profiling. (default: %(default)d)')
    parser.add_argument(
        '--first_batches_to_skip',
        type=int,
        default=1,
        help='Number of first batches to skip for profiling. '
        '(default: %(default)d)')
    parser.add_argument(
        '--print_train_acc',
        action='store_true',
        help='If set, output training accuracy.')
    # 'None' is accepted as a literal string here; the consumer of this
    # option is responsible for mapping it to the None object.
    parser.add_argument(
        '--sorted_key',
        type=str,
        default='total',
        choices=['None', 'total', 'calls', 'min', 'max', 'ave'],
        help='Different types of time to sort the profiling report. '
        '(default: %(default)s)')

    return parser.parse_args(argv)


103 104 105 106 107 108 109
def print_arguments(args):
    """Print every attribute of ``args`` as a ``name: value`` line.

    The lines are sorted by attribute name and framed by a banner and a
    closing rule.

    Args:
        args: Any object accepted by ``vars()``, typically the
            ``argparse.Namespace`` returned by the argument parser.
    """
    print('-----------  Configuration Arguments -----------')
    # dict.items() is available on both Python 2 and Python 3, whereas
    # iteritems() only exists on Python 2.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


110
def profile(args):
    """Profile the training process of the stacked LSTMP model.

    Builds the model and a Momentum optimizer, then runs at most
    ``args.max_batch_num`` batches inside the Fluid profiler. When the batch
    with index ``args.first_batches_to_skip`` is reached, the profiler, the
    timer and the frame counter are reset so that earlier (warm-up) batches
    are excluded from the report.

    Args:
        args: Parsed command-line options. Reads ``first_batches_to_skip``,
            ``max_batch_num``, ``hidden_dim``, ``proj_dim``, ``stacked_num``,
            ``parallel``, ``learning_rate``, ``device``, ``mean_var``,
            ``feature_lst``, ``label_lst``, ``sorted_key``, ``batch_size``,
            ``minimum_batch_size`` and ``print_train_acc``.

    Raises:
        ValueError: If ``first_batches_to_skip`` is negative or is not
            smaller than ``max_batch_num``.
    """
    # Validate the profiling window before any model or device setup.
    if not args.first_batches_to_skip < args.max_batch_num:
        raise ValueError("arg 'first_batches_to_skip' must be smaller than "
                         "'max_batch_num'.")
    if not args.first_batches_to_skip >= 0:
        raise ValueError(
            "arg 'first_batches_to_skip' must not be smaller than 0.")

    # NOTE(review): class_num=1749 is hard-coded; presumably the number of
    # output classes of the label set -- confirm against the data.
    _, avg_cost, accuracy = stacked_lstmp_model(
        hidden_dim=args.hidden_dim,
        proj_dim=args.proj_dim,
        stacked_num=args.stacked_num,
        class_num=1749,
        parallel=args.parallel)

    optimizer = fluid.optimizer.Momentum(
        learning_rate=args.learning_rate, momentum=0.9)
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Feature transformers handed to the data reader, in this order.
    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice()
    ]

    data_reader = reader.DataReader(args.feature_lst, args.label_lst)
    data_reader.set_transformers(ltrans)

    # The two tensors are allocated once and refilled for every batch.
    feature_t = fluid.LoDTensor()
    label_t = fluid.LoDTensor()

    # The command line can only deliver the string 'None'; compare by value
    # (not identity) to translate it into a real None for the profiler.
    sorted_key = None if args.sorted_key == 'None' else args.sorted_key
    with profiler.profiler(args.device, sorted_key):
        # Start the clock here so the report stays meaningful even if the
        # reader yields too few batches to ever reach the reset point below.
        frames_seen, start_time = 0, time.time()
        for batch_id, batch_data in enumerate(
                data_reader.batch_iterator(args.batch_size,
                                           args.minimum_batch_size)):
            if batch_id >= args.max_batch_num:
                break
            if args.first_batches_to_skip == batch_id:
                # Warm-up is over: discard everything measured so far.
                profiler.reset_profiler()
                start_time = time.time()
                frames_seen = 0

            # load_data
            (features, labels, lod) = batch_data
            feature_t.set(features, place)
            feature_t.set_lod([lod])
            label_t.set(labels, place)
            label_t.set_lod([lod])

            # NOTE(review): presumably lod holds cumulative sequence offsets,
            # making lod[-1] the frame count of the batch -- confirm.
            frames_seen += lod[-1]

            outs = exe.run(fluid.default_main_program(),
                           feed={"feature": feature_t,
                                 "label": label_t},
                           fetch_list=[avg_cost, accuracy],
                           return_numpy=False)

            if args.print_train_acc:
                print("Batch %d acc: %f" %
                      (batch_id, lodtensor_to_ndarray(outs[1])[0]))
            else:
                # Progress indicator: one dot per processed batch.
                sys.stdout.write('.')
                sys.stdout.flush()

        time_consumed = time.time() - start_time
        # Guard against a zero interval on coarse clocks.
        if time_consumed > 0:
            frames_per_sec = frames_seen / time_consumed
        else:
            frames_per_sec = 0.0
        print("\nTime consumed: %f s, performance: %f frames/s." %
              (time_consumed, frames_per_sec))


if __name__ == "__main__":
    # Script entry point: parse the options, echo them, then run the
    # profiling session with them.
    args = parse_args()
    print_arguments(args)
    profile(args)