from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import numpy as np
import argparse
import time

import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import _init_paths
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.augmentor.trans_delay as trans_delay
import data_utils.async_data_reader as reader
from model_utils.model import stacked_lstmp_model
from data_utils.util import lodtensor_to_ndarray


def parse_args():
    """Build the command-line parser and return the parsed arguments.

    All knobs (model sizes, data paths, profiling controls) are exposed
    as optional flags with defaults suitable for a quick profiling run.
    """
    arg_parser = argparse.ArgumentParser(
        "Profiling for the stacked LSTMP model.")
    arg_parser.add_argument(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    arg_parser.add_argument(
        '--minimum_batch_size',
        type=int,
        default=1,
        help='The minimum sequence number of a batch data. '
        '(default: %(default)d)')
    arg_parser.add_argument(
        '--frame_dim',
        type=int,
        default=120 * 11,
        help='Frame dimension of feature data. (default: %(default)d)')
    arg_parser.add_argument(
        '--stacked_num',
        type=int,
        default=5,
        help='Number of lstmp layers to stack. (default: %(default)d)')
    arg_parser.add_argument(
        '--proj_dim',
        type=int,
        default=512,
        help='Project size of lstmp unit. (default: %(default)d)')
    arg_parser.add_argument(
        '--hidden_dim',
        type=int,
        default=1024,
        help='Hidden size of lstmp unit. (default: %(default)d)')
    arg_parser.add_argument(
        '--class_num',
        type=int,
        default=1749,
        help='Number of classes in label. (default: %(default)d)')
    arg_parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.00016,
        help='Learning rate used to train. (default: %(default)f)')
    arg_parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type. (default: %(default)s)')
    arg_parser.add_argument(
        '--parallel', action='store_true', help='If set, run in parallel.')
    arg_parser.add_argument(
        '--mean_var',
        type=str,
        default='data/global_mean_var_search26kHr',
        help='mean var path')
    arg_parser.add_argument(
        '--feature_lst',
        type=str,
        default='data/feature.lst',
        help='feature list path.')
    arg_parser.add_argument(
        '--label_lst',
        type=str,
        default='data/label.lst',
        help='label list path.')
    arg_parser.add_argument(
        '--max_batch_num',
        type=int,
        default=11,
        help='Maximum number of batches for profiling. (default: %(default)d)')
    arg_parser.add_argument(
        '--first_batches_to_skip',
        type=int,
        default=1,
        help='Number of first batches to skip for profiling. '
        '(default: %(default)d)')
    arg_parser.add_argument(
        '--print_train_acc',
        action='store_true',
        help='If set, output training accuray.')
    arg_parser.add_argument(
        '--sorted_key',
        type=str,
        default='total',
        choices=['None', 'total', 'calls', 'min', 'max', 'ave'],
        help='Different types of time to sort the profiling report. '
        '(default: %(default)s)')
    return arg_parser.parse_args()


def print_arguments(args):
    """Print every parsed argument, one per line, sorted by name.

    Args:
        args (argparse.Namespace): parsed command-line arguments.
    """
    print('-----------  Configuration Arguments -----------')
    # BUG FIX: dict.iteritems() was removed in Python 3; items() works on
    # both Python 2 and Python 3 (the file's __future__ imports show it
    # targets 2/3 compatibility).
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


def profile(args):
    """Profile the training process of the stacked LSTMP model.

    Runs at most ``args.max_batch_num`` training batches under the Fluid
    profiler. The first ``args.first_batches_to_skip`` batches are treated
    as warm-up: their profiler statistics and timing are discarded so they
    do not pollute the reported frames/s.

    Args:
        args (argparse.Namespace): parsed CLI arguments (see parse_args).

    Raises:
        ValueError: if first_batches_to_skip is negative or not smaller
            than max_batch_num.
    """
    if not args.first_batches_to_skip < args.max_batch_num:
        raise ValueError("arg 'first_batches_to_skip' must be smaller than "
                         "'max_batch_num'.")
    if not args.first_batches_to_skip >= 0:
        raise ValueError(
            "arg 'first_batches_to_skip' must not be smaller than 0.")

    _, avg_cost, accuracy = stacked_lstmp_model(
        frame_dim=args.frame_dim,
        hidden_dim=args.hidden_dim,
        proj_dim=args.proj_dim,
        stacked_num=args.stacked_num,
        class_num=args.class_num,
        parallel=args.parallel)

    optimizer = fluid.optimizer.Adam(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=args.learning_rate,
            decay_steps=1879,
            decay_rate=1 / 1.2,
            staircase=True))
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Feature-augmentation pipeline applied to every batch by the reader.
    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
    ]

    data_reader = reader.AsyncDataReader(
        args.feature_lst, args.label_lst, -1, split_sentence_threshold=1024)
    data_reader.set_transformers(ltrans)

    feature_t = fluid.LoDTensor()
    label_t = fluid.LoDTensor()

    # BUG FIX: the original compared with "is 'None'". Identity comparison
    # against a string literal is implementation-dependent (relies on
    # interning); use '==' for a reliable string comparison.
    sorted_key = None if args.sorted_key == 'None' else args.sorted_key
    with profiler.profiler(args.device, sorted_key) as prof:
        frames_seen, start_time = 0, 0.0
        for batch_id, batch_data in enumerate(
                data_reader.batch_iterator(args.batch_size,
                                           args.minimum_batch_size)):
            if batch_id >= args.max_batch_num:
                break
            if args.first_batches_to_skip == batch_id:
                # Warm-up is over: drop accumulated profiler stats and
                # restart the throughput clock.
                profiler.reset_profiler()
                start_time = time.time()
                frames_seen = 0
            # Load one batch into LoDTensors. NOTE(review): the reshape
            # assumes each feature row flattens to (11, 3, frame_dim) and
            # the transpose moves the delta-channel axis forward — confirm
            # against the model's expected input layout.
            (features, labels, lod, _) = batch_data
            features = np.reshape(features, (-1, 11, 3, args.frame_dim))
            features = np.transpose(features, (0, 2, 1, 3))
            feature_t.set(features, place)
            feature_t.set_lod([lod])
            label_t.set(labels, place)
            label_t.set_lod([lod])

            # lod[-1] is the cumulative frame count of this batch.
            frames_seen += lod[-1]

            outs = exe.run(fluid.default_main_program(),
                           feed={"feature": feature_t,
                                 "label": label_t},
                           fetch_list=[avg_cost, accuracy]
                           if args.print_train_acc else [],
                           return_numpy=False)

            if args.print_train_acc:
                print("Batch %d acc: %f" %
                      (batch_id, lodtensor_to_ndarray(outs[1])[0]))
            else:
                # Progress indicator when accuracy printing is disabled.
                sys.stdout.write('.')
                sys.stdout.flush()
        time_consumed = time.time() - start_time
        frames_per_sec = frames_seen / time_consumed
        print("\nTime consumed: %f s, performance: %f frames/s." %
              (time_consumed, frames_per_sec))


if __name__ == '__main__':
    # Entry point: parse CLI options, echo the configuration, run profiling.
    cli_args = parse_args()
    print_arguments(cli_args)
    profile(cli_args)