from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cProfile
import argparse
import time

import numpy as np

import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler

import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader


def parse_args():
    parser = argparse.ArgumentParser("LSTM model benchmark.")
    parser.add_argument(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    parser.add_argument(
        '--minimum_batch_size',
        type=int,
        default=1,
        help='The minimum sequence number of a batch data. '
        '(default: %(default)d)')
    parser.add_argument(
        '--stacked_num',
        type=int,
        default=5,
        help='Number of lstm layers to stack. (default: %(default)d)')
    parser.add_argument(
        '--proj_dim',
        type=int,
        default=512,
        help='Project size of lstm unit. (default: %(default)d)')
    parser.add_argument(
        '--hidden_dim',
        type=int,
        default=1024,
        help='Hidden size of lstm unit. (default: %(default)d)')
    parser.add_argument(
        '--pass_num',
        type=int,
        default=100,
        help='Epoch number to train. (default: %(default)d)')
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.002,
        help='Learning rate used to train. (default: %(default)f)')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type. (default: %(default)s)')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
    parser.add_argument(
        '--use_nvprof',
        action='store_true',
        help='If set, use nvprof for CUDA.')
    parser.add_argument('--mean_var', type=str, help='mean var path')
    parser.add_argument('--feature_lst', type=str, help='feature list path')
    parser.add_argument('--label_lst', type=str, help='label list path')
    args = parser.parse_args()
    return args


def print_arguments(args):
    # nvprof is only meaningful when running on GPU.
    vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
                                vars(args)['device'] == 'GPU')
    print('----------- Configuration Arguments -----------')
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


def dynamic_lstmp_model(hidden_dim,
                        proj_dim,
                        stacked_num,
                        class_num=1749,
                        is_train=True):
    feature = fluid.layers.data(
        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)

    # A sequence convolution over the spliced acoustic features, followed
    # by batch normalization.
    seq_conv1 = fluid.layers.sequence_conv(
        input=feature,
        num_filters=1024,
        filter_size=3,
        filter_stride=1,
        bias_attr=True)
    bn1 = fluid.layers.batch_norm(
        input=seq_conv1,
        act="sigmoid",
        is_test=False,
        momentum=0.9,
        epsilon=1e-05,
        data_layout='NCHW')

    # Stack `stacked_num` projected LSTM (LSTMP) layers, each followed by
    # batch normalization.
    stack_input = bn1
    for i in range(stacked_num):
        fc = fluid.layers.fc(input=stack_input,
                             size=hidden_dim * 4,
                             bias_attr=True)
        proj, cell = fluid.layers.dynamic_lstmp(
            input=fc,
            size=hidden_dim * 4,
            proj_size=proj_dim,
            bias_attr=True,
            use_peepholes=True,
            is_reverse=False,
            cell_activation="tanh",
            proj_activation="tanh")
        bn = fluid.layers.batch_norm(
            input=proj,
            act="sigmoid",
            is_test=False,
            momentum=0.9,
            epsilon=1e-05,
            data_layout='NCHW')
        stack_input = bn

    prediction = fluid.layers.fc(input=stack_input,
                                 size=class_num,
                                 act='softmax')

    if not is_train:
        return feature, prediction

    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    return prediction, label, avg_cost
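# The --infer_only branch in __main__ below is currently a no-op. This is a
# minimal sketch of how the is_train=False return of dynamic_lstmp_model
# could be wired into a standalone inference program; the function name and
# the program plumbing here are illustrative assumptions, not part of the
# original benchmark.
def build_infer_model(args):
    inference_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(inference_program, startup_program):
        # Forward-only graph: no label layer, no cost.
        feature, prediction = dynamic_lstmp_model(
            args.hidden_dim, args.proj_dim, args.stacked_num, is_train=False)
    return inference_program, startup_program, feature, prediction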
def train(args):
    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()

    prediction, label, avg_cost = dynamic_lstmp_model(
        args.hidden_dim, args.proj_dim, args.stacked_num)

    adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    adam_optimizer.minimize(avg_cost)

    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)

    # clone from default main program
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        test_accuracy = fluid.evaluator.Accuracy(
            input=prediction, label=label)
        test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
        inference_program = fluid.io.get_inference_program(test_target)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # @TODO datareader should take the responsibility (parsing from config
    # file)
    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice()
    ]
    data_reader = reader.DataReader(args.feature_lst, args.label_lst)
    data_reader.set_transformers(ltrans)

    res_feature = fluid.LoDTensor()
    res_label = fluid.LoDTensor()
    for pass_id in range(args.pass_num):
        pass_start_time = time.time()
        words_seen = 0
        accuracy.reset(exe)
        for batch_id, batch_data in enumerate(
                data_reader.batch_iterator(args.batch_size,
                                           args.minimum_batch_size)):
            # Feature and label share the same LoD (sequence lengths).
            (bat_feature, bat_label, lod) = batch_data
            res_feature.set(bat_feature, place)
            res_feature.set_lod([lod])
            res_label.set(bat_label, place)
            res_label.set_lod([lod])
            words_seen += lod[-1]
            loss, acc = exe.run(
                fluid.default_main_program(),
                feed={"feature": res_feature,
                      "label": res_label},
                fetch_list=[avg_cost] + accuracy.metrics,
                return_numpy=False)
            train_acc = accuracy.eval(exe)
            loss_np, _ = lodtensor_to_ndarray(loss)
            print("loss:", loss_np, "acc:", train_acc)

        pass_end_time = time.time()
        time_consumed = pass_end_time - pass_start_time
        words_per_sec = words_seen / time_consumed
        print("Pass: %d, time: %fs, words per second: %f" %
              (pass_id, time_consumed, words_per_sec))


def lodtensor_to_ndarray(lod_tensor):
    """Copy a LoDTensor element by element into a numpy ndarray."""
    dims = lod_tensor.get_dims()
    ret = np.zeros(shape=dims).astype('float32')
    for i in range(np.prod(dims)):
        ret.ravel()[i] = lod_tensor.get_float_element(i)
    return ret, lod_tensor.lod()


if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
    if args.infer_only:
        pass
    else:
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(args)
        else:
            train(args)
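# Example invocation. The script name and data paths are placeholders;
# substitute the real mean/variance file and the feature/label list files
# produced for data_utils.data_reader.DataReader:
#
#   python lstm_benchmark.py --device GPU --batch_size 32 \
#       --mean_var data/global_mean_var \
#       --feature_lst data/train_feature.lst \
#       --label_lst data/train_label.lst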