未验证 提交 2738ca10 编写于 作者: Y Yibing Liu 提交者: GitHub

Merge pull request #636 from kuke/refactor_model

Refactor model conf: add profiling, parallel running, model saving etc
......@@ -225,8 +225,8 @@ class DataReader(object):
@suppress_complaints(verbose=self._verbose)
def ordered_processing_task(sample_info_queue, sample_queue, out_order):
if self._verbose == 0:
signal.signal(signal.SIGTERM, suppress_signal())
signal.signal(signal.SIGINT, suppress_signal())
signal.signal(signal.SIGTERM, suppress_signal)
signal.signal(signal.SIGINT, suppress_signal)
def read_bytes(fpath, start, size):
f = open(fpath, 'r')
......
......@@ -5,6 +5,8 @@ import sys
from six import reraise
from tblib import Traceback
import numpy as np
def to_lodtensor(data, place):
"""convert tensor to lodtensor
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def stacked_lstmp_model(hidden_dim,
proj_dim,
stacked_num,
class_num,
parallel=False,
is_train=True):
""" The model for DeepASR. The main structure is composed of stacked
identical LSTMP (LSTM with recurrent projection) layers.
When running in training and validation phase, the feeding dictionary
is {'feature', 'label'}, fed by the LodTensor for feature data and
label data respectively. And in inference, only `feature` is needed.
Args:
hidden_dim(int): The hidden state's dimension of the LSTMP layer.
proj_dim(int): The projection size of the LSTMP layer.
stacked_num(int): The number of stacked LSTMP layers.
parallel(bool): Run in parallel or not, default `False`.
is_train(bool): Run in training phase or not, default `True`.
class_dim(int): The number of output classes.
"""
# network configuration
def _net_conf(feature, label):
seq_conv1 = fluid.layers.sequence_conv(
input=feature,
num_filters=1024,
filter_size=3,
filter_stride=1,
bias_attr=True)
bn1 = fluid.layers.batch_norm(
input=seq_conv1,
act="sigmoid",
is_test=not is_train,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn1
for i in range(stacked_num):
fc = fluid.layers.fc(input=stack_input,
size=hidden_dim * 4,
bias_attr=True)
proj, cell = fluid.layers.dynamic_lstmp(
input=fc,
size=hidden_dim * 4,
proj_size=proj_dim,
bias_attr=True,
use_peepholes=True,
is_reverse=False,
cell_activation="tanh",
proj_activation="tanh")
bn = fluid.layers.batch_norm(
input=proj,
act="sigmoid",
is_test=not is_train,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn
prediction = fluid.layers.fc(input=stack_input,
size=class_num,
act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return prediction, avg_cost, acc
# data feeder
feature = fluid.layers.data(
name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
label = fluid.layers.data(
name="label", shape=[-1, 1], dtype="int64", lod_level=1)
if parallel:
# When the execution place is specified to CUDAPlace, the program will
# run on all $CUDA_VISIBLE_DEVICES GPUs. Otherwise the program will
# run on all CPU devices.
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
feat_ = pd.read_input(feature)
label_ = pd.read_input(label)
prediction, avg_cost, acc = _net_conf(feat_, label_)
for out in [avg_cost, acc]:
pd.write_output(out)
# get mean loss and acc through every devices.
avg_cost, acc = pd()
avg_cost = fluid.layers.mean(x=avg_cost)
acc = fluid.layers.mean(x=acc)
else:
prediction, avg_cost, acc = _net_conf(feature, label)
return prediction, avg_cost, acc
"""Add the parent directory to $PYTHONPATH"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import sys
def add_path(path):
if path not in sys.path:
sys.path.insert(0, path)
this_dir = os.path.dirname(__file__)
# Add project path to PYTHONPATH
proj_path = os.path.join(this_dir, '..')
add_path(proj_path)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import argparse
import time
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler
import _init_paths
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader
from model_utils.model import stacked_lstmp_model
from data_utils.util import lodtensor_to_ndarray
def parse_args():
parser = argparse.ArgumentParser("Profiling for the stacked LSTMP model.")
parser.add_argument(
'--batch_size',
type=int,
default=32,
help='The sequence number of a batch data. (default: %(default)d)')
parser.add_argument(
'--minimum_batch_size',
type=int,
default=1,
help='The minimum sequence number of a batch data. '
'(default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
default=5,
help='Number of lstmp layers to stack. (default: %(default)d)')
parser.add_argument(
'--proj_dim',
type=int,
default=512,
help='Project size of lstmp unit. (default: %(default)d)')
parser.add_argument(
'--hidden_dim',
type=int,
default=1024,
help='Hidden size of lstmp unit. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
default=0.002,
help='Learning rate used to train. (default: %(default)f)')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)')
parser.add_argument(
'--parallel', action='store_true', help='If set, run in parallel.')
parser.add_argument(
'--mean_var',
type=str,
default='data/global_mean_var_search26kHr',
help='mean var path')
parser.add_argument(
'--feature_lst',
type=str,
default='data/feature.lst',
help='feature list path.')
parser.add_argument(
'--label_lst',
type=str,
default='data/label.lst',
help='label list path.')
parser.add_argument(
'--max_batch_num',
type=int,
default=10,
help='Maximum number of batches for profiling. (default: %(default)d)')
parser.add_argument(
'--first_batches_to_skip',
type=int,
default=1,
help='Number of first batches to skip for profiling. '
'(default: %(default)d)')
parser.add_argument(
'--print_train_acc',
action='store_true',
help='If set, output training accuray.')
parser.add_argument(
'--sorted_key',
type=str,
default='total',
choices=['None', 'total', 'calls', 'min', 'max', 'ave'],
help='Different types of time to sort the profiling report. '
'(default: %(default)s)')
args = parser.parse_args()
return args
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def profile(args):
"""profile the training process.
"""
if not args.first_batches_to_skip < args.max_batch_num:
raise ValueError("arg 'first_batches_to_skip' must be smaller than "
"'max_batch_num'.")
if not args.first_batches_to_skip >= 0:
raise ValueError(
"arg 'first_batches_to_skip' must not be smaller than 0.")
_, avg_cost, accuracy = stacked_lstmp_model(
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
parallel=args.parallel)
adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
adam_optimizer.minimize(avg_cost)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
ltrans = [
trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
trans_splice.TransSplice()
]
data_reader = reader.DataReader(args.feature_lst, args.label_lst)
data_reader.set_transformers(ltrans)
feature_t = fluid.LoDTensor()
label_t = fluid.LoDTensor()
sorted_key = None if args.sorted_key is 'None' else args.sorted_key
with profiler.profiler(args.device, sorted_key) as prof:
frames_seen, start_time = 0, 0.0
for batch_id, batch_data in enumerate(
data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
if batch_id >= args.max_batch_num:
break
if args.first_batches_to_skip == batch_id:
profiler.reset_profiler()
start_time = time.time()
frames_seen = 0
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
frames_seen += lod[-1]
outs = exe.run(fluid.default_main_program(),
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
if args.print_train_acc:
print("Batch %d acc: %f" %
(batch_id, lodtensor_to_ndarray(outs[1])[0]))
else:
sys.stdout.write('.')
sys.stdout.flush()
time_consumed = time.time() - start_time
frames_per_sec = frames_seen / time_consumed
print("\nTime consumed: %f s, performance: %f frames/s." %
(time_consumed, frames_per_sec))
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
profile(args)
......@@ -2,21 +2,23 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import numpy as np
import argparse
import time
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader
from data_utils.util import lodtensor_to_ndarray
from model_utils.model import stacked_lstmp_model
def parse_args():
parser = argparse.ArgumentParser("LSTM model benchmark.")
parser = argparse.ArgumentParser("Training for stacked LSTMP model.")
parser.add_argument(
'--batch_size',
type=int,
......@@ -26,8 +28,8 @@ def parse_args():
'--minimum_batch_size',
type=int,
default=1,
help='The minimum sequence number of a batch data. (default: %(default)d)'
)
help='The minimum sequence number of a batch data. '
'(default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
......@@ -48,6 +50,11 @@ def parse_args():
type=int,
default=100,
help='Epoch number to train. (default: %(default)d)')
parser.add_argument(
'--print_per_batches',
type=int,
default=100,
help='Interval to print training accuracy. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
......@@ -60,168 +67,164 @@ def parse_args():
choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)')
parser.add_argument(
'--infer_only', action='store_true', help='If set, run forward only.')
'--parallel', action='store_true', help='If set, run in parallel.')
parser.add_argument(
'--mean_var',
type=str,
default='data/global_mean_var_search26kHr',
help='mean var path')
parser.add_argument(
'--train_feature_lst',
type=str,
default='data/feature.lst',
help='feature list path for training.')
parser.add_argument(
'--train_label_lst',
type=str,
default='data/label.lst',
help='label list path for training.')
parser.add_argument(
'--use_cprof', action='store_true', help='If set, use cProfile.')
'--val_feature_lst',
type=str,
default='data/val_feature.lst',
help='feature list path for validation.')
parser.add_argument(
'--val_label_lst',
type=str,
default='data/val_label.lst',
help='label list path for validation.')
parser.add_argument(
'--use_nvprof',
action='store_true',
help='If set, use nvprof for CUDA.')
parser.add_argument('--mean_var', type=str, help='mean var path')
parser.add_argument('--feature_lst', type=str, help='mean var path')
parser.add_argument('--label_lst', type=str, help='mean var path')
'--model_save_dir',
type=str,
default='./checkpoints',
help='directory to save model. Do not save model if set to '
'.')
args = parser.parse_args()
return args
def print_arguments(args):
vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
vars(args)['device'] == 'GPU')
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def dynamic_lstmp_model(hidden_dim,
proj_dim,
stacked_num,
class_num=1749,
is_train=True):
feature = fluid.layers.data(
name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
seq_conv1 = fluid.layers.sequence_conv(
input=feature,
num_filters=1024,
filter_size=3,
filter_stride=1,
bias_attr=True)
bn1 = fluid.layers.batch_norm(
input=seq_conv1,
act="sigmoid",
is_test=False,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn1
for i in range(stacked_num):
fc = fluid.layers.fc(input=stack_input,
size=hidden_dim * 4,
bias_attr=True)
proj, cell = fluid.layers.dynamic_lstmp(
input=fc,
size=hidden_dim * 4,
proj_size=proj_dim,
bias_attr=True,
use_peepholes=True,
is_reverse=False,
cell_activation="tanh",
proj_activation="tanh")
bn = fluid.layers.batch_norm(
input=proj,
act="sigmoid",
is_test=False,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn
prediction = fluid.layers.fc(input=stack_input,
size=class_num,
act='softmax')
if not is_train: return feature, prediction
label = fluid.layers.data(
name="label", shape=[-1, 1], dtype="int64", lod_level=1)
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return prediction, label, avg_cost
def train(args):
if args.use_cprof:
pr = cProfile.Profile()
pr.enable()
prediction, label, avg_cost = dynamic_lstmp_model(
args.hidden_dim, args.proj_dim, args.stacked_num)
"""train in loop.
"""
# prediction, avg_cost, accuracy = stacked_lstmp_model(args.hidden_dim,
# args.proj_dim, args.stacked_num, class_num=1749, args.parallel)
prediction, avg_cost, accuracy = stacked_lstmp_model(
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
parallel=args.parallel)
adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
adam_optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
# clone from default main program
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
test_accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
inference_program = fluid.io.get_inference_program(test_target)
# program for test
test_program = fluid.default_main_program().clone()
with fluid.program_guard(test_program):
test_program = fluid.io.get_inference_program([avg_cost, accuracy])
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# @TODO datareader should take the responsibility (parsing from config file)
ltrans = [
trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
trans_splice.TransSplice()
]
data_reader = reader.DataReader(args.feature_lst, args.label_lst)
data_reader.set_transformers(ltrans)
res_feature = fluid.LoDTensor()
res_label = fluid.LoDTensor()
feature_t = fluid.LoDTensor()
label_t = fluid.LoDTensor()
# validation
def test(exe):
# If test data not found, return invalid cost and accuracy
if not (os.path.exists(args.val_feature_lst) and
os.path.exists(args.val_label_lst)):
return -1.0, -1.0
# test data reader
test_data_reader = reader.DataReader(args.val_feature_lst,
args.val_label_lst)
test_data_reader.set_transformers(ltrans)
test_costs, test_accs = [], []
for batch_id, batch_data in enumerate(
test_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(test_program,
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
test_costs.append(lodtensor_to_ndarray(cost)[0])
test_accs.append(lodtensor_to_ndarray(acc)[0])
return np.mean(test_costs), np.mean(test_accs)
# train data reader
train_data_reader = reader.DataReader(args.train_feature_lst,
args.train_label_lst)
train_data_reader.set_transformers(ltrans)
# train
for pass_id in xrange(args.pass_num):
pass_start_time = time.time()
words_seen = 0
accuracy.reset(exe)
for batch_id, batch_data in enumerate(
data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
(bat_feature, bat_label, lod) = batch_data
res_feature.set(bat_feature, place)
res_feature.set_lod([lod])
res_label.set(bat_label, place)
res_label.set_lod([lod])
words_seen += lod[-1]
loss, acc = exe.run(
fluid.default_main_program(),
feed={"feature": res_feature,
"label": res_label},
fetch_list=[avg_cost] + accuracy.metrics,
return_numpy=False)
train_acc = accuracy.eval(exe)
print("acc:", lodtensor_to_ndarray(loss))
train_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(fluid.default_main_program(),
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
if batch_id > 0 and (batch_id % args.print_per_batches == 0):
print("\nBatch %d, train cost: %f, train acc: %f" %
(batch_id, lodtensor_to_ndarray(cost)[0],
lodtensor_to_ndarray(acc)[0]))
else:
sys.stdout.write('.')
sys.stdout.flush()
# run test
val_cost, val_acc = test(exe)
# save model
if args.model_save_dir != '':
model_path = os.path.join(
args.model_save_dir, "deep_asr.pass_" + str(pass_id) + ".model")
fluid.io.save_inference_model(model_path, ["feature"],
[prediction], exe)
# cal pass time
pass_end_time = time.time()
time_consumed = pass_end_time - pass_start_time
words_per_sec = words_seen / time_consumed
def lodtensor_to_ndarray(lod_tensor):
dims = lod_tensor.get_dims()
ret = np.zeros(shape=dims).astype('float32')
for i in xrange(np.product(dims)):
ret.ravel()[i] = lod_tensor.get_float_element(i)
return ret, lod_tensor.lod()
# print info at pass end
print("\nPass %d, time consumed: %f s, val cost: %f, val acc: %f\n" %
(pass_id, time_consumed, val_cost, val_acc))
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
if args.infer_only:
pass
else:
if args.use_nvprof and args.device == 'GPU':
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
train(args)
else:
train(args)
if args.model_save_dir != '' and not os.path.exists(args.model_save_dir):
os.mkdir(args.model_save_dir)
train(args)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册