未验证 提交 2738ca10 编写于 作者: Y Yibing Liu 提交者: GitHub

Merge pull request #636 from kuke/refactor_model

Refactor model conf: add profiling, parallel running, model saving, etc.
...@@ -225,8 +225,8 @@ class DataReader(object): ...@@ -225,8 +225,8 @@ class DataReader(object):
@suppress_complaints(verbose=self._verbose) @suppress_complaints(verbose=self._verbose)
def ordered_processing_task(sample_info_queue, sample_queue, out_order): def ordered_processing_task(sample_info_queue, sample_queue, out_order):
if self._verbose == 0: if self._verbose == 0:
signal.signal(signal.SIGTERM, suppress_signal()) signal.signal(signal.SIGTERM, suppress_signal)
signal.signal(signal.SIGINT, suppress_signal()) signal.signal(signal.SIGINT, suppress_signal)
def read_bytes(fpath, start, size): def read_bytes(fpath, start, size):
f = open(fpath, 'r') f = open(fpath, 'r')
......
...@@ -5,6 +5,8 @@ import sys ...@@ -5,6 +5,8 @@ import sys
from six import reraise from six import reraise
from tblib import Traceback from tblib import Traceback
import numpy as np
def to_lodtensor(data, place): def to_lodtensor(data, place):
"""convert tensor to lodtensor """convert tensor to lodtensor
......
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def stacked_lstmp_model(hidden_dim,
                        proj_dim,
                        stacked_num,
                        class_num,
                        parallel=False,
                        is_train=True):
    """Build the DeepASR network: a sequence convolution followed by a stack
    of identical LSTMP (LSTM with recurrent projection) layers and a softmax
    classifier.

    In the training and validation phases the feeding dictionary is
    {'feature', 'label'}, fed by the LodTensor for feature data and label
    data respectively. For inference only `feature` is needed.

    Args:
        hidden_dim(int): The hidden state's dimension of the LSTMP layer.
        proj_dim(int): The projection size of the LSTMP layer.
        stacked_num(int): The number of stacked LSTMP layers.
        class_num(int): The number of output classes.
        parallel(bool): Run in parallel or not, default `False`.
        is_train(bool): Run in training phase or not, default `True`.

    Returns:
        tuple: `(prediction, avg_cost, acc)` variables of the network.
    """

    def _sigmoid_batch_norm(layer_input):
        # Same batch-norm settings are applied after the convolution and
        # after every LSTMP layer.
        return fluid.layers.batch_norm(
            input=layer_input,
            act="sigmoid",
            is_test=not is_train,
            momentum=0.9,
            epsilon=1e-05,
            data_layout='NCHW')

    def _net_conf(feature, label):
        # Front end: sequence convolution + batch norm.
        conv = fluid.layers.sequence_conv(
            input=feature,
            num_filters=1024,
            filter_size=3,
            filter_stride=1,
            bias_attr=True)
        hidden = _sigmoid_batch_norm(conv)

        # Stacked LSTMP layers; each one is fc -> lstmp -> batch norm.
        for _layer_idx in range(stacked_num):
            gates = fluid.layers.fc(input=hidden,
                                    size=hidden_dim * 4,
                                    bias_attr=True)
            projection, _cell = fluid.layers.dynamic_lstmp(
                input=gates,
                size=hidden_dim * 4,
                proj_size=proj_dim,
                bias_attr=True,
                use_peepholes=True,
                is_reverse=False,
                cell_activation="tanh",
                proj_activation="tanh")
            hidden = _sigmoid_batch_norm(projection)

        # Classifier, loss and accuracy.
        prediction = fluid.layers.fc(input=hidden,
                                     size=class_num,
                                     act='softmax')
        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc = fluid.layers.accuracy(input=prediction, label=label)
        return prediction, avg_cost, acc

    # Data feeder.
    feature = fluid.layers.data(
        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)

    if not parallel:
        return _net_conf(feature, label)

    # When the execution place is specified to CUDAPlace, the program will
    # run on all $CUDA_VISIBLE_DEVICES GPUs. Otherwise the program will
    # run on all CPU devices.
    places = fluid.layers.get_places()
    pd = fluid.layers.ParallelDo(places)
    with pd.do():
        feat_ = pd.read_input(feature)
        label_ = pd.read_input(label)
        prediction, avg_cost, acc = _net_conf(feat_, label_)
        pd.write_output(avg_cost)
        pd.write_output(acc)

    # Get mean loss and acc through every device.
    avg_cost, acc = pd()
    avg_cost = fluid.layers.mean(x=avg_cost)
    acc = fluid.layers.mean(x=acc)
    return prediction, avg_cost, acc
"""Add the parent directory to $PYTHONPATH"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import sys
def add_path(path):
    """Put `path` at the front of sys.path unless it is already listed."""
    if path in sys.path:
        return
    sys.path.insert(0, path)
# Directory part of this file's path, as derived from __file__.
this_dir = os.path.dirname(__file__)
# Add the parent directory (the project path) to the front of sys.path so
# that modules located there can be imported.
proj_path = os.path.join(this_dir, '..')
add_path(proj_path)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import argparse
import time
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler
import _init_paths
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader
from model_utils.model import stacked_lstmp_model
from data_utils.util import lodtensor_to_ndarray
def parse_args():
    """Parse the command-line options of the profiling script.

    Options are read from ``sys.argv``.

    Returns:
        argparse.Namespace: The parsed options, one attribute per flag
        (``batch_size``, ``minimum_batch_size``, ``stacked_num``,
        ``proj_dim``, ``hidden_dim``, ``learning_rate``, ``device``,
        ``parallel``, ``mean_var``, ``feature_lst``, ``label_lst``,
        ``max_batch_num``, ``first_batches_to_skip``, ``print_train_acc``,
        ``sorted_key``).
    """
    # The first positional parameter of ArgumentParser is `prog`, so the
    # description has to be passed by keyword; otherwise the usage line
    # shows the whole sentence as the program name.
    parser = argparse.ArgumentParser(
        description="Profiling for the stacked LSTMP model.")
    parser.add_argument(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    parser.add_argument(
        '--minimum_batch_size',
        type=int,
        default=1,
        help='The minimum sequence number of a batch data. '
        '(default: %(default)d)')
    parser.add_argument(
        '--stacked_num',
        type=int,
        default=5,
        help='Number of lstmp layers to stack. (default: %(default)d)')
    parser.add_argument(
        '--proj_dim',
        type=int,
        default=512,
        help='Project size of lstmp unit. (default: %(default)d)')
    parser.add_argument(
        '--hidden_dim',
        type=int,
        default=1024,
        help='Hidden size of lstmp unit. (default: %(default)d)')
    parser.add_argument(
        '--learning_rate',
        type=float,
        default=0.002,
        help='Learning rate used to train. (default: %(default)f)')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type. (default: %(default)s)')
    parser.add_argument(
        '--parallel', action='store_true', help='If set, run in parallel.')
    parser.add_argument(
        '--mean_var',
        type=str,
        default='data/global_mean_var_search26kHr',
        help='mean var path')
    parser.add_argument(
        '--feature_lst',
        type=str,
        default='data/feature.lst',
        help='feature list path.')
    parser.add_argument(
        '--label_lst',
        type=str,
        default='data/label.lst',
        help='label list path.')
    parser.add_argument(
        '--max_batch_num',
        type=int,
        default=10,
        help='Maximum number of batches for profiling. (default: %(default)d)')
    parser.add_argument(
        '--first_batches_to_skip',
        type=int,
        default=1,
        help='Number of first batches to skip for profiling. '
        '(default: %(default)d)')
    parser.add_argument(
        '--print_train_acc',
        action='store_true',
        help='If set, output training accuracy.')
    parser.add_argument(
        '--sorted_key',
        type=str,
        default='total',
        choices=['None', 'total', 'calls', 'min', 'max', 'ave'],
        help='Different types of time to sort the profiling report. '
        '(default: %(default)s)')
    return parser.parse_args()
def print_arguments(args):
    """Print every attribute of `args` as a ``name: value`` line.

    The lines are sorted by attribute name and framed by a header and a
    footer line.

    Args:
        args: An object whose attributes are listed via ``vars(args)``,
            e.g. the namespace returned by ``parse_args``.
    """
    print('----------- Configuration Arguments -----------')
    # items() is available on both Python 2 and 3, unlike iteritems().
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
def profile(args):
    """Profile the training process of the stacked LSTMP model.

    Builds the model and an Adam optimizer, then runs up to
    `args.max_batch_num` training batches inside the fluid profiler. The
    profiler and the throughput counters are reset when the batch index
    reaches `args.first_batches_to_skip`, so the first batches are left
    out of the report.

    Args:
        args: Parsed options as returned by `parse_args`.

    Raises:
        ValueError: If `first_batches_to_skip` is negative or not smaller
            than `max_batch_num`.
    """
    if not args.first_batches_to_skip < args.max_batch_num:
        raise ValueError("arg 'first_batches_to_skip' must be smaller than "
                         "'max_batch_num'.")
    if not args.first_batches_to_skip >= 0:
        raise ValueError(
            "arg 'first_batches_to_skip' must not be smaller than 0.")

    _, avg_cost, accuracy = stacked_lstmp_model(
        hidden_dim=args.hidden_dim,
        proj_dim=args.proj_dim,
        stacked_num=args.stacked_num,
        class_num=1749,
        parallel=args.parallel)

    adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    adam_optimizer.minimize(avg_cost)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice()
    ]

    data_reader = reader.DataReader(args.feature_lst, args.label_lst)
    data_reader.set_transformers(ltrans)

    feature_t = fluid.LoDTensor()
    label_t = fluid.LoDTensor()

    # Compare by value: identity comparison against a string literal is
    # implementation-dependent and would let the text 'None' through.
    sorted_key = None if args.sorted_key == 'None' else args.sorted_key
    with profiler.profiler(args.device, sorted_key):
        # Start the clock now so the throughput stays meaningful even when
        # the reader yields fewer batches than `first_batches_to_skip`.
        frames_seen, start_time = 0, time.time()
        for batch_id, batch_data in enumerate(
                data_reader.batch_iterator(args.batch_size,
                                           args.minimum_batch_size)):
            if batch_id >= args.max_batch_num:
                break
            if args.first_batches_to_skip == batch_id:
                # Discard everything measured during the skipped batches.
                profiler.reset_profiler()
                start_time = time.time()
                frames_seen = 0
            # load_data
            (features, labels, lod) = batch_data
            feature_t.set(features, place)
            feature_t.set_lod([lod])
            label_t.set(labels, place)
            label_t.set_lod([lod])

            # presumably lod[-1] is the frame count of the batch -- TODO confirm
            frames_seen += lod[-1]

            outs = exe.run(fluid.default_main_program(),
                           feed={"feature": feature_t,
                                 "label": label_t},
                           fetch_list=[avg_cost, accuracy],
                           return_numpy=False)

            if args.print_train_acc:
                print("Batch %d acc: %f" %
                      (batch_id, lodtensor_to_ndarray(outs[1])[0]))
            else:
                # Progress indicator when accuracy output is disabled.
                sys.stdout.write('.')
                sys.stdout.flush()
        time_consumed = time.time() - start_time
        frames_per_sec = frames_seen / time_consumed
        print("\nTime consumed: %f s, performance: %f frames/s." %
              (time_consumed, frames_per_sec))
# Script entry point: parse the command line, echo the configuration and
# run the profiling loop.
if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
    profile(args)
...@@ -2,21 +2,23 @@ from __future__ import absolute_import ...@@ -2,21 +2,23 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import sys
import os
import numpy as np import numpy as np
import argparse import argparse
import time import time
import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader import data_utils.data_reader as reader
from data_utils.util import lodtensor_to_ndarray
from model_utils.model import stacked_lstmp_model
def parse_args(): def parse_args():
parser = argparse.ArgumentParser("LSTM model benchmark.") parser = argparse.ArgumentParser("Training for stacked LSTMP model.")
parser.add_argument( parser.add_argument(
'--batch_size', '--batch_size',
type=int, type=int,
...@@ -26,8 +28,8 @@ def parse_args(): ...@@ -26,8 +28,8 @@ def parse_args():
'--minimum_batch_size', '--minimum_batch_size',
type=int, type=int,
default=1, default=1,
help='The minimum sequence number of a batch data. (default: %(default)d)' help='The minimum sequence number of a batch data. '
) '(default: %(default)d)')
parser.add_argument( parser.add_argument(
'--stacked_num', '--stacked_num',
type=int, type=int,
...@@ -48,6 +50,11 @@ def parse_args(): ...@@ -48,6 +50,11 @@ def parse_args():
type=int, type=int,
default=100, default=100,
help='Epoch number to train. (default: %(default)d)') help='Epoch number to train. (default: %(default)d)')
parser.add_argument(
'--print_per_batches',
type=int,
default=100,
help='Interval to print training accuracy. (default: %(default)d)')
parser.add_argument( parser.add_argument(
'--learning_rate', '--learning_rate',
type=float, type=float,
...@@ -60,168 +67,164 @@ def parse_args(): ...@@ -60,168 +67,164 @@ def parse_args():
choices=['CPU', 'GPU'], choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)') help='The device type. (default: %(default)s)')
parser.add_argument( parser.add_argument(
'--infer_only', action='store_true', help='If set, run forward only.') '--parallel', action='store_true', help='If set, run in parallel.')
parser.add_argument(
'--mean_var',
type=str,
default='data/global_mean_var_search26kHr',
help='mean var path')
parser.add_argument(
'--train_feature_lst',
type=str,
default='data/feature.lst',
help='feature list path for training.')
parser.add_argument(
'--train_label_lst',
type=str,
default='data/label.lst',
help='label list path for training.')
parser.add_argument( parser.add_argument(
'--use_cprof', action='store_true', help='If set, use cProfile.') '--val_feature_lst',
type=str,
default='data/val_feature.lst',
help='feature list path for validation.')
parser.add_argument(
'--val_label_lst',
type=str,
default='data/val_label.lst',
help='label list path for validation.')
parser.add_argument( parser.add_argument(
'--use_nvprof', '--model_save_dir',
action='store_true', type=str,
help='If set, use nvprof for CUDA.') default='./checkpoints',
parser.add_argument('--mean_var', type=str, help='mean var path') help='directory to save model. Do not save model if set to '
parser.add_argument('--feature_lst', type=str, help='mean var path') '.')
parser.add_argument('--label_lst', type=str, help='mean var path')
args = parser.parse_args() args = parser.parse_args()
return args return args
def print_arguments(args): def print_arguments(args):
vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
vars(args)['device'] == 'GPU')
print('----------- Configuration Arguments -----------') print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value)) print('%s: %s' % (arg, value))
print('------------------------------------------------') print('------------------------------------------------')
def dynamic_lstmp_model(hidden_dim,
                        proj_dim,
                        stacked_num,
                        class_num=1749,
                        is_train=True):
    """Build the stacked LSTMP network on top of a `feature` data layer.

    Args:
        hidden_dim(int): The hidden state's dimension of the LSTMP layer.
        proj_dim(int): The projection size of the LSTMP layer.
        stacked_num(int): The number of stacked LSTMP layers.
        class_num(int): The number of output classes, default 1749.
        is_train(bool): Build the training network or not, default `True`.

    Returns:
        tuple: `(feature, prediction)` when `is_train` is false, otherwise
        `(prediction, label, avg_cost)`.
    """
    feature = fluid.layers.data(
        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)

    seq_conv1 = fluid.layers.sequence_conv(
        input=feature,
        num_filters=1024,
        filter_size=3,
        filter_stride=1,
        bias_attr=True)

    # Batch norm follows `is_train` instead of always running in training
    # mode, matching stacked_lstmp_model.
    bn1 = fluid.layers.batch_norm(
        input=seq_conv1,
        act="sigmoid",
        is_test=not is_train,
        momentum=0.9,
        epsilon=1e-05,
        data_layout='NCHW')

    stack_input = bn1
    for _ in range(stacked_num):
        fc = fluid.layers.fc(input=stack_input,
                             size=hidden_dim * 4,
                             bias_attr=True)
        proj, _cell = fluid.layers.dynamic_lstmp(
            input=fc,
            size=hidden_dim * 4,
            proj_size=proj_dim,
            bias_attr=True,
            use_peepholes=True,
            is_reverse=False,
            cell_activation="tanh",
            proj_activation="tanh")
        bn = fluid.layers.batch_norm(
            input=proj,
            act="sigmoid",
            is_test=not is_train,
            momentum=0.9,
            epsilon=1e-05,
            data_layout='NCHW')
        stack_input = bn

    prediction = fluid.layers.fc(input=stack_input,
                                 size=class_num,
                                 act='softmax')

    # No label layer or loss is created outside the training phase.
    if not is_train:
        return feature, prediction

    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    return prediction, label, avg_cost
def train(args): def train(args):
if args.use_cprof: """train in loop.
pr = cProfile.Profile() """
pr.enable()
# prediction, avg_cost, accuracy = stacked_lstmp_model(args.hidden_dim,
prediction, label, avg_cost = dynamic_lstmp_model( # args.proj_dim, args.stacked_num, class_num=1749, args.parallel)
args.hidden_dim, args.proj_dim, args.stacked_num) prediction, avg_cost, accuracy = stacked_lstmp_model(
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
parallel=args.parallel)
adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
adam_optimizer.minimize(avg_cost) adam_optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=prediction, label=label) # program for test
test_program = fluid.default_main_program().clone()
# clone from default main program with fluid.program_guard(test_program):
inference_program = fluid.default_main_program().clone() test_program = fluid.io.get_inference_program([avg_cost, accuracy])
with fluid.program_guard(inference_program):
test_accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
inference_program = fluid.io.get_inference_program(test_target)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0) place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
# @TODO datareader should take the responsibility (parsing from config file)
ltrans = [ ltrans = [
trans_add_delta.TransAddDelta(2, 2), trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var), trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
trans_splice.TransSplice() trans_splice.TransSplice()
] ]
data_reader = reader.DataReader(args.feature_lst, args.label_lst) feature_t = fluid.LoDTensor()
data_reader.set_transformers(ltrans) label_t = fluid.LoDTensor()
res_feature = fluid.LoDTensor() # validation
res_label = fluid.LoDTensor() def test(exe):
# If test data not found, return invalid cost and accuracy
if not (os.path.exists(args.val_feature_lst) and
os.path.exists(args.val_label_lst)):
return -1.0, -1.0
# test data reader
test_data_reader = reader.DataReader(args.val_feature_lst,
args.val_label_lst)
test_data_reader.set_transformers(ltrans)
test_costs, test_accs = [], []
for batch_id, batch_data in enumerate(
test_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(test_program,
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
test_costs.append(lodtensor_to_ndarray(cost)[0])
test_accs.append(lodtensor_to_ndarray(acc)[0])
return np.mean(test_costs), np.mean(test_accs)
# train data reader
train_data_reader = reader.DataReader(args.train_feature_lst,
args.train_label_lst)
train_data_reader.set_transformers(ltrans)
# train
for pass_id in xrange(args.pass_num): for pass_id in xrange(args.pass_num):
pass_start_time = time.time() pass_start_time = time.time()
words_seen = 0
accuracy.reset(exe)
for batch_id, batch_data in enumerate( for batch_id, batch_data in enumerate(
data_reader.batch_iterator(args.batch_size, train_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)): args.minimum_batch_size)):
(bat_feature, bat_label, lod) = batch_data # load_data
res_feature.set(bat_feature, place) (features, labels, lod) = batch_data
res_feature.set_lod([lod]) feature_t.set(features, place)
res_label.set(bat_label, place) feature_t.set_lod([lod])
res_label.set_lod([lod]) label_t.set(labels, place)
words_seen += lod[-1] label_t.set_lod([lod])
loss, acc = exe.run(
fluid.default_main_program(), cost, acc = exe.run(fluid.default_main_program(),
feed={"feature": res_feature, feed={"feature": feature_t,
"label": res_label}, "label": label_t},
fetch_list=[avg_cost] + accuracy.metrics, fetch_list=[avg_cost, accuracy],
return_numpy=False) return_numpy=False)
train_acc = accuracy.eval(exe)
print("acc:", lodtensor_to_ndarray(loss)) if batch_id > 0 and (batch_id % args.print_per_batches == 0):
print("\nBatch %d, train cost: %f, train acc: %f" %
(batch_id, lodtensor_to_ndarray(cost)[0],
lodtensor_to_ndarray(acc)[0]))
else:
sys.stdout.write('.')
sys.stdout.flush()
# run test
val_cost, val_acc = test(exe)
# save model
if args.model_save_dir != '':
model_path = os.path.join(
args.model_save_dir, "deep_asr.pass_" + str(pass_id) + ".model")
fluid.io.save_inference_model(model_path, ["feature"],
[prediction], exe)
# cal pass time
pass_end_time = time.time() pass_end_time = time.time()
time_consumed = pass_end_time - pass_start_time time_consumed = pass_end_time - pass_start_time
words_per_sec = words_seen / time_consumed # print info at pass end
print("\nPass %d, time consumed: %f s, val cost: %f, val acc: %f\n" %
(pass_id, time_consumed, val_cost, val_acc))
def lodtensor_to_ndarray(lod_tensor):
    """Copy the elements of a LoD tensor into a float32 numpy array.

    Args:
        lod_tensor: An object providing `get_dims()`,
            `get_float_element(i)` and `lod()`.

    Returns:
        tuple: `(ndarray, lod)`, where the array has the shape given by
        `get_dims()` and dtype float32, and `lod` is the value of
        `lod_tensor.lod()`.
    """
    dims = lod_tensor.get_dims()
    # np.prod replaces the deprecated np.product alias; int() keeps the
    # element count usable by range() even for an empty dims list.
    size = int(np.prod(dims))
    flat = np.fromiter(
        (lod_tensor.get_float_element(i) for i in range(size)),
        dtype='float32',
        count=size)
    return flat.reshape(dims), lod_tensor.lod()
if __name__ == '__main__': if __name__ == '__main__':
args = parse_args() args = parse_args()
print_arguments(args) print_arguments(args)
if args.infer_only: if args.model_save_dir != '' and not os.path.exists(args.model_save_dir):
pass os.mkdir(args.model_save_dir)
else:
if args.use_nvprof and args.device == 'GPU': train(args)
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
train(args)
else:
train(args)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册