未验证 提交 93756632 编写于 作者: Y Yibing Liu 提交者: GitHub

Merge with develop branch (#1056)

上级 c19f7ac1
...@@ -25,16 +25,6 @@ def to_lodtensor(data, place): ...@@ -25,16 +25,6 @@ def to_lodtensor(data, place):
return res return res
def lodtensor_to_ndarray(lod_tensor):
    """Convert a LoDTensor into a float32 ndarray.

    Every element is read through ``lod_tensor.get_float_element`` in flat
    index order and the result is reshaped to ``lod_tensor._get_dims()``.

    Args:
        lod_tensor: Object exposing ``_get_dims()``, ``get_float_element(i)``
            and ``lod()``.

    Returns:
        tuple: ``(ndarray, lod)`` where ``ndarray`` has dtype float32 and the
        tensor's dims as shape, and ``lod`` is whatever ``lod_tensor.lod()``
        returns.
    """
    dims = tuple(lod_tensor._get_dims())
    # np.prod replaces np.product (removed in NumPy 2.0); int() keeps the
    # count integral even for empty dims, where the product is the float 1.0.
    count = int(np.prod(dims))
    # `range` works on both Python 2 and 3, unlike `xrange`.
    flat = np.fromiter(
        (lod_tensor.get_float_element(i) for i in range(count)),
        dtype='float32',
        count=count)
    return flat.reshape(dims), lod_tensor.lod()
def split_infer_result(infer_seq, lod): def split_infer_result(infer_seq, lod):
infer_batch = [] infer_batch = []
for i in xrange(0, len(lod[0]) - 1): for i in xrange(0, len(lod[0]) - 1):
......
ThreadPool
build
post_latgen_faster_mapped.so
pybind11
aux.tar.gz
aux
data
checkpoints
...@@ -7,7 +7,8 @@ python -u ../../train.py --train_feature_lst data/train_feature.lst \ ...@@ -7,7 +7,8 @@ python -u ../../train.py --train_feature_lst data/train_feature.lst \
--checkpoints checkpoints \ --checkpoints checkpoints \
--frame_dim 80 \ --frame_dim 80 \
--class_num 3040 \ --class_num 3040 \
--print_per_batches 100 \
--infer_models '' \ --infer_models '' \
--batch_size 64 \ --batch_size 16 \
--learning_rate 6.4e-5 \ --learning_rate 6.4e-5 \
--parallel --parallel
...@@ -5,14 +5,16 @@ from __future__ import print_function ...@@ -5,14 +5,16 @@ from __future__ import print_function
import paddle.fluid as fluid import paddle.fluid as fluid
def stacked_lstmp_model(frame_dim, def stacked_lstmp_model(feature,
label,
hidden_dim, hidden_dim,
proj_dim, proj_dim,
stacked_num, stacked_num,
class_num, class_num,
parallel=False, parallel=False,
is_train=True): is_train=True):
""" The model for DeepASR. The main structure is composed of stacked """
The model for DeepASR. The main structure is composed of stacked
identical LSTMP (LSTM with recurrent projection) layers. identical LSTMP (LSTM with recurrent projection) layers.
When running in training and validation phase, the feeding dictionary When running in training and validation phase, the feeding dictionary
...@@ -28,9 +30,6 @@ def stacked_lstmp_model(frame_dim, ...@@ -28,9 +30,6 @@ def stacked_lstmp_model(frame_dim,
is_train(bool): Run in training phase or not, default `True`. is_train(bool): Run in training phase or not, default `True`.
class_dim(int): The number of output classes. class_dim(int): The number of output classes.
""" """
# network configuration
def _net_conf(feature, label):
conv1 = fluid.layers.conv2d( conv1 = fluid.layers.conv2d(
input=feature, input=feature,
num_filters=32, num_filters=32,
...@@ -73,35 +72,3 @@ def stacked_lstmp_model(frame_dim, ...@@ -73,35 +72,3 @@ def stacked_lstmp_model(frame_dim,
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label) acc = fluid.layers.accuracy(input=prediction, label=label)
return prediction, avg_cost, acc return prediction, avg_cost, acc
# data feeder
feature = fluid.layers.data(
name="feature",
shape=[-1, 3, 11, frame_dim],
dtype="float32",
lod_level=1)
label = fluid.layers.data(
name="label", shape=[-1, 1], dtype="int64", lod_level=1)
if parallel:
# When the execution place is specified to CUDAPlace, the program will
# run on all $CUDA_VISIBLE_DEVICES GPUs. Otherwise the program will
# run on all CPU devices.
places = fluid.layers.device.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
feat_ = pd.read_input(feature)
label_ = pd.read_input(label)
prediction, avg_cost, acc = _net_conf(feat_, label_)
for out in [prediction, avg_cost, acc]:
pd.write_output(out)
# get mean loss and acc through every devices.
prediction, avg_cost, acc = pd()
prediction.stop_gradient = True
avg_cost = fluid.layers.mean(x=avg_cost)
acc = fluid.layers.mean(x=acc)
else:
prediction, avg_cost, acc = _net_conf(feature, label)
return prediction, avg_cost, acc
...@@ -14,7 +14,6 @@ import data_utils.augmentor.trans_add_delta as trans_add_delta ...@@ -14,7 +14,6 @@ import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice import data_utils.augmentor.trans_splice as trans_splice
import data_utils.augmentor.trans_delay as trans_delay import data_utils.augmentor.trans_delay as trans_delay
import data_utils.async_data_reader as reader import data_utils.async_data_reader as reader
from data_utils.util import lodtensor_to_ndarray
from model_utils.model import stacked_lstmp_model from model_utils.model import stacked_lstmp_model
...@@ -24,7 +23,8 @@ def parse_args(): ...@@ -24,7 +23,8 @@ def parse_args():
'--batch_size', '--batch_size',
type=int, type=int,
default=32, default=32,
help='The sequence number of a batch data. (default: %(default)d)') help='The sequence number of a batch data. Batch size per GPU. (default: %(default)d)'
)
parser.add_argument( parser.add_argument(
'--minimum_batch_size', '--minimum_batch_size',
type=int, type=int,
...@@ -147,18 +147,26 @@ def train(args): ...@@ -147,18 +147,26 @@ def train(args):
if args.infer_models != '' and not os.path.exists(args.infer_models): if args.infer_models != '' and not os.path.exists(args.infer_models):
os.mkdir(args.infer_models) os.mkdir(args.infer_models)
train_program = fluid.Program()
train_startup = fluid.Program()
with fluid.program_guard(train_program, train_startup):
with fluid.unique_name.guard():
py_train_reader = fluid.layers.py_reader(
capacity=10,
shapes=([-1, 3, 11, args.frame_dim], [-1, 1]),
dtypes=['float32', 'int64'],
lod_levels=[1, 1],
name='train_reader')
feature, label = fluid.layers.read_file(py_train_reader)
prediction, avg_cost, accuracy = stacked_lstmp_model( prediction, avg_cost, accuracy = stacked_lstmp_model(
frame_dim=args.frame_dim, feature=feature,
label=label,
hidden_dim=args.hidden_dim, hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim, proj_dim=args.proj_dim,
stacked_num=args.stacked_num, stacked_num=args.stacked_num,
class_num=args.class_num, class_num=args.class_num)
parallel=args.parallel) # optimizer = fluid.optimizer.Momentum(learning_rate=args.learning_rate, momentum=0.9)
# program for test
test_program = fluid.default_main_program().clone()
#optimizer = fluid.optimizer.Momentum(learning_rate=args.learning_rate, momentum=0.9)
optimizer = fluid.optimizer.Adam( optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay( learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate, learning_rate=args.learning_rate,
...@@ -166,10 +174,45 @@ def train(args): ...@@ -166,10 +174,45 @@ def train(args):
decay_rate=1 / 1.2, decay_rate=1 / 1.2,
staircase=True)) staircase=True))
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
fluid.memory_optimize(train_program)
test_program = fluid.Program()
test_startup = fluid.Program()
with fluid.program_guard(test_program, test_startup):
with fluid.unique_name.guard():
py_test_reader = fluid.layers.py_reader(
capacity=10,
shapes=([-1, 3, 11, args.frame_dim], [-1, 1]),
dtypes=['float32', 'int64'],
lod_levels=[1, 1],
name='test_reader')
feature, label = fluid.layers.read_file(py_test_reader)
prediction, avg_cost, accuracy = stacked_lstmp_model(
feature=feature,
label=label,
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=args.class_num)
test_program = test_program.clone(for_test=True)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0) place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(train_startup)
exe.run(test_startup)
if args.parallel:
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_iteration_per_drop_scope = 10
train_exe = fluid.ParallelExecutor(
use_cuda=(args.device == 'GPU'),
loss_name=avg_cost.name,
exec_strategy=exec_strategy,
main_program=train_program)
test_exe = fluid.ParallelExecutor(
use_cuda=(args.device == 'GPU'),
main_program=test_program,
exec_strategy=exec_strategy,
share_vars_from=train_exe)
# resume training if initial model provided. # resume training if initial model provided.
if args.init_model_path is not None: if args.init_model_path is not None:
...@@ -181,15 +224,24 @@ def train(args): ...@@ -181,15 +224,24 @@ def train(args):
trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5) trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
] ]
feature_t = fluid.LoDTensor() # bind train_reader
label_t = fluid.LoDTensor() train_data_reader = reader.AsyncDataReader(
args.train_feature_lst,
args.train_label_lst,
-1,
split_sentence_threshold=1024)
# validation train_data_reader.set_transformers(ltrans)
def test(exe):
# If test data not found, return invalid cost and accuracy def train_data_provider():
if not (os.path.exists(args.val_feature_lst) and for data in train_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size):
yield batch_data_to_lod_tensors(args, data, fluid.CPUPlace())
py_train_reader.decorate_tensor_provider(train_data_provider)
if (os.path.exists(args.val_feature_lst) and
os.path.exists(args.val_label_lst)): os.path.exists(args.val_label_lst)):
return -1.0, -1.0
# test data reader # test data reader
test_data_reader = reader.AsyncDataReader( test_data_reader = reader.AsyncDataReader(
args.val_feature_lst, args.val_feature_lst,
...@@ -197,86 +249,101 @@ def train(args): ...@@ -197,86 +249,101 @@ def train(args):
-1, -1,
split_sentence_threshold=1024) split_sentence_threshold=1024)
test_data_reader.set_transformers(ltrans) test_data_reader.set_transformers(ltrans)
test_costs, test_accs = [], []
for batch_id, batch_data in enumerate(
test_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod, _) = batch_data
features = np.reshape(features, (-1, 11, 3, args.frame_dim))
features = np.transpose(features, (0, 2, 1, 3))
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(test_program, def test_data_provider():
feed={"feature": feature_t, for data in test_data_reader.batch_iterator(
"label": label_t}, args.batch_size, args.minimum_batch_size):
yield batch_data_to_lod_tensors(args, data, fluid.CPUPlace())
py_test_reader.decorate_tensor_provider(test_data_provider)
# validation
def test(exe):
# If test data not found, return invalid cost and accuracy
if not (os.path.exists(args.val_feature_lst) and
os.path.exists(args.val_label_lst)):
return -1.0, -1.0
batch_id = 0
test_costs = []
test_accs = []
while True:
if batch_id == 0:
py_test_reader.start()
try:
if args.parallel:
cost, acc = exe.run(
fetch_list=[avg_cost.name, accuracy.name],
return_numpy=False)
else:
cost, acc = exe.run(program=test_program,
fetch_list=[avg_cost, accuracy], fetch_list=[avg_cost, accuracy],
return_numpy=False) return_numpy=False)
test_costs.append(lodtensor_to_ndarray(cost)[0]) sys.stdout.write('.')
test_accs.append(lodtensor_to_ndarray(acc)[0]) sys.stdout.flush()
test_costs.append(np.array(cost)[0])
test_accs.append(np.array(acc)[0])
batch_id += 1
except fluid.core.EOFException:
py_test_reader.reset()
break
return np.mean(test_costs), np.mean(test_accs) return np.mean(test_costs), np.mean(test_accs)
# train data reader
train_data_reader = reader.AsyncDataReader(
args.train_feature_lst,
args.train_label_lst,
-1,
split_sentence_threshold=1024)
train_data_reader.set_transformers(ltrans)
# train # train
for pass_id in xrange(args.pass_num): for pass_id in xrange(args.pass_num):
pass_start_time = time.time() pass_start_time = time.time()
for batch_id, batch_data in enumerate( batch_id = 0
train_data_reader.batch_iterator(args.batch_size, while True:
args.minimum_batch_size)): if batch_id == 0:
# load_data py_train_reader.start()
(features, labels, lod, name_lst) = batch_data
features = np.reshape(features, (-1, 11, 3, args.frame_dim))
features = np.transpose(features, (0, 2, 1, 3))
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
to_print = batch_id > 0 and (batch_id % args.print_per_batches == 0) to_print = batch_id > 0 and (batch_id % args.print_per_batches == 0)
outs = exe.run(fluid.default_main_program(), try:
feed={"feature": feature_t, if args.parallel:
"label": label_t}, outs = train_exe.run(
fetch_list=[avg_cost, accuracy] if to_print else [], fetch_list=[avg_cost.name, accuracy.name]
if to_print else [],
return_numpy=False)
else:
outs = exe.run(program=train_program,
fetch_list=[avg_cost, accuracy]
if to_print else [],
return_numpy=False) return_numpy=False)
except fluid.core.EOFException:
py_train_reader.reset()
break
if to_print: if to_print:
if args.parallel:
print("\nBatch %d, train cost: %f, train acc: %f" % print("\nBatch %d, train cost: %f, train acc: %f" %
(batch_id, lodtensor_to_ndarray(outs[0])[0], (batch_id, np.mean(outs[0]), np.mean(outs[1])))
lodtensor_to_ndarray(outs[1])[0])) else:
print("\nBatch %d, train cost: %f, train acc: %f" % (
batch_id, np.array(outs[0])[0], np.array(outs[1])[0]))
# save the latest checkpoint # save the latest checkpoint
if args.checkpoints != '': if args.checkpoints != '':
model_path = os.path.join(args.checkpoints, model_path = os.path.join(args.checkpoints,
"deep_asr.latest.checkpoint") "deep_asr.latest.checkpoint")
fluid.io.save_persistables(exe, model_path) fluid.io.save_persistables(exe, model_path, train_program)
else: else:
sys.stdout.write('.') sys.stdout.write('.')
sys.stdout.flush() sys.stdout.flush()
batch_id += 1
# run test # run test
val_cost, val_acc = test(exe) val_cost, val_acc = test(test_exe if args.parallel else exe)
# save checkpoint per pass # save checkpoint per pass
if args.checkpoints != '': if args.checkpoints != '':
model_path = os.path.join( model_path = os.path.join(
args.checkpoints, args.checkpoints,
"deep_asr.pass_" + str(pass_id) + ".checkpoint") "deep_asr.pass_" + str(pass_id) + ".checkpoint")
fluid.io.save_persistables(exe, model_path) fluid.io.save_persistables(exe, model_path, train_program)
# save inference model # save inference model
if args.infer_models != '': if args.infer_models != '':
model_path = os.path.join( model_path = os.path.join(
args.infer_models, args.infer_models,
"deep_asr.pass_" + str(pass_id) + ".infer.model") "deep_asr.pass_" + str(pass_id) + ".infer.model")
fluid.io.save_inference_model(model_path, ["feature"], fluid.io.save_inference_model(model_path, ["feature"],
[prediction], exe) [prediction], exe, train_program)
# cal pass time # cal pass time
pass_end_time = time.time() pass_end_time = time.time()
time_consumed = pass_end_time - pass_start_time time_consumed = pass_end_time - pass_start_time
...@@ -285,6 +352,19 @@ def train(args): ...@@ -285,6 +352,19 @@ def train(args):
(pass_id, time_consumed, val_cost, val_acc)) (pass_id, time_consumed, val_cost, val_acc))
def batch_data_to_lod_tensors(args, batch_data, place):
    """Pack one reader batch into a (feature, label) pair of LoDTensors.

    Args:
        args: Parsed arguments; only ``args.frame_dim`` is read.
        batch_data: 4-item sequence ``(features, labels, lod, name_lst)``;
            the last item is unpacked but not used.
        place: Place handed to ``LoDTensor.set`` for both tensors.

    Returns:
        tuple: ``(feature_tensor, label_tensor)``, each with lod ``[lod]``.
    """
    raw_features, raw_labels, seq_lod, _ = batch_data

    # View the features as (-1, 11, 3, frame_dim), then swap axes 1 and 2 so
    # the result is laid out as (-1, 3, 11, frame_dim).
    spliced = np.reshape(raw_features, (-1, 11, 3, args.frame_dim))
    spliced = np.transpose(spliced, (0, 2, 1, 3))

    packed = []
    for payload in (spliced, raw_labels):
        tensor = fluid.LoDTensor()
        tensor.set(payload, place)
        tensor.set_lod([seq_lod])
        packed.append(tensor)

    feature_tensor, label_tensor = packed
    return feature_tensor, label_tensor
if __name__ == '__main__': if __name__ == '__main__':
args = parse_args() args = parse_args()
print_arguments(args) print_arguments(args)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册