Commit 52cda823 authored by zhxfl

first data load

Parent 124afdb7
#by zhxfl 2018.01.24
""" @package docstring
load speech data from disk
"""
import random
import Queue
import numpy
import struct
import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.trans_add_delta as trans_add_delta
g_lblock = []
g_que_sample = Queue.Queue()
g_nframe_dim = 120 * 11
g_nstart_block_idx = 0
g_nload_block_num = 1
g_ndrop_frame_len = 256
class OneBlock(object):
""" Documentation for a class.
struct for one block :
contain label, label desc, feature, feature_desc
"""
def __init__(self):
"""The constructor."""
self.label = ""
self.label_desc = ""
self.feature = ""
self.feature_desc = ""
def set_trans(ltrans):
    """ set the list of feature transformers applied to every loaded sample """
    global g_ltrans
    g_ltrans = ltrans
def load_list(sfeature_lst, slabel_lst):
""" load list """
global g_lblock
lFeature = open(sfeature_lst).readlines()
lLabel = open(slabel_lst).readlines()
assert len(lLabel) == len(lFeature)
for i in range(0, len(lFeature), 2):
one_block = OneBlock()
one_block.label = lLabel[i]
one_block.label_desc = lLabel[i + 1]
one_block.feature = lFeature[i]
one_block.feature_desc = lFeature[i + 1]
g_lblock.append(one_block)
random.shuffle(g_lblock)
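# A sketch of the list/description layout this loader assumes, inferred from
# load_list above and load_one_block below (only the field positions actually
# read by the parsing code are shown; other fields and file names are
# illustrative):
#
#   feature.lst / label.lst : alternating lines of
#       <binary data file path>
#       <description file path>
#   label description file  : line 0 -> <tag> <sample_num>
#                             line i -> <..> <..> <start_byte> <byte_size> <frame_num>
#   feature description file: line 0 -> <tag> <sample_num>
#                             line i -> <..> <..> <start_byte> <byte_size> <frame_num> <frame_dim>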
def load_one_block(lsample, id):
"""read one block"""
global g_lblock
if id >= len(g_lblock):
return
slabel_path = g_lblock[id].label.replace("\n", "")
slabel_desc_path = g_lblock[id].label_desc.replace("\n", "")
sfeature_path = g_lblock[id].feature.replace("\n", "")
sfeature_desc_path = g_lblock[id].feature_desc.replace("\n", "")
llabel_line = open(slabel_desc_path).readlines()
lfeature_line = open(sfeature_desc_path).readlines()
    file_label_bin = open(slabel_path, "rb")
    file_feature_bin = open(sfeature_path, "rb")
sample_num = int(llabel_line[0].split()[1])
assert sample_num == int(lfeature_line[0].split()[1])
llabel_line = llabel_line[1:]
lfeature_line = lfeature_line[1:]
for i in range(sample_num):
# read label
llabel_split = llabel_line[i].split()
nlabel_start = int(llabel_split[2])
nlabel_size = int(llabel_split[3])
nlabel_frame_num = int(llabel_split[4])
        file_label_bin.seek(nlabel_start, 0)
        label_bytes = file_label_bin.read(nlabel_size)
assert nlabel_frame_num * 4 == len(label_bytes)
label_array = struct.unpack('I' * nlabel_frame_num, label_bytes)
label_data = numpy.array(label_array, dtype=int)
label_data = label_data.reshape((nlabel_frame_num, 1))
# read feature
lfeature_split = lfeature_line[i].split()
nfeature_start = int(lfeature_split[2])
nfeature_size = int(lfeature_split[3])
nfeature_frame_num = int(lfeature_split[4])
nfeature_frame_dim = int(lfeature_split[5])
file_feature_bin.seek(nfeature_start, 0)
feature_bytes = file_feature_bin.read(nfeature_size)
assert nfeature_frame_num * nfeature_frame_dim * 4 == len(feature_bytes)
feature_array = struct.unpack('f' * nfeature_frame_num *
nfeature_frame_dim, feature_bytes)
feature_data = numpy.array(feature_array, dtype=float)
feature_data = feature_data.reshape(
(nfeature_frame_num, nfeature_frame_dim))
global g_ndrop_frame_len
#drop long sentence
if g_ndrop_frame_len < feature_data.shape[0]:
continue
lsample.append((feature_data, label_data))
def load_block(lblock_id):
"""
read blocks
"""
global g_ltrans
lsample = []
for id in lblock_id:
load_one_block(lsample, id)
# transform sample
for (nidx, sample) in enumerate(lsample):
for trans in g_ltrans:
sample = trans.perform_trans(sample)
print nidx
lsample[nidx] = sample
return lsample
def move_sample(lsample):
"""
move sample to queue
"""
# random
random.shuffle(lsample)
global g_que_sample
for sample in lsample:
g_que_sample.put(sample)
def get_one_batch(nbatch_size):
"""
construct one batch
"""
global g_que_sample
global g_nstart_block_idx
global g_nframe_dim
global g_nload_block_num
if g_que_sample.empty():
lsample = load_block(
range(g_nstart_block_idx, g_nstart_block_idx + g_nload_block_num,
1))
move_sample(lsample)
g_nstart_block_idx += g_nload_block_num
if g_que_sample.empty():
g_nstart_block_idx = 0
return None
    # accumulate the total frame number of this batch
ncur_len = 0
lod = [0]
samples = []
bat_feature = numpy.zeros((nbatch_size, g_nframe_dim))
for i in range(nbatch_size):
        # queue exhausted: rewind the block index for the next pass
if g_que_sample.empty():
g_nstart_block_idx = 0
# copy
else:
(one_feature, one_label) = g_que_sample.get()
samples.append((one_feature, one_label))
ncur_len += one_feature.shape[0]
lod.append(ncur_len)
bat_feature = numpy.zeros((ncur_len, g_nframe_dim), dtype="float32")
bat_label = numpy.zeros((ncur_len, 1), dtype="int64")
ncur_len = 0
for sample in samples:
one_feature = sample[0]
one_label = sample[1]
nframe_num = one_feature.shape[0]
nstart = ncur_len
nend = ncur_len + nframe_num
bat_feature[nstart:nend, :] = one_feature
bat_label[nstart:nend, :] = one_label
ncur_len += nframe_num
return (bat_feature, bat_label, lod)
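# A minimal usage sketch of this module (the paths are placeholders; the
# actual driver is the fluid training script further below, which also applies
# TransAddDelta / TransMeanVarianceNorm / TransSplice before batching):
#
#   import data_utils.load_data as load_data
#   import data_utils.trans_add_delta as trans_add_delta
#
#   load_data.set_trans([trans_add_delta.TransAddDelta(2, 2)])
#   load_data.load_list("data/feature.lst", "data/label.lst")
#   while True:
#       one_batch = load_data.get_one_batch(32)
#       if one_batch is None:
#           break
#       (bat_feature, bat_label, lod) = one_batch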
#by zhxfl 2018.01.29
import numpy
import math
import copy
class TransAddDelta(object):
""" add delta of feature data
trans feature for shape(a, b) to shape(a, b * 3)
"""
def __init__(self, norder=2, nwindow=2):
""" init construction
Args:
norder: default 2
nwindow: default 2
"""
self._norder = norder
self._nwindow = nwindow
def perform_trans(self, sample):
""" add delta for feature
trans feature shape from (a,b) to (a, b * 3)
"""
(feature, label) = sample
frame_dim = feature.shape[1]
d_frame_dim = frame_dim * 3
head_filled = 5
tail_filled = 5
mat = numpy.zeros(
(feature.shape[0] + head_filled + tail_filled, d_frame_dim),
dtype="float32")
#copy first frame
for i in xrange(head_filled):
numpy.copyto(mat[i, 0:frame_dim], feature[0, :])
numpy.copyto(
mat[head_filled:head_filled + feature.shape[0], 0:frame_dim],
feature[:, :])
# copy last frame
for i in xrange(head_filled + feature.shape[0], mat.shape[0], 1):
numpy.copyto(mat[i, 0:frame_dim], feature[feature.shape[0] - 1, :])
nframe = feature.shape[0]
start = head_filled
tmp_shape = mat.shape
mat = mat.reshape((tmp_shape[0] * tmp_shape[1]))
self._regress(mat, start * d_frame_dim, mat,
start * d_frame_dim + frame_dim, frame_dim, nframe,
d_frame_dim)
self._regress(mat, start * d_frame_dim + frame_dim, mat,
start * d_frame_dim + 2 * frame_dim, frame_dim, nframe,
d_frame_dim)
mat.shape = tmp_shape
return (mat[head_filled:mat.shape[0] - tail_filled, :], label)
def _regress(self, data_in, start_in, data_out, start_out, size, n, step):
""" regress
Args:
data_in: in data
start_in: start index of data_in
data_out: out data
start_out: start index of data_out
size: frame dimentional
n: frame num
step: 3 * (frame num)
"""
sigma_t2 = 0.0
delta_window = self._nwindow
for t in xrange(1, delta_window + 1):
sigma_t2 += t * t
sigma_t2 *= 2.0
for i in xrange(n):
fp1 = start_in
fp2 = start_out
for j in xrange(size):
back = fp1
forw = fp1
sum = 0.0
for t in xrange(1, delta_window + 1):
back -= step
forw += step
sum += t * (data_in[forw] - data_in[back])
data_out[fp2] = sum / sigma_t2
fp1 += 1
fp2 += 1
start_in += step
start_out += step
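
# A tiny self-check sketch (hypothetical, not part of the original file):
# for a constant input both delta orders are zero, so the appended
# 2 * frame_dim columns of the (a, b * 3) output must all be 0.
if __name__ == "__main__":
    _feat = numpy.ones((4, 3), dtype="float32")
    _out, _ = TransAddDelta().perform_trans((_feat, None))
    assert _out.shape == (4, 9)
    assert numpy.all(_out[:, 3:] == 0.0)
    print(_out.shape)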
#by zhxfl 2018.01.29
import numpy
import math
class TransMeanVarianceNorm(object):
""" normalization of mean variance for feature data
"""
def __init__(self, snorm_path):
"""init construction
Args:
snorm_path: the path of mean and variance
"""
self._mean = None
self._var = None
self._load_norm(snorm_path)
def _load_norm(self, snorm_path):
""" load global mean var file
"""
lLines = open(snorm_path).readlines()
nLen = len(lLines)
self._mean = numpy.zeros((nLen), dtype="float32")
self._var = numpy.zeros((nLen), dtype="float32")
self._nLen = nLen
for nidx, l in enumerate(lLines):
s = l.split()
assert len(s) == 2
self._mean[nidx] = float(s[0])
self._var[nidx] = 1.0 / math.sqrt(float(s[1]))
if self._var[nidx] > 100000.0:
self._var[nidx] = 100000.0
def get_mean_var(self):
""" get mean and var
"""
return (self._mean, self._var)
def perform_trans(self, sample):
""" feature = (feature - mean) * var
"""
(feature, label) = sample
shape = feature.shape
assert len(shape) == 2
nfeature_len = shape[0] * shape[1]
assert nfeature_len % self._nLen == 0
ncur_idx = 0
feature = feature.reshape((nfeature_len))
while ncur_idx < nfeature_len:
block = feature[ncur_idx:ncur_idx + self._nLen]
block = (block - self._mean) * self._var
feature[ncur_idx:ncur_idx + self._nLen] = block
ncur_idx += self._nLen
feature = feature.reshape(shape)
return (feature, label)
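
# A tiny self-check sketch (hypothetical, not part of the original file):
# write a 3-dim "mean variance" file to a temp path, then verify that a
# constant feature is mapped to (x - mean) / sqrt(var).
if __name__ == "__main__":
    import os
    import tempfile
    _fd, _path = tempfile.mkstemp()
    os.write(_fd, "1.0 4.0\n1.0 4.0\n1.0 4.0\n")
    os.close(_fd)
    _feat = numpy.ones((2, 3), dtype="float32") * 3.0
    _out, _ = TransMeanVarianceNorm(_path).perform_trans((_feat, None))
    assert abs(_out[0][0] - (3.0 - 1.0) / 2.0) < 1e-6
    os.remove(_path)
    print(_out)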
#by zhxfl
import numpy
import math
class TransSplit(object):
""" expand feature data from shape (frame_num, frame_dim)
to shape (frame_num, frame_dim * 11)
"""
def __init__(self, nleft_context=5, nright_context=5):
self._nleft_context = nleft_context
self._nright_context = nright_context
#by zhxfl 2018.01.31
import numpy
import math
class TransSplice(object):
""" expand feature data from shape (frame_num, frame_dim)
to shape (frame_num, frame_dim * 11)
"""
def __init__(self, nleft_context=5, nright_context=5):
""" init construction
"""
self._nleft_context = nleft_context
self._nright_context = nright_context
def perform_trans(self, sample):
""" splice
"""
(feature, label) = sample
nframe_num = feature.shape[0]
nframe_dim = feature.shape[1]
nnew_frame_dim = nframe_dim * (
self._nleft_context + self._nright_context + 1)
mat = numpy.zeros(
(nframe_num + self._nleft_context + self._nright_context,
nframe_dim),
dtype="float32")
ret = numpy.zeros((nframe_num, nnew_frame_dim), dtype="float32")
#copy left
for i in xrange(self._nleft_context):
mat[i, :] = feature[0, :]
#copy middle
mat[self._nleft_context:self._nleft_context +
nframe_num, :] = feature[:, :]
#copy right
for i in xrange(self._nright_context):
mat[i + self._nleft_context + nframe_num, :] = feature[-1, :]
mat = mat.reshape(mat.shape[0] * mat.shape[1])
ret = ret.reshape(ret.shape[0] * ret.shape[1])
for i in xrange(nframe_num):
numpy.copyto(ret[i * nnew_frame_dim:(i + 1) * nnew_frame_dim],
mat[i * nframe_dim:i * nframe_dim + nnew_frame_dim])
ret = ret.reshape((nframe_num, nnew_frame_dim))
return (ret, label)
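
# A tiny self-check sketch (hypothetical, not part of the original file):
# splicing an 8 x 10 feature with the default 5 + 5 context should give
# shape 8 x 110, each output row being 11 (edge-padded) context frames.
if __name__ == "__main__":
    _feat = numpy.arange(80, dtype="float32").reshape((8, 10))
    _spliced, _ = TransSplice().perform_trans((_feat, None))
    assert _spliced.shape == (8, 110)
    print(_spliced.shape)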
#by zhxfl 2018.01.31
import numpy
import paddle.v2.fluid as fluid


def to_lodtensor(data, place):
    """convert a list of sequences to a fluid LoDTensor
    """
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = numpy.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def lodtensor_to_ndarray(lod_tensor):
    """convert a fluid LoDTensor to a numpy ndarray
    """
    dims = lod_tensor.get_dims()
    ret = numpy.zeros(shape=dims).astype('float32')
    for i in xrange(numpy.product(dims)):
        ret.ravel()[i] = lod_tensor.get_float_element(i)
    return ret, lod_tensor.lod()
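# A minimal usage sketch (illustrative only; requires a working
# paddle.v2.fluid install):
#
#   place = fluid.CPUPlace()
#   labels = [[1, 2, 3], [4, 5]]          # two label sequences
#   lod_t = to_lodtensor(labels, place)   # lod becomes [[0, 3, 5]]
#
# lodtensor_to_ndarray is used in the trainer below to pull a fetched float
# tensor (e.g. the loss) back into numpy for printing.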
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import argparse
import cProfile
import time
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler
import data_utils.load_data as load_data
import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.trans_add_delta as trans_add_delta
import data_utils.trans_splice as trans_splice
def parse_args():
parser = argparse.ArgumentParser("LSTM model benchmark.")
parser.add_argument(
'--batch_size',
type=int,
default=32,
        help='The number of sequences in one batch. (default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
default=5,
help='Number of lstm layers to stack. (default: %(default)d)')
parser.add_argument(
'--proj_dim',
type=int,
default=512,
help='Project size of lstm unit. (default: %(default)d)')
parser.add_argument(
'--hidden_dim',
type=int,
default=1024,
help='Hidden size of lstm unit. (default: %(default)d)')
parser.add_argument(
'--pass_num',
type=int,
default=100,
help='Epoch number to train. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
default=0.002,
help='Learning rate used to train. (default: %(default)f)')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)')
parser.add_argument(
'--infer_only', action='store_true', help='If set, run forward only.')
parser.add_argument(
'--use_cprof', action='store_true', help='If set, use cProfile.')
parser.add_argument(
'--use_nvprof',
action='store_true',
help='If set, use nvprof for CUDA.')
args = parser.parse_args()
return args
def print_arguments(args):
vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
vars(args)['device'] == 'GPU')
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def dynamic_lstmp_model(hidden_dim,
proj_dim,
stacked_num,
class_num=1749,
is_train=True):
feature = fluid.layers.data(
name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
seq_conv1 = fluid.layers.sequence_conv(
input=feature,
num_filters=1024,
filter_size=3,
filter_stride=1,
bias_attr=True)
bn1 = fluid.layers.batch_norm(
input=seq_conv1,
act="sigmoid",
is_test=False,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn1
for i in range(stacked_num):
fc = fluid.layers.fc(input=stack_input,
size=hidden_dim * 4,
bias_attr=True)
proj, cell = fluid.layers.dynamic_lstmp(
input=fc,
size=hidden_dim * 4,
proj_size=proj_dim,
bias_attr=True,
use_peepholes=True,
is_reverse=False,
cell_activation="tanh",
proj_activation="tanh")
bn = fluid.layers.batch_norm(
input=proj,
act="sigmoid",
is_test=False,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn
prediction = fluid.layers.fc(input=stack_input,
size=class_num,
act='softmax')
if not is_train: return feature, prediction
label = fluid.layers.data(
name="label", shape=[-1, 1], dtype="int64", lod_level=1)
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return prediction, label, avg_cost
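# Topology summary of dynamic_lstmp_model as built above:
#   feature (120 * 11, lod_level=1)
#     -> sequence_conv(1024, filter_size=3) -> batch_norm(sigmoid)
#     -> [ fc(4 * hidden_dim) -> dynamic_lstmp(proj_dim) -> batch_norm(sigmoid) ] x stacked_num
#     -> fc(class_num, softmax)
# In training mode the "label" input is added and the mean cross-entropy
# cost is returned alongside prediction and label.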
def train(args):
if args.use_cprof:
pr = cProfile.Profile()
pr.enable()
prediction, label, avg_cost = dynamic_lstmp_model(
args.hidden_dim, args.proj_dim, args.stacked_num)
adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
adam_optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
# clone from default main program
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
test_accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
inference_program = fluid.io.get_inference_program(test_target)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
ltrans = [
trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(
"data/global_mean_var_search26kHr"), trans_splice.TransSplice()
]
load_data.set_trans(ltrans)
load_data.load_list("/home/disk2/mini_speech_fbank_40/data/feature.lst",
"/home/disk2/mini_speech_fbank_40/data/label.lst")
res_feature = fluid.LoDTensor()
res_label = fluid.LoDTensor()
for pass_id in xrange(args.pass_num):
pass_start_time = time.time()
words_seen = 0
accuracy.reset(exe)
batch_id = 0
while True:
# load_data
one_batch = load_data.get_one_batch(args.batch_size)
            if one_batch is None:
break
(bat_feature, bat_label, lod) = one_batch
res_feature.set(bat_feature, place)
res_feature.set_lod([lod])
res_label.set(bat_label, place)
res_label.set_lod([lod])
batch_id += 1
words_seen += lod[-1]
loss, acc = exe.run(
fluid.default_main_program(),
feed={"feature": res_feature,
"label": res_label},
fetch_list=[avg_cost] + accuracy.metrics,
return_numpy=False)
train_acc = accuracy.eval(exe)
print("acc:", lodtensor_to_ndarray(loss))
pass_end_time = time.time()
time_consumed = pass_end_time - pass_start_time
words_per_sec = words_seen / time_consumed
def lodtensor_to_ndarray(lod_tensor):
dims = lod_tensor.get_dims()
ret = np.zeros(shape=dims).astype('float32')
for i in xrange(np.product(dims)):
ret.ravel()[i] = lod_tensor.get_float_element(i)
return ret, lod_tensor.lod()
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
if args.infer_only:
pass
else:
if args.use_nvprof and args.device == 'GPU':
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
train(args)
else:
train(args)
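# Example invocations (the script file name is assumed here for illustration):
#
#   python train.py --device CPU --batch_size 32 --pass_num 1
#   python train.py --device GPU --use_nvprof
#
# Note that load_list() above is given absolute /home/disk2/... paths, so the
# feature.lst / label.lst locations must exist (or be edited) before running.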
#by zhxfl 2018.01.31
import sys
import unittest
import numpy
sys.path.append("../")
import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.trans_add_delta as trans_add_delta
import data_utils.trans_splice as trans_splice
class TestTransMeanVarianceNorm(unittest.TestCase):
"""unit test for TransMeanVarianceNorm
"""
def test(self):
feature = numpy.zeros((2, 120), dtype="float32")
feature.fill(1)
trans = trans_mean_variance_norm.TransMeanVarianceNorm(
"../data/global_mean_var_search26kHr")
(feature1, label1) = trans.perform_trans((feature, None))
(mean, var) = trans.get_mean_var()
feature_flat1 = feature1.flatten()
feature_flat = feature.flatten()
one = numpy.ones((1), dtype="float32")
for idx, val in enumerate(feature_flat1):
cur_idx = idx % 120
self.assertAlmostEqual(val, (one[0] - mean[cur_idx]) * var[cur_idx])
class TestTransAddDelta(unittest.TestCase):
"""unit test TestTransAddDelta
"""
def test_regress(self):
"""test regress
"""
feature = numpy.zeros((14, 120), dtype="float32")
feature[0:5, 0:40].fill(1)
feature[0 + 5, 0:40].fill(1)
feature[1 + 5, 0:40].fill(2)
feature[2 + 5, 0:40].fill(3)
feature[3 + 5, 0:40].fill(4)
feature[8:14, 0:40].fill(4)
trans = trans_add_delta.TransAddDelta()
feature = feature.reshape((14 * 120))
trans._regress(feature, 5 * 120, feature, 5 * 120 + 40, 40, 4, 120)
trans._regress(feature, 5 * 120 + 40, feature, 5 * 120 + 80, 40, 4, 120)
feature = feature.reshape((14, 120))
tmp_feature = feature[5:5 + 4, :]
self.assertAlmostEqual(1.0, tmp_feature[0][0])
self.assertAlmostEqual(0.24, tmp_feature[0][119])
self.assertAlmostEqual(2.0, tmp_feature[1][0])
self.assertAlmostEqual(0.13, tmp_feature[1][119])
self.assertAlmostEqual(3.0, tmp_feature[2][0])
self.assertAlmostEqual(-0.13, tmp_feature[2][119])
self.assertAlmostEqual(4.0, tmp_feature[3][0])
self.assertAlmostEqual(-0.24, tmp_feature[3][119])
def test_perform(self):
"""test perform
"""
feature = numpy.zeros((4, 40), dtype="float32")
feature[0, 0:40].fill(1)
feature[1, 0:40].fill(2)
feature[2, 0:40].fill(3)
feature[3, 0:40].fill(4)
trans = trans_add_delta.TransAddDelta()
(feature, label) = trans.perform_trans((feature, None))
self.assertAlmostEqual(feature.shape[0], 4)
self.assertAlmostEqual(feature.shape[1], 120)
self.assertAlmostEqual(1.0, feature[0][0])
self.assertAlmostEqual(0.24, feature[0][119])
self.assertAlmostEqual(2.0, feature[1][0])
self.assertAlmostEqual(0.13, feature[1][119])
self.assertAlmostEqual(3.0, feature[2][0])
self.assertAlmostEqual(-0.13, feature[2][119])
self.assertAlmostEqual(4.0, feature[3][0])
self.assertAlmostEqual(-0.24, feature[3][119])
class TestTransSplice(unittest.TestCase):
    """unit test for TransSplice
    """

    def test_perform(self):
feature = numpy.zeros((8, 10), dtype="float32")
for i in xrange(feature.shape[0]):
feature[i, :].fill(i)
trans = trans_splice.TransSplice()
(feature, label) = trans.perform_trans((feature, None))
self.assertEqual(feature.shape[1], 110)
for i in xrange(8):
nzero_num = 5 - i
cur_val = 0.0
if nzero_num < 0:
cur_val = i - 5 - 1
for j in xrange(11):
if j <= nzero_num:
for k in xrange(10):
self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
else:
if cur_val < 7:
cur_val += 1.0
for k in xrange(10):
print i, j, k
print feature[i].reshape(11, 10)
self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
if __name__ == '__main__':
unittest.main()
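# To run this test file (it appends "../" to sys.path, so run it from the
# data_utils test directory; the relative mean/var file must exist there):
#
#   python test_data_trans.py        # the file name is assumed for illustration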