From 52cda823e3e1e76fc78a53f37e767aedff1bb43b Mon Sep 17 00:00:00 2001
From: zhxfl <291221622@qq.com>
Date: Thu, 1 Feb 2018 17:20:43 +0800
Subject: [PATCH] first data load

---
 fluid/DeepASR/data_utils/__init__.py        |   0
 fluid/DeepASR/data_utils/load_data.py       | 195 +++++++++++++++
 fluid/DeepASR/data_utils/trans_add_delta.py |  91 +++++++
 .../data_utils/trans_mean_variance_norm.py  |  56 +++++
 fluid/DeepASR/data_utils/trans_split.py     |  14 ++
 fluid/DeepASR/data_utils/trans_splice.py    |  50 ++++
 fluid/DeepASR/data_utils/util.py            |  26 ++
 fluid/DeepASR/stacked_dynamic_lstm.py       | 224 ++++++++++++++++++
 fluid/DeepASR/test/test_data_trans.py       | 113 +++++++++
 9 files changed, 769 insertions(+)
 create mode 100644 fluid/DeepASR/data_utils/__init__.py
 create mode 100644 fluid/DeepASR/data_utils/load_data.py
 create mode 100644 fluid/DeepASR/data_utils/trans_add_delta.py
 create mode 100644 fluid/DeepASR/data_utils/trans_mean_variance_norm.py
 create mode 100644 fluid/DeepASR/data_utils/trans_split.py
 create mode 100644 fluid/DeepASR/data_utils/trans_splice.py
 create mode 100644 fluid/DeepASR/data_utils/util.py
 create mode 100644 fluid/DeepASR/stacked_dynamic_lstm.py
 create mode 100644 fluid/DeepASR/test/test_data_trans.py

diff --git a/fluid/DeepASR/data_utils/__init__.py b/fluid/DeepASR/data_utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/fluid/DeepASR/data_utils/load_data.py b/fluid/DeepASR/data_utils/load_data.py
new file mode 100644
index 00000000..834f5e70
--- /dev/null
+++ b/fluid/DeepASR/data_utils/load_data.py
@@ -0,0 +1,195 @@
+#by zhxfl 2018.01.24
+"""
+Load speech data from disk.
+"""
+
+import random
+import Queue
+import numpy
+import struct
+
+g_ltrans = []
+g_lblock = []
+g_que_sample = Queue.Queue()
+g_nframe_dim = 120 * 11
+g_nstart_block_idx = 0
+g_nload_block_num = 1
+g_ndrop_frame_len = 256
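+
+# Expected layout of the .lst files consumed by load_list() below (an
+# inference from its pairwise line indexing, not a documented spec): each
+# block contributes two consecutive lines, the data file path followed by
+# its description file path, e.g.
+#     /path/feature_block0.bin
+#     /path/feature_block0.desc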
+
+
+class OneBlock(object):
+    """ One block of speech data.
+
+    Holds the paths of the label file, label description file,
+    feature file and feature description file for one block.
+    """
+
+    def __init__(self):
+        """The constructor."""
+        self.label = ""
+        self.label_desc = ""
+        self.feature = ""
+        self.feature_desc = ""
+
+
+def set_trans(ltrans):
+    """ set the list of transforms applied to every sample """
+    global g_ltrans
+    g_ltrans = ltrans
+
+
+def load_list(sfeature_lst, slabel_lst):
+    """ load the feature and label file lists """
+    global g_lblock
+
+    lFeature = open(sfeature_lst).readlines()
+    lLabel = open(slabel_lst).readlines()
+    assert len(lLabel) == len(lFeature)
+    for i in range(0, len(lFeature), 2):
+        one_block = OneBlock()
+
+        one_block.label = lLabel[i]
+        one_block.label_desc = lLabel[i + 1]
+        one_block.feature = lFeature[i]
+        one_block.feature_desc = lFeature[i + 1]
+        g_lblock.append(one_block)
+
+    random.shuffle(g_lblock)
+
+
+def load_one_block(lsample, id):
+    """ read one block and append its samples to lsample """
+    global g_lblock
+    if id >= len(g_lblock):
+        return
+
+    slabel_path = g_lblock[id].label.replace("\n", "")
+    slabel_desc_path = g_lblock[id].label_desc.replace("\n", "")
+    sfeature_path = g_lblock[id].feature.replace("\n", "")
+    sfeature_desc_path = g_lblock[id].feature_desc.replace("\n", "")
+
+    llabel_line = open(slabel_desc_path).readlines()
+    lfeature_line = open(sfeature_desc_path).readlines()
+
+    file_label_bin = open(slabel_path, "r")
+    file_feature_bin = open(sfeature_path, "r")
+
+    sample_num = int(llabel_line[0].split()[1])
+    assert sample_num == int(lfeature_line[0].split()[1])
+
+    llabel_line = llabel_line[1:]
+    lfeature_line = lfeature_line[1:]
+
+    for i in range(sample_num):
+        # read label
+        llabel_split = llabel_line[i].split()
+        nlabel_start = int(llabel_split[2])
+        nlabel_size = int(llabel_split[3])
+        nlabel_frame_num = int(llabel_split[4])
+
+        file_label_bin.seek(nlabel_start, 0)
+        label_bytes = file_label_bin.read(nlabel_size)
+        assert nlabel_frame_num * 4 == len(label_bytes)
+        label_array = struct.unpack('I' * nlabel_frame_num, label_bytes)
+        label_data = numpy.array(label_array, dtype=int)
+        label_data = label_data.reshape((nlabel_frame_num, 1))
+
+        # read feature
+        lfeature_split = lfeature_line[i].split()
+        nfeature_start = int(lfeature_split[2])
+        nfeature_size = int(lfeature_split[3])
+        nfeature_frame_num = int(lfeature_split[4])
+        nfeature_frame_dim = int(lfeature_split[5])
+
+        file_feature_bin.seek(nfeature_start, 0)
+        feature_bytes = file_feature_bin.read(nfeature_size)
+        assert nfeature_frame_num * nfeature_frame_dim * 4 == len(feature_bytes)
+        feature_array = struct.unpack('f' * nfeature_frame_num *
+                                      nfeature_frame_dim, feature_bytes)
+        feature_data = numpy.array(feature_array, dtype=float)
+        feature_data = feature_data.reshape(
+            (nfeature_frame_num, nfeature_frame_dim))
+        global g_ndrop_frame_len
+        # drop sentences that are too long
+        if g_ndrop_frame_len < feature_data.shape[0]:
+            continue
+        lsample.append((feature_data, label_data))
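+
+# Layout of one description line, as inferred from the indexing in
+# load_one_block() above, not from a documented spec (columns 0 and 1 are
+# unused here; the first line of a desc file carries the sample count in
+# its second field):
+#     <name> <?> <byte_offset> <byte_size> <frame_num> [<frame_dim>]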
+
+
+def load_block(lblock_id):
+    """
+    read the blocks listed in lblock_id and apply
+    the transforms in g_ltrans to every sample
+    """
+    global g_ltrans
+    lsample = []
+    for id in lblock_id:
+        load_one_block(lsample, id)
+
+    # transform sample
+    for (nidx, sample) in enumerate(lsample):
+        for trans in g_ltrans:
+            sample = trans.perform_trans(sample)
+        lsample[nidx] = sample
+
+    return lsample
+
+
+def move_sample(lsample):
+    """
+    move samples to the queue in random order
+    """
+    random.shuffle(lsample)
+
+    global g_que_sample
+    for sample in lsample:
+        g_que_sample.put(sample)
+
+
+def get_one_batch(nbatch_size):
+    """
+    construct one batch of nbatch_size sequences and
+    return (bat_feature, bat_label, lod), or None when exhausted
+    """
+    global g_que_sample
+    global g_nstart_block_idx
+    global g_nframe_dim
+    global g_nload_block_num
+    if g_que_sample.empty():
+        lsample = load_block(
+            range(g_nstart_block_idx, g_nstart_block_idx + g_nload_block_num,
+                  1))
+        move_sample(lsample)
+        g_nstart_block_idx += g_nload_block_num
+
+    if g_que_sample.empty():
+        g_nstart_block_idx = 0
+        return None
+    # accumulate the total frame number and the LoD of this batch
+    ncur_len = 0
+    lod = [0]
+    samples = []
+    for i in range(nbatch_size):
+        if g_que_sample.empty():
+            # data exhausted mid-batch; rewind for the next pass
+            g_nstart_block_idx = 0
+            break
+        (one_feature, one_label) = g_que_sample.get()
+        samples.append((one_feature, one_label))
+        ncur_len += one_feature.shape[0]
+        lod.append(ncur_len)
+
+    bat_feature = numpy.zeros((ncur_len, g_nframe_dim), dtype="float32")
+    bat_label = numpy.zeros((ncur_len, 1), dtype="int64")
+    ncur_len = 0
+    for sample in samples:
+        (one_feature, one_label) = sample
+        nframe_num = one_feature.shape[0]
+        nstart = ncur_len
+        nend = ncur_len + nframe_num
+        bat_feature[nstart:nend, :] = one_feature
+        bat_label[nstart:nend, :] = one_label
+        ncur_len += nframe_num
+    return (bat_feature, bat_label, lod)
diff --git a/fluid/DeepASR/data_utils/trans_add_delta.py b/fluid/DeepASR/data_utils/trans_add_delta.py
new file mode 100644
index 00000000..68e094b3
--- /dev/null
+++ b/fluid/DeepASR/data_utils/trans_add_delta.py
@@ -0,0 +1,91 @@
+#by zhxfl 2018.01.29
+import numpy
+import math
+
+
+class TransAddDelta(object):
+    """ add first- and second-order deltas to feature data,
+    expanding the feature shape from (a, b) to (a, b * 3)
+    """
+
+    def __init__(self, norder=2, nwindow=2):
+        """ init construction
+        Args:
+            norder: order of the delta regression, default 2
+            nwindow: half window size of the regression, default 2
+        """
+        self._norder = norder
+        self._nwindow = nwindow
+
+    def perform_trans(self, sample):
+        """ add deltas to the feature,
+        changing its shape from (a, b) to (a, b * 3)
+        """
+        (feature, label) = sample
+        frame_dim = feature.shape[1]
+        d_frame_dim = frame_dim * 3
+        head_filled = 5
+        tail_filled = 5
+        mat = numpy.zeros(
+            (feature.shape[0] + head_filled + tail_filled, d_frame_dim),
+            dtype="float32")
+        # pad the head by repeating the first frame
+        for i in xrange(head_filled):
+            numpy.copyto(mat[i, 0:frame_dim], feature[0, :])
+
+        numpy.copyto(
+            mat[head_filled:head_filled + feature.shape[0], 0:frame_dim],
+            feature[:, :])
+
+        # pad the tail by repeating the last frame
+        for i in xrange(head_filled + feature.shape[0], mat.shape[0], 1):
+            numpy.copyto(mat[i, 0:frame_dim], feature[feature.shape[0] - 1, :])
+
+        nframe = feature.shape[0]
+        start = head_filled
+        tmp_shape = mat.shape
+        mat = mat.reshape((tmp_shape[0] * tmp_shape[1]))
+        # first-order deltas go into columns [frame_dim, 2 * frame_dim)
+        self._regress(mat, start * d_frame_dim, mat,
+                      start * d_frame_dim + frame_dim, frame_dim, nframe,
+                      d_frame_dim)
+        # second-order deltas go into columns [2 * frame_dim, 3 * frame_dim)
+        self._regress(mat, start * d_frame_dim + frame_dim, mat,
+                      start * d_frame_dim + 2 * frame_dim, frame_dim, nframe,
+                      d_frame_dim)
+        mat.shape = tmp_shape
+        return (mat[head_filled:mat.shape[0] - tail_filled, :], label)
+
+    def _regress(self, data_in, start_in, data_out, start_out, size, n, step):
+        """ delta regression
+        Args:
+            data_in: input data
+            start_in: start index of data_in
+            data_out: output data
+            start_out: start index of data_out
+            size: frame dimension
+            n: frame number
+            step: 3 * frame dimension
+        """
+        sigma_t2 = 0.0
+        delta_window = self._nwindow
+        for t in xrange(1, delta_window + 1):
+            sigma_t2 += t * t
+
+        sigma_t2 *= 2.0
+        for i in xrange(n):
+            fp1 = start_in
+            fp2 = start_out
+            for j in xrange(size):
+                back = fp1
+                forw = fp1
+                dsum = 0.0
+                for t in xrange(1, delta_window + 1):
+                    back -= step
+                    forw += step
+                    dsum += t * (data_in[forw] - data_in[back])
+
+                data_out[fp2] = dsum / sigma_t2
+                fp1 += 1
+                fp2 += 1
+            start_in += step
+            start_out += step
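+
+# Worked example of the regression above (illustration only, HTK-style
+# deltas): with nwindow = 2, sigma_t2 = 2 * (1*1 + 2*2) = 10, so for frame t
+#     delta[t] = (1 * (x[t+1] - x[t-1]) + 2 * (x[t+2] - x[t-2])) / 10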
diff --git a/fluid/DeepASR/data_utils/trans_mean_variance_norm.py b/fluid/DeepASR/data_utils/trans_mean_variance_norm.py
new file mode 100644
index 00000000..ff609f4d
--- /dev/null
+++ b/fluid/DeepASR/data_utils/trans_mean_variance_norm.py
@@ -0,0 +1,56 @@
+#by zhxfl 2018.01.29
+import numpy
+import math
+
+
+class TransMeanVarianceNorm(object):
+    """ mean-variance normalization of feature data
+    """
+
+    def __init__(self, snorm_path):
+        """ init construction
+        Args:
+            snorm_path: the path of the global mean and variance file
+        """
+        self._mean = None
+        self._var = None
+        self._load_norm(snorm_path)
+
+    def _load_norm(self, snorm_path):
+        """ load the global mean and variance file
+        """
+        lLines = open(snorm_path).readlines()
+        nLen = len(lLines)
+        self._mean = numpy.zeros((nLen), dtype="float32")
+        self._var = numpy.zeros((nLen), dtype="float32")
+        self._nLen = nLen
+        for nidx, l in enumerate(lLines):
+            s = l.split()
+            assert len(s) == 2
+            self._mean[nidx] = float(s[0])
+            self._var[nidx] = 1.0 / math.sqrt(float(s[1]))
+            if self._var[nidx] > 100000.0:
+                self._var[nidx] = 100000.0
+
+    def get_mean_var(self):
+        """ get mean and var
+        """
+        return (self._mean, self._var)
+
+    def perform_trans(self, sample):
+        """ feature = (feature - mean) * var
+        """
+        (feature, label) = sample
+        shape = feature.shape
+        assert len(shape) == 2
+        nfeature_len = shape[0] * shape[1]
+        assert nfeature_len % self._nLen == 0
+        ncur_idx = 0
+        feature = feature.reshape((nfeature_len))
+        while ncur_idx < nfeature_len:
+            block = feature[ncur_idx:ncur_idx + self._nLen]
+            block = (block - self._mean) * self._var
+            feature[ncur_idx:ncur_idx + self._nLen] = block
+            ncur_idx += self._nLen
+        feature = feature.reshape(shape)
+        return (feature, label)
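+
+# Note: self._var holds 1 / sqrt(variance) (clamped at 1e5, with the second
+# column of the norm file presumably being the variance), so the transform
+# above is the usual (x - mean) / stddev applied blockwise over every frame.
+# E.g. x = 1.0, mean = 0.4, variance = 0.25 gives (1.0 - 0.4) * 2.0 = 1.2.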
diff --git a/fluid/DeepASR/data_utils/trans_split.py b/fluid/DeepASR/data_utils/trans_split.py
new file mode 100644
index 00000000..07d9784c
--- /dev/null
+++ b/fluid/DeepASR/data_utils/trans_split.py
@@ -0,0 +1,14 @@
+#by zhxfl
+
+
+class TransSplit(object):
+    """ transform for splitting feature data (judging by the name; stub).
+
+    Only the constructor is implemented so far, and its context
+    parameters currently mirror TransSplice.
+    """
+
+    def __init__(self, nleft_context=5, nright_context=5):
+        self._nleft_context = nleft_context
+        self._nright_context = nright_context
+
+    def perform_trans(self, sample):
+        """ not implemented yet """
+        raise NotImplementedError("TransSplit.perform_trans")
diff --git a/fluid/DeepASR/data_utils/trans_splice.py b/fluid/DeepASR/data_utils/trans_splice.py
new file mode 100644
index 00000000..6bad9c87
--- /dev/null
+++ b/fluid/DeepASR/data_utils/trans_splice.py
@@ -0,0 +1,50 @@
+#by zhxfl 2018.01.31
+import numpy
+
+
+class TransSplice(object):
+    """ splice the context frames onto each frame, expanding feature data
+    from shape (frame_num, frame_dim) to shape
+    (frame_num, frame_dim * (nleft_context + nright_context + 1))
+    """
+
+    def __init__(self, nleft_context=5, nright_context=5):
+        """ init construction
+        """
+        self._nleft_context = nleft_context
+        self._nright_context = nright_context
+
+    def perform_trans(self, sample):
+        """ splice the feature
+        """
+        (feature, label) = sample
+        nframe_num = feature.shape[0]
+        nframe_dim = feature.shape[1]
+        nnew_frame_dim = nframe_dim * (
+            self._nleft_context + self._nright_context + 1)
+        mat = numpy.zeros(
+            (nframe_num + self._nleft_context + self._nright_context,
+             nframe_dim),
+            dtype="float32")
+        ret = numpy.zeros((nframe_num, nnew_frame_dim), dtype="float32")
+
+        # pad the left context by repeating the first frame
+        for i in xrange(self._nleft_context):
+            mat[i, :] = feature[0, :]
+
+        # copy the middle part
+        mat[self._nleft_context:self._nleft_context +
+            nframe_num, :] = feature[:, :]
+
+        # pad the right context by repeating the last frame
+        for i in xrange(self._nright_context):
+            mat[i + self._nleft_context + nframe_num, :] = feature[-1, :]
+
+        mat = mat.reshape(mat.shape[0] * mat.shape[1])
+        ret = ret.reshape(ret.shape[0] * ret.shape[1])
+        for i in xrange(nframe_num):
+            numpy.copyto(ret[i * nnew_frame_dim:(i + 1) * nnew_frame_dim],
+                         mat[i * nframe_dim:i * nframe_dim + nnew_frame_dim])
+        ret = ret.reshape((nframe_num, nnew_frame_dim))
+        return (ret, label)
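+
+# Illustration (mirrors the unit test below): with frame_dim = 10 and the
+# default 5 + 1 + 5 context, an 8-frame input becomes 8 x 110, where row i
+# holds the edge-clamped frames i-5 .. i+5 concatenated.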
diff --git a/fluid/DeepASR/data_utils/util.py b/fluid/DeepASR/data_utils/util.py
new file mode 100644
index 00000000..fbc77192
--- /dev/null
+++ b/fluid/DeepASR/data_utils/util.py
@@ -0,0 +1,26 @@
+#by zhxfl 2018.01.31
+import numpy
+import paddle.v2.fluid as fluid
+
+
+def to_lodtensor(data, place):
+    """convert a list of sequences to LoDTensor
+    """
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = numpy.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+    res.set_lod([lod])
+    return res
+
+
+def lodtensor_to_ndarray(lod_tensor):
+    """convert LoDTensor to ndarray
+    """
+    dims = lod_tensor.get_dims()
+    ret = numpy.zeros(shape=dims).astype('float32')
+    for i in xrange(numpy.product(dims)):
+        ret.ravel()[i] = lod_tensor.get_float_element(i)
+    return ret, lod_tensor.lod()
diff --git a/fluid/DeepASR/stacked_dynamic_lstm.py b/fluid/DeepASR/stacked_dynamic_lstm.py
new file mode 100644
index 00000000..4b482cfe
--- /dev/null
+++ b/fluid/DeepASR/stacked_dynamic_lstm.py
@@ -0,0 +1,224 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import argparse
+import cProfile
+import time
+
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+import paddle.v2.fluid.profiler as profiler
+import data_utils.load_data as load_data
+import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
+import data_utils.trans_add_delta as trans_add_delta
+import data_utils.trans_splice as trans_splice
+
+
+def parse_args():
+    parser = argparse.ArgumentParser("LSTM model benchmark.")
+    parser.add_argument(
+        '--batch_size',
+        type=int,
+        default=32,
+        help='The number of sequences in a batch. (default: %(default)d)')
+    parser.add_argument(
+        '--stacked_num',
+        type=int,
+        default=5,
+        help='Number of LSTM layers to stack. (default: %(default)d)')
+    parser.add_argument(
+        '--proj_dim',
+        type=int,
+        default=512,
+        help='Projection size of the LSTM unit. (default: %(default)d)')
+    parser.add_argument(
+        '--hidden_dim',
+        type=int,
+        default=1024,
+        help='Hidden size of the LSTM unit. (default: %(default)d)')
+    parser.add_argument(
+        '--pass_num',
+        type=int,
+        default=100,
+        help='Number of epochs to train. (default: %(default)d)')
+    parser.add_argument(
+        '--learning_rate',
+        type=float,
+        default=0.002,
+        help='Learning rate used to train. (default: %(default)f)')
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='GPU',
+        choices=['CPU', 'GPU'],
+        help='The device type. (default: %(default)s)')
+    parser.add_argument(
+        '--infer_only', action='store_true', help='If set, run forward only.')
+    parser.add_argument(
+        '--use_cprof', action='store_true', help='If set, use cProfile.')
+    parser.add_argument(
+        '--use_nvprof',
+        action='store_true',
+        help='If set, use nvprof for CUDA.')
+    args = parser.parse_args()
+    return args
+
+
+def print_arguments(args):
+    vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
+                                vars(args)['device'] == 'GPU')
+    print('----------- Configuration Arguments -----------')
+    for arg, value in sorted(vars(args).iteritems()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+
+def dynamic_lstmp_model(hidden_dim,
+                        proj_dim,
+                        stacked_num,
+                        class_num=1749,
+                        is_train=True):
+    feature = fluid.layers.data(
+        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
+
+    seq_conv1 = fluid.layers.sequence_conv(
+        input=feature,
+        num_filters=1024,
+        filter_size=3,
+        filter_stride=1,
+        bias_attr=True)
+    bn1 = fluid.layers.batch_norm(
+        input=seq_conv1,
+        act="sigmoid",
+        is_test=False,
+        momentum=0.9,
+        epsilon=1e-05,
+        data_layout='NCHW')
+
+    stack_input = bn1
+    for i in range(stacked_num):
+        fc = fluid.layers.fc(input=stack_input,
+                             size=hidden_dim * 4,
+                             bias_attr=True)
+        proj, cell = fluid.layers.dynamic_lstmp(
+            input=fc,
+            size=hidden_dim * 4,
+            proj_size=proj_dim,
+            bias_attr=True,
+            use_peepholes=True,
+            is_reverse=False,
+            cell_activation="tanh",
+            proj_activation="tanh")
+        bn = fluid.layers.batch_norm(
+            input=proj,
+            act="sigmoid",
+            is_test=False,
+            momentum=0.9,
+            epsilon=1e-05,
+            data_layout='NCHW')
+        stack_input = bn
+
+    prediction = fluid.layers.fc(input=stack_input,
+                                 size=class_num,
+                                 act='softmax')
+
+    if not is_train: return feature, prediction
+
+    label = fluid.layers.data(
+        name="label", shape=[-1, 1], dtype="int64", lod_level=1)
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    return prediction, label, avg_cost
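+
+# Input contract of the model above, as declared by its data layers:
+#   "feature": float32 LoDTensor of shape [total_frames, 120 * 11], lod_level 1
+#   "label":   int64 LoDTensor of shape [total_frames, 1] with the same LoD
+# get_one_batch() in data_utils.load_data returns the matching
+# (bat_feature, bat_label, lod) numpy triple, which train() below wraps
+# into LoDTensors.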
+
+
+def train(args):
+    if args.use_cprof:
+        pr = cProfile.Profile()
+        pr.enable()
+
+    prediction, label, avg_cost = dynamic_lstmp_model(
+        args.hidden_dim, args.proj_dim, args.stacked_num)
+
+    adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
+    adam_optimizer.minimize(avg_cost)
+
+    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
+
+    # clone from default main program
+    inference_program = fluid.default_main_program().clone()
+    with fluid.program_guard(inference_program):
+        test_accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
+        test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
+        inference_program = fluid.io.get_inference_program(test_target)
+
+    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+
+    ltrans = [
+        trans_add_delta.TransAddDelta(2, 2),
+        trans_mean_variance_norm.TransMeanVarianceNorm(
+            "data/global_mean_var_search26kHr"), trans_splice.TransSplice()
+    ]
+    load_data.set_trans(ltrans)
+    load_data.load_list("/home/disk2/mini_speech_fbank_40/data/feature.lst",
+                        "/home/disk2/mini_speech_fbank_40/data/label.lst")
+
+    res_feature = fluid.LoDTensor()
+    res_label = fluid.LoDTensor()
+    for pass_id in xrange(args.pass_num):
+        pass_start_time = time.time()
+        words_seen = 0
+        accuracy.reset(exe)
+        batch_id = 0
+        while True:
+            # load one batch; None means the data of this pass is exhausted
+            one_batch = load_data.get_one_batch(args.batch_size)
+            if one_batch is None:
+                break
+            (bat_feature, bat_label, lod) = one_batch
+            res_feature.set(bat_feature, place)
+            res_feature.set_lod([lod])
+            res_label.set(bat_label, place)
+            res_label.set_lod([lod])
+
+            batch_id += 1
+
+            words_seen += lod[-1]
+
+            loss, acc = exe.run(
+                fluid.default_main_program(),
+                feed={"feature": res_feature,
+                      "label": res_label},
+                fetch_list=[avg_cost] + accuracy.metrics,
+                return_numpy=False)
+            train_acc = accuracy.eval(exe)
+            print("loss:", lodtensor_to_ndarray(loss)[0], "acc:", train_acc)
+
+        pass_end_time = time.time()
+        time_consumed = pass_end_time - pass_start_time
+        words_per_sec = words_seen / time_consumed
+        print("pass %d, time consumed: %fs, words per second: %f" %
+              (pass_id, time_consumed, words_per_sec))
+
+    if args.use_cprof:
+        pr.disable()
+        pr.print_stats(sort='cumulative')
+
+
+def lodtensor_to_ndarray(lod_tensor):
+    dims = lod_tensor.get_dims()
+    ret = np.zeros(shape=dims).astype('float32')
+    for i in xrange(np.product(dims)):
+        ret.ravel()[i] = lod_tensor.get_float_element(i)
+    return ret, lod_tensor.lod()
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    print_arguments(args)
+
+    if args.infer_only:
+        pass
+    else:
+        if args.use_nvprof and args.device == 'GPU':
+            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
+                train(args)
+        else:
+            train(args)
diff --git a/fluid/DeepASR/test/test_data_trans.py b/fluid/DeepASR/test/test_data_trans.py
new file mode 100644
index 00000000..52623313
--- /dev/null
+++ b/fluid/DeepASR/test/test_data_trans.py
@@ -0,0 +1,113 @@
+#by zhxfl 2018.01.31
+import sys
+import unittest
+import numpy
+sys.path.append("../")
+import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
+import data_utils.trans_add_delta as trans_add_delta
+import data_utils.trans_splice as trans_splice
+
+
+class TestTransMeanVarianceNorm(unittest.TestCase):
+    """unit test for TransMeanVarianceNorm
+    """
+
+    def test(self):
+        feature = numpy.zeros((2, 120), dtype="float32")
+        feature.fill(1)
+        trans = trans_mean_variance_norm.TransMeanVarianceNorm(
+            "../data/global_mean_var_search26kHr")
+        (feature1, label1) = trans.perform_trans((feature, None))
+        (mean, var) = trans.get_mean_var()
+        feature_flat1 = feature1.flatten()
+        feature_flat = feature.flatten()
+        one = numpy.ones((1), dtype="float32")
+        for idx, val in enumerate(feature_flat1):
+            cur_idx = idx % 120
+            self.assertAlmostEqual(val, (one[0] - mean[cur_idx]) * var[cur_idx])
+
+
+class TestTransAddDelta(unittest.TestCase):
+    """unit test for TransAddDelta
+    """
+
+    def test_regress(self):
+        """test _regress
+        """
+        feature = numpy.zeros((14, 120), dtype="float32")
+        feature[0:5, 0:40].fill(1)
+        feature[0 + 5, 0:40].fill(1)
+        feature[1 + 5, 0:40].fill(2)
+        feature[2 + 5, 0:40].fill(3)
+        feature[3 + 5, 0:40].fill(4)
+        feature[8:14, 0:40].fill(4)
+        trans = trans_add_delta.TransAddDelta()
+        feature = feature.reshape((14 * 120))
+        trans._regress(feature, 5 * 120, feature, 5 * 120 + 40, 40, 4, 120)
+        trans._regress(feature, 5 * 120 + 40, feature, 5 * 120 + 80, 40, 4, 120)
+        feature = feature.reshape((14, 120))
+        tmp_feature = feature[5:5 + 4, :]
+        self.assertAlmostEqual(1.0, tmp_feature[0][0])
+        self.assertAlmostEqual(0.24, tmp_feature[0][119])
+        self.assertAlmostEqual(2.0, tmp_feature[1][0])
+        self.assertAlmostEqual(0.13, tmp_feature[1][119])
+        self.assertAlmostEqual(3.0, tmp_feature[2][0])
+        self.assertAlmostEqual(-0.13, tmp_feature[2][119])
+        self.assertAlmostEqual(4.0, tmp_feature[3][0])
+        self.assertAlmostEqual(-0.24, tmp_feature[3][119])
+
+    def test_perform(self):
+        """test perform_trans
+        """
+        feature = numpy.zeros((4, 40), dtype="float32")
+        feature[0, 0:40].fill(1)
+        feature[1, 0:40].fill(2)
+        feature[2, 0:40].fill(3)
+        feature[3, 0:40].fill(4)
+        trans = trans_add_delta.TransAddDelta()
+        (feature, label) = trans.perform_trans((feature, None))
+        self.assertAlmostEqual(feature.shape[0], 4)
+        self.assertAlmostEqual(feature.shape[1], 120)
+        self.assertAlmostEqual(1.0, feature[0][0])
+        self.assertAlmostEqual(0.24, feature[0][119])
+        self.assertAlmostEqual(2.0, feature[1][0])
+        self.assertAlmostEqual(0.13, feature[1][119])
+        self.assertAlmostEqual(3.0, feature[2][0])
+        self.assertAlmostEqual(-0.13, feature[2][119])
+        self.assertAlmostEqual(4.0, feature[3][0])
+        self.assertAlmostEqual(-0.24, feature[3][119])
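+
+# Where 0.24 above comes from (worked by hand): the padded sequence of frame
+# values is 1,1,1,1,1, 1,2,3,4, 4,... so the first-order delta of the first
+# real frame is (1*(2-1) + 2*(3-1)) / 10 = 0.5, the next two are 0.8, and the
+# second-order delta, whose left context is still zero-filled, is
+# (1*(0.8-0) + 2*(0.8-0)) / 10 = 0.24.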
+
+
+class TestTransSplice(unittest.TestCase):
+    """unit test for TransSplice
+    """
+
+    def test_perform(self):
+        feature = numpy.zeros((8, 10), dtype="float32")
+        for i in xrange(feature.shape[0]):
+            feature[i, :].fill(i)
+
+        trans = trans_splice.TransSplice()
+        (feature, label) = trans.perform_trans((feature, None))
+        self.assertEqual(feature.shape[1], 110)
+
+        for i in xrange(8):
+            nzero_num = 5 - i
+            cur_val = 0.0
+            if nzero_num < 0:
+                cur_val = i - 5 - 1
+            for j in xrange(11):
+                if j <= nzero_num:
+                    for k in xrange(10):
+                        self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
+                else:
+                    if cur_val < 7:
+                        cur_val += 1.0
+                    for k in xrange(10):
+                        self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab