diff --git a/fluid/DeepASR/data_utils/augmentor/tests/test_data_trans.py b/fluid/DeepASR/data_utils/augmentor/tests/test_data_trans.py index 9f76a9f8590d5f148398c4ffaff77dc95421df83..6b18f3fa5958a9e44899b39b1f583311f186f72e 100644 --- a/fluid/DeepASR/data_utils/augmentor/tests/test_data_trans.py +++ b/fluid/DeepASR/data_utils/augmentor/tests/test_data_trans.py @@ -8,6 +8,7 @@ import numpy as np import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm import data_utils.augmentor.trans_add_delta as trans_add_delta import data_utils.augmentor.trans_splice as trans_splice +import data_utils.augmentor.trans_delay as trans_delay class TestTransMeanVarianceNorm(unittest.TestCase): @@ -112,5 +113,24 @@ class TestTransSplict(unittest.TestCase): self.assertAlmostEqual(feature[i][j * 10 + k], cur_val) +class TestTransDelay(unittest.TestCase): + """unittest TransDelay + """ + + def test_perform(self): + label = np.zeros((10, 1), dtype="int64") + for i in xrange(10): + label[i][0] = i + + trans = trans_delay.TransDelay(5) + (_, label, _) = trans.perform_trans((None, label, None)) + + for i in xrange(5): + self.assertAlmostEqual(label[i + 5][0], i) + + for i in xrange(5): + self.assertAlmostEqual(label[i][0], 0) + + if __name__ == '__main__': unittest.main() diff --git a/fluid/DeepASR/data_utils/augmentor/trans_delay.py b/fluid/DeepASR/data_utils/augmentor/trans_delay.py new file mode 100644 index 0000000000000000000000000000000000000000..b782498edfd5443806a6c80e3b4fe91b8e2b1cc9 --- /dev/null +++ b/fluid/DeepASR/data_utils/augmentor/trans_delay.py @@ -0,0 +1,37 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import math + + +class TransDelay(object): + """ Delay label, and copy first label value in the front. + Attributes: + _delay_time : the delay frame num of label + """ + + def __init__(self, delay_time): + """init construction + Args: + delay_time : the delay frame num of label + """ + self._delay_time = delay_time + + def perform_trans(self, sample): + """ + Args: + sample(object):input sample, contain feature numpy and label numpy, sample name list + Returns: + (feature, label, name) + """ + (feature, label, name) = sample + + shape = label.shape + assert len(shape) == 2 + label[self._delay_time:shape[0]] = label[0:shape[0] - self._delay_time] + for i in xrange(self._delay_time): + label[i][0] = label[self._delay_time][0] + + return (feature, label, name) diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py index 4a4073c02279bfd74b8ce31d0877a5338400d93b..831581924e357d0122860de3ff12676adae43bdf 100644 --- a/fluid/DeepASR/infer_by_ckpt.py +++ b/fluid/DeepASR/infer_by_ckpt.py @@ -12,6 +12,7 @@ import paddle.fluid as fluid import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm import data_utils.augmentor.trans_add_delta as trans_add_delta import data_utils.augmentor.trans_splice as trans_splice +import data_utils.augmentor.trans_delay as trans_delay import data_utils.async_data_reader as reader from decoder.post_decode_faster import Decoder from data_utils.util import lodtensor_to_ndarray @@ -36,7 +37,7 @@ def parse_args(): parser.add_argument( '--frame_dim', type=int, - default=120 * 11, + default=80, help='Frame dimension of feature data. (default: %(default)d)') parser.add_argument( '--stacked_num', @@ -179,7 +180,7 @@ def infer_from_ckpt(args): ltrans = [ trans_add_delta.TransAddDelta(2, 2), trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var), - trans_splice.TransSplice() + trans_splice.TransSplice(), trans_delay.TransDelay(5) ] feature_t = fluid.LoDTensor() diff --git a/fluid/DeepASR/model_utils/model.py b/fluid/DeepASR/model_utils/model.py index 8fb7596e122447979cf392d6610ad2b7281d195b..3f4fdccfc93bb14496ce54ae59e49651eca2a537 100644 --- a/fluid/DeepASR/model_utils/model.py +++ b/fluid/DeepASR/model_utils/model.py @@ -32,25 +32,23 @@ def stacked_lstmp_model(frame_dim, # network configuration def _net_conf(feature, label): - seq_conv1 = fluid.layers.sequence_conv( + conv1 = fluid.layers.conv2d( input=feature, - num_filters=1024, + num_filters=32, filter_size=3, - filter_stride=1, - bias_attr=True) - bn1 = fluid.layers.batch_norm( - input=seq_conv1, - act="sigmoid", - is_test=not is_train, - momentum=0.9, - epsilon=1e-05, - data_layout='NCHW') + stride=1, + padding=1, + bias_attr=True, + act="relu") - stack_input = bn1 + pool1 = fluid.layers.pool2d( + conv1, pool_size=3, pool_type="max", pool_stride=2, pool_padding=0) + + stack_input = pool1 for i in range(stacked_num): fc = fluid.layers.fc(input=stack_input, size=hidden_dim * 4, - bias_attr=True) + bias_attr=None) proj, cell = fluid.layers.dynamic_lstmp( input=fc, size=hidden_dim * 4, @@ -62,7 +60,6 @@ def stacked_lstmp_model(frame_dim, proj_activation="tanh") bn = fluid.layers.batch_norm( input=proj, - act="sigmoid", is_test=not is_train, momentum=0.9, epsilon=1e-05, @@ -80,7 +77,10 @@ def stacked_lstmp_model(frame_dim, # data feeder feature = fluid.layers.data( - name="feature", shape=[-1, frame_dim], dtype="float32", lod_level=1) + name="feature", + shape=[-1, 3, 11, frame_dim], + dtype="float32", + lod_level=1) label = fluid.layers.data( name="label", shape=[-1, 1], dtype="int64", lod_level=1) diff --git a/fluid/DeepASR/tools/profile.py b/fluid/DeepASR/tools/profile.py index 8d720c16cd0ec6a9d4bb533a878b07973ced7176..5af55614a459380292ad969efd21e62e56f6c84b 100644 --- a/fluid/DeepASR/tools/profile.py +++ b/fluid/DeepASR/tools/profile.py @@ -13,6 +13,7 @@ import _init_paths import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm import data_utils.augmentor.trans_add_delta as trans_add_delta import data_utils.augmentor.trans_splice as trans_splice +import data_utils.augmentor.trans_delay as trans_delay import data_utils.async_data_reader as reader from model_utils.model import stacked_lstmp_model from data_utils.util import lodtensor_to_ndarray @@ -87,7 +88,7 @@ def parse_args(): parser.add_argument( '--max_batch_num', type=int, - default=10, + default=11, help='Maximum number of batches for profiling. (default: %(default)d)') parser.add_argument( '--first_batches_to_skip', @@ -146,10 +147,10 @@ def profile(args): ltrans = [ trans_add_delta.TransAddDelta(2, 2), trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var), - trans_splice.TransSplice() + trans_splice.TransSplice(), trans_delay.TransDelay(5) ] - data_reader = reader.AsyncDataReader(args.feature_lst, args.label_lst) + data_reader = reader.AsyncDataReader(args.feature_lst, args.label_lst, -1) data_reader.set_transformers(ltrans) feature_t = fluid.LoDTensor() diff --git a/fluid/DeepASR/train.py b/fluid/DeepASR/train.py index be99998c8aa7f88d49dab711e94dcd7cfef042d6..0111161a235715d7969e732554e3cb41fadb4c9a 100644 --- a/fluid/DeepASR/train.py +++ b/fluid/DeepASR/train.py @@ -12,6 +12,7 @@ import paddle.fluid as fluid import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm import data_utils.augmentor.trans_add_delta as trans_add_delta import data_utils.augmentor.trans_splice as trans_splice +import data_utils.augmentor.trans_delay as trans_delay import data_utils.async_data_reader as reader from data_utils.util import lodtensor_to_ndarray from model_utils.model import stacked_lstmp_model @@ -33,7 +34,7 @@ def parse_args(): parser.add_argument( '--frame_dim', type=int, - default=120 * 11, + default=80, help='Frame dimension of feature data. (default: %(default)d)') parser.add_argument( '--stacked_num', @@ -53,7 +54,7 @@ def parse_args(): parser.add_argument( '--class_num', type=int, - default=1749, + default=3040, help='Number of classes in label. (default: %(default)d)') parser.add_argument( '--pass_num', @@ -157,6 +158,7 @@ def train(args): # program for test test_program = fluid.default_main_program().clone() + #optimizer = fluid.optimizer.Momentum(learning_rate=args.learning_rate, momentum=0.9) optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) optimizer.minimize(avg_cost) @@ -171,7 +173,7 @@ def train(args): ltrans = [ trans_add_delta.TransAddDelta(2, 2), trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var), - trans_splice.TransSplice() + trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5) ] feature_t = fluid.LoDTensor() @@ -220,6 +222,8 @@ def train(args): args.minimum_batch_size)): # load_data (features, labels, lod, name_lst) = batch_data + features = np.reshape(features, (-1, 11, 3, args.frame_dim)) + features = np.transpose(features, (0, 2, 1, 3)) feature_t.set(features, place) feature_t.set_lod([lod]) label_t.set(labels, place)