diff --git a/fluid/DeepASR/data_utils/__init__.py b/fluid/DeepASR/data_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/fluid/DeepASR/data_utils/data_read.py b/fluid/DeepASR/data_utils/data_read.py
new file mode 100644
index 0000000000000000000000000000000000000000..85654fa759866353ffb97669e6ff65534cbd7664
--- /dev/null
+++ b/fluid/DeepASR/data_utils/data_read.py
@@ -0,0 +1,253 @@
+"""This module reads samples from disk.
+    Samples loaded from one block are shuffled and pushed into a queue,
+    from which batches are drawn.
+    TODO:
+    1. read blocks from disk with multiprocessing
+"""
+import random
+import Queue
+import numpy as np
+import struct
+import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
+import data_utils.trans_add_delta as trans_add_delta
+
+
+class OneBlock(object):
+    """ struct for one block:
+        contains label, label_desc, feature, feature_desc
+
+    Attributes:
+        label(str): label path of one block
+        label_desc(str): label description path of one block
+        feature(str): feature path of one block
+        feature_desc(str): feature description path of one block
+    """
+
+    def __init__(self):
+        """the constructor."""
+
+        self.label = "label"
+        self.label_desc = "label_desc"
+        self.feature = "feature"
+        self.feature_desc = "feature_desc"
+
+
+class DataRead(object):
+    """ read feature/label samples from disk and assemble mini-batches
+
+    Attributes:
+        _lblock(list of `OneBlock`): the list of blocks
+        _ndrop_sentence_len(int): drop sentences whose frame number is
+                                  larger than _ndrop_sentence_len
+        _que_sample(obj:`Queue`): sample buffer
+        _nframe_dim(int): the batch sample frame_dim (TODO: remove)
+        _nstart_block_idx(int): the start block id
+        _nload_block_num(int): the number of blocks loaded at a time
+    """
+
+    def __init__(self, sfeature_lst, slabel_lst, ndrop_sentence_len=512):
+        """
+        Args:
+            sfeature_lst(str): feature list path
+            slabel_lst(str): label list path
+            ndrop_sentence_len(int): drop threshold for long sentences
+        Returns:
+            None
+        """
+        self._lblock = []
+        self._ndrop_sentence_len = ndrop_sentence_len
+        self._que_sample = Queue.Queue()
+        self._nframe_dim = 120 * 11
+        self._nstart_block_idx = 0
+        self._nload_block_num = 1
+
+        self._load_list(sfeature_lst, slabel_lst)
+
+    def _load_list(self, sfeature_lst, slabel_lst):
+        """ load the lists and shuffle the blocks
+        Args:
+            sfeature_lst(str): feature list path
+            slabel_lst(str): label list path
+        Returns:
+            None
+        """
+        lfeature = open(sfeature_lst).readlines()
+        llabel = open(slabel_lst).readlines()
+        assert len(llabel) == len(lfeature)
+        # each block takes two lines: the data path and the description path
+        for i in range(0, len(lfeature), 2):
+            one_block = OneBlock()
+
+            one_block.label = llabel[i]
+            one_block.label_desc = llabel[i + 1]
+            one_block.feature = lfeature[i]
+            one_block.feature_desc = lfeature[i + 1]
+            self._lblock.append(one_block)
+
+        random.shuffle(self._lblock)
+
+    def _load_one_block(self, lsample, id):
+        """ read one block by id and append the loaded samples to lsample
+        Args:
+            lsample(list): output sample list
+            id(int): block id
+        Returns:
+            None
+        """
+        if id >= len(self._lblock):
+            return
+
+        slabel_path = self._lblock[id].label.strip()
+        slabel_desc_path = self._lblock[id].label_desc.strip()
+        sfeature_path = self._lblock[id].feature.strip()
+        sfeature_desc_path = self._lblock[id].feature_desc.strip()
+
+        llabel_line = open(slabel_desc_path).readlines()
+        lfeature_line = open(sfeature_desc_path).readlines()
+
+        file_label_bin = open(slabel_path, "rb")
+        file_feature_bin = open(sfeature_path, "rb")
+
+        sample_num = int(llabel_line[0].split()[1])
+        assert sample_num == int(lfeature_line[0].split()[1])
+
+        llabel_line = llabel_line[1:]
+        lfeature_line = lfeature_line[1:]
+
+        for i in range(sample_num):
+            # read the label: one int32 per frame
+            llabel_split = llabel_line[i].split()
+            nlabel_start = int(llabel_split[2])
+            nlabel_size = int(llabel_split[3])
+            nlabel_frame_num = int(llabel_split[4])
+
+            file_label_bin.seek(nlabel_start, 0)
+            label_bytes = file_label_bin.read(nlabel_size)
+            assert nlabel_frame_num * 4 == len(label_bytes)
+            label_array = struct.unpack('I' * nlabel_frame_num, label_bytes)
+            label_data = np.array(label_array, dtype="int64")
+            label_data = label_data.reshape((nlabel_frame_num, 1))
+
+            # read the feature: nfeature_frame_dim float32s per frame
+            lfeature_split = lfeature_line[i].split()
+            nfeature_start = int(lfeature_split[2])
+            nfeature_size = int(lfeature_split[3])
+            nfeature_frame_num = int(lfeature_split[4])
+            nfeature_frame_dim = int(lfeature_split[5])
+
+            file_feature_bin.seek(nfeature_start, 0)
+            feature_bytes = file_feature_bin.read(nfeature_size)
+            assert nfeature_frame_num * nfeature_frame_dim * 4 == len(
+                feature_bytes)
+            feature_array = struct.unpack('f' * nfeature_frame_num *
+                                          nfeature_frame_dim, feature_bytes)
+            feature_data = np.array(feature_array, dtype="float32")
+            feature_data = feature_data.reshape(
+                (nfeature_frame_num, nfeature_frame_dim))
+
+            # drop long sentences
+            if self._ndrop_sentence_len < feature_data.shape[0]:
+                continue
+            lsample.append((feature_data, label_data))
+
+    def get_one_batch(self, nbatch_size):
+        """ construct one batch (feature, label); batch size is nbatch_size
+        Args:
+            nbatch_size(int): batch size
+        Returns:
+            (bat_feature, bat_label, lod), or None when all blocks are consumed
+        """
+        if self._que_sample.empty():
+            lsample = self._load_block(
+                range(self._nstart_block_idx, self._nstart_block_idx +
+                      self._nload_block_num, 1))
+            self._move_sample(lsample)
+            self._nstart_block_idx += self._nload_block_num
+
+        if self._que_sample.empty():
+            self._nstart_block_idx = 0
+            return None
+        # accumulate the total frame number to build the lod
+        ncur_len = 0
+        lod = [0]
+        samples = []
+        for i in range(nbatch_size):
+            if self._que_sample.empty():
+                # the queue ran dry; return a partial batch and rewind
+                self._nstart_block_idx = 0
+            else:
+                (one_feature, one_label) = self._que_sample.get()
+                samples.append((one_feature, one_label))
+                ncur_len += one_feature.shape[0]
+                lod.append(ncur_len)
+
+        bat_feature = np.zeros((ncur_len, self._nframe_dim), dtype="float32")
+        bat_label = np.zeros((ncur_len, 1), dtype="int64")
+        ncur_len = 0
+        for sample in samples:
+            one_feature = sample[0]
+            one_label = sample[1]
+            nframe_num = one_feature.shape[0]
+            nstart = ncur_len
+            nend = ncur_len + nframe_num
+            bat_feature[nstart:nend, :] = one_feature
+            bat_label[nstart:nend, :] = one_label
+            ncur_len += nframe_num
+        return (bat_feature, bat_label, lod)
+
+    def set_trans(self, ltrans):
+        """ set the transform list
+        Args:
+            ltrans(list): data transform list
+        Returns:
+            None
+        """
+        self._ltrans = ltrans
+
+    def _load_block(self, lblock_id):
+        """ read blocks by id and apply the transforms to each sample
+        Args:
+            lblock_id(list): the block id list
+        Returns:
+            lsample(list): the loaded and transformed samples
+        """
+        lsample = []
+        for id in lblock_id:
+            self._load_one_block(lsample, id)
+
+        # transform each sample in place
+        for (nidx, sample) in enumerate(lsample):
+            for trans in self._ltrans:
+                sample = trans.perform_trans(sample)
+            lsample[nidx] = sample
+
+        return lsample
+
+    def _move_sample(self, lsample):
+        """ shuffle the samples and move them into the queue
+        Args:
+            lsample(list): one block of samples read from disk
+        Returns:
+            None
+        """
+        random.shuffle(lsample)
+
+        for sample in lsample:
+            self._que_sample.put(sample)
diff --git a/fluid/DeepASR/data_utils/trans_add_delta.py b/fluid/DeepASR/data_utils/trans_add_delta.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cb62ce898a3ee00bb062de6fafdcc0b33fb59b4
--- /dev/null
+++ b/fluid/DeepASR/data_utils/trans_add_delta.py
@@ -0,0 +1,100 @@
+import numpy as np
+
+
+class TransAddDelta(object):
+    """ add first- and second-order deltas to the feature data,
+        expanding the feature shape from (a, b) to (a, b * 3)
+
+    Attributes:
+        _norder(int): delta order
+        _nwindow(int): regression window size
+    """
+
+    def __init__(self, norder=2, nwindow=2):
+        """ init construction
+        Args:
+            norder: delta order, default 2
+            nwindow: regression window size, default 2
+        """
+        self._norder = norder
+        self._nwindow = nwindow
+
+    def perform_trans(self, sample):
+        """ add deltas to the feature,
+            changing the feature shape from (a, b) to (a, b * 3)
+
+        Args:
+            sample(tuple): (feature numpy.array, label numpy.array)
+        Returns:
+            (feature, label)
+        """
+        (feature, label) = sample
+        frame_dim = feature.shape[1]
+        d_frame_dim = frame_dim * 3
+        head_filled = 5
+        tail_filled = 5
+        mat = np.zeros(
+            (feature.shape[0] + head_filled + tail_filled, d_frame_dim),
+            dtype="float32")
+        # pad the head by repeating the first frame
+        for i in xrange(head_filled):
+            np.copyto(mat[i, 0:frame_dim], feature[0, :])
+
+        np.copyto(mat[head_filled:head_filled + feature.shape[0], 0:frame_dim],
+                  feature[:, :])
+
+        # pad the tail by repeating the last frame
+        for i in xrange(head_filled + feature.shape[0], mat.shape[0], 1):
+            np.copyto(mat[i, 0:frame_dim], feature[feature.shape[0] - 1, :])
+
+        nframe = feature.shape[0]
+        start = head_filled
+        tmp_shape = mat.shape
+        mat = mat.reshape((tmp_shape[0] * tmp_shape[1]))
+        # first-order delta, regressed over the raw features
+        self._regress(mat, start * d_frame_dim, mat,
+                      start * d_frame_dim + frame_dim, frame_dim, nframe,
+                      d_frame_dim)
+        # second-order delta, regressed over the first-order delta
+        self._regress(mat, start * d_frame_dim + frame_dim, mat,
+                      start * d_frame_dim + 2 * frame_dim, frame_dim, nframe,
+                      d_frame_dim)
+        mat.shape = tmp_shape
+        return (mat[head_filled:mat.shape[0] - tail_filled, :], label)
+
+    def _regress(self, data_in, start_in, data_out, start_out, size, n, step):
+        """ linear regression over the delta window
+        Args:
+            data_in: input data
+            start_in: start index of data_in
+            data_out: output data
+            start_out: start index of data_out
+            size: frame dimension
+            n: frame number
+            step: 3 * frame dimension
+        Returns:
+            None
+        """
+        sigma_t2 = 0.0
+        delta_window = self._nwindow
+        for t in xrange(1, delta_window + 1):
+            sigma_t2 += t * t
+
+        sigma_t2 *= 2.0
+        for i in xrange(n):
+            fp1 = start_in
+            fp2 = start_out
+            for j in xrange(size):
+                back = fp1
+                forw = fp1
+                sum = 0.0
+                for t in xrange(1, delta_window + 1):
+                    back -= step
+                    forw += step
+                    sum += t * (data_in[forw] - data_in[back])
+
+                data_out[fp2] = sum / sigma_t2
+                fp1 += 1
+                fp2 += 1
+            start_in += step
+            start_out += step
diff --git a/fluid/DeepASR/data_utils/trans_mean_variance_norm.py b/fluid/DeepASR/data_utils/trans_mean_variance_norm.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3336531935ddb11e742bbf99d0240c43b91def6
--- /dev/null
+++ b/fluid/DeepASR/data_utils/trans_mean_variance_norm.py
@@ -0,0 +1,67 @@
+import numpy as np
+import math
+
+
+class TransMeanVarianceNorm(object):
+    """ mean and variance normalization for the feature data
+    Attributes:
+        _mean(numpy.array): the feature mean vector
+        _var(numpy.array): the feature variance (stored as 1/stddev)
+    """
+
+    def __init__(self, snorm_path):
+        """ init construction
+        Args:
+            snorm_path: the path of the mean and variance file
+        """
+        self._mean = None
+        self._var = None
+        self._load_norm(snorm_path)
+
+    def _load_norm(self, snorm_path):
+        """ load the mean/variance file
+        Args:
+            snorm_path(str): the file path
+        """
+        lLines = open(snorm_path).readlines()
+        nLen = len(lLines)
+        self._mean = np.zeros((nLen), dtype="float32")
+        self._var = np.zeros((nLen), dtype="float32")
+        self._nLen = nLen
+        for nidx, l in enumerate(lLines):
+            s = l.split()
+            assert len(s) == 2
+            self._mean[nidx] = float(s[0])
+            # store the reciprocal stddev, clipped to avoid blow-ups
+            self._var[nidx] = 1.0 / math.sqrt(float(s[1]))
+            if self._var[nidx] > 100000.0:
+                self._var[nidx] = 100000.0
+
+    def get_mean_var(self):
+        """ get the mean and variance
+        Returns:
+            (mean, var)
+        """
+        return (self._mean, self._var)
+
+    def perform_trans(self, sample):
+        """ feature = (feature - mean) * var
+        Args:
+            sample(tuple): (feature numpy.array, label numpy.array)
+        Returns:
+            (feature, label)
+        """
+        (feature, label) = sample
+        shape = feature.shape
+        assert len(shape) == 2
+        nfeature_len = shape[0] * shape[1]
+        assert nfeature_len % self._nLen == 0
+        ncur_idx = 0
+        feature = feature.reshape((nfeature_len))
+        while ncur_idx < nfeature_len:
+            block = feature[ncur_idx:ncur_idx + self._nLen]
+            block = (block - self._mean) * self._var
+            feature[ncur_idx:ncur_idx + self._nLen] = block
+            ncur_idx += self._nLen
+        feature = feature.reshape(shape)
+        return (feature, label)
diff --git a/fluid/DeepASR/data_utils/trans_splice.py b/fluid/DeepASR/data_utils/trans_splice.py
new file mode 100644
index 0000000000000000000000000000000000000000..4cebe2c163085fbcbbd5184b59c8f3f1072202a3
--- /dev/null
+++ b/fluid/DeepASR/data_utils/trans_splice.py
@@ -0,0 +1,60 @@
+import numpy as np
+
+
+class TransSplice(object):
+    """ splice the feature context to construct a new feature,
+        expanding the feature data from shape (frame_num, frame_dim)
+        to shape (frame_num, frame_dim * 11)
+
+    Attributes:
+        _nleft_context(int): the number of left-context frames to copy
+        _nright_context(int): the number of right-context frames to copy
+    """
+
+    def __init__(self, nleft_context=5, nright_context=5):
+        """ init construction
+        Args:
+            nleft_context(int): left context size
+            nright_context(int): right context size
+        """
+        self._nleft_context = nleft_context
+        self._nright_context = nright_context
+
+    def perform_trans(self, sample):
+        """ splice the feature context
+        Args:
+            sample(tuple): (feature numpy.array, label numpy.array)
+        Returns:
+            (feature, label)
+        """
+        (feature, label) = sample
+        nframe_num = feature.shape[0]
+        nframe_dim = feature.shape[1]
+        nnew_frame_dim = nframe_dim * (
+            self._nleft_context + self._nright_context + 1)
+        mat = np.zeros(
+            (nframe_num + self._nleft_context + self._nright_context,
+             nframe_dim),
+            dtype="float32")
+        ret = np.zeros((nframe_num, nnew_frame_dim), dtype="float32")
+
+        # pad the left context by repeating the first frame
+        for i in xrange(self._nleft_context):
+            mat[i, :] = feature[0, :]
+
+        # copy the middle
+        mat[self._nleft_context:self._nleft_context +
+            nframe_num, :] = feature[:, :]
+
+        # pad the right context by repeating the last frame
+        for i in xrange(self._nright_context):
+            mat[i + self._nleft_context + nframe_num, :] = feature[-1, :]
+
+        mat = mat.reshape(mat.shape[0] * mat.shape[1])
+        ret = ret.reshape(ret.shape[0] * ret.shape[1])
+        for i in xrange(nframe_num):
+            np.copyto(ret[i * nnew_frame_dim:(i + 1) * nnew_frame_dim],
+                      mat[i * nframe_dim:i * nframe_dim + nnew_frame_dim])
+        ret = ret.reshape((nframe_num, nnew_frame_dim))
+        return (ret, label)
diff --git a/fluid/DeepASR/data_utils/util.py b/fluid/DeepASR/data_utils/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..a09a7dea062743b958ebb65a740555c0c41852d0
--- /dev/null
+++ b/fluid/DeepASR/data_utils/util.py
@@ -0,0 +1,25 @@
+import numpy as np
+import paddle.v2.fluid as fluid
+
+
+def to_lodtensor(data, place):
+    """convert a batch of sequences to a LoDTensor
+    """
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = np.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+    res.set_lod([lod])
+    return res
+
+
+def lodtensor_to_ndarray(lod_tensor):
+    """convert a LoDTensor to a numpy ndarray
+    """
+    dims = lod_tensor.get_dims()
+    ret = np.zeros(shape=dims).astype('float32')
+    for i in xrange(np.product(dims)):
+        ret.ravel()[i] = lod_tensor.get_float_element(i)
+    return ret, lod_tensor.lod()
diff --git a/fluid/DeepASR/stacked_dynamic_lstm.py b/fluid/DeepASR/stacked_dynamic_lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..4218f0458965aac12196b0f71421e239576bd2c5
--- /dev/null
+++ b/fluid/DeepASR/stacked_dynamic_lstm.py
@@ -0,0 +1,227 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import argparse
+import cProfile
+import time
+
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+import paddle.v2.fluid.profiler as profiler
+import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
+import data_utils.trans_add_delta as trans_add_delta
+import data_utils.trans_splice as trans_splice
+import data_utils.data_read as reader
+from data_utils.util import lodtensor_to_ndarray
+
+
+def parse_args():
+    parser = argparse.ArgumentParser("LSTM model benchmark.")
+    parser.add_argument(
+        '--batch_size',
+        type=int,
+        default=32,
+        help='The sequence number of a batch data. (default: %(default)d)')
+    parser.add_argument(
+        '--stacked_num',
+        type=int,
+        default=5,
+        help='Number of lstm layers to stack. (default: %(default)d)')
+    parser.add_argument(
+        '--proj_dim',
+        type=int,
+        default=512,
+        help='Project size of lstm unit. (default: %(default)d)')
+    parser.add_argument(
+        '--hidden_dim',
+        type=int,
+        default=1024,
+        help='Hidden size of lstm unit. (default: %(default)d)')
+    parser.add_argument(
+        '--pass_num',
+        type=int,
+        default=100,
+        help='Epoch number to train. (default: %(default)d)')
+    parser.add_argument(
+        '--learning_rate',
+        type=float,
+        default=0.002,
+        help='Learning rate used to train. (default: %(default)f)')
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='GPU',
+        choices=['CPU', 'GPU'],
+        help='The device type. (default: %(default)s)')
+    parser.add_argument(
+        '--infer_only', action='store_true', help='If set, run forward only.')
+    parser.add_argument(
+        '--use_cprof', action='store_true', help='If set, use cProfile.')
+    parser.add_argument(
+        '--use_nvprof',
+        action='store_true',
+        help='If set, use nvprof for CUDA.')
+    parser.add_argument(
+        '--mean_var', type=str, help='mean and variance file path')
+    parser.add_argument('--feature_lst', type=str, help='feature list path')
+    parser.add_argument('--label_lst', type=str, help='label list path')
+    args = parser.parse_args()
+    return args
+
+
+def print_arguments(args):
+    vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
+                                vars(args)['device'] == 'GPU')
+    print('----------- Configuration Arguments -----------')
+    for arg, value in sorted(vars(args).iteritems()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+
+def dynamic_lstmp_model(hidden_dim,
+                        proj_dim,
+                        stacked_num,
+                        class_num=1749,
+                        is_train=True):
+    feature = fluid.layers.data(
+        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
+
+    seq_conv1 = fluid.layers.sequence_conv(
+        input=feature,
+        num_filters=1024,
+        filter_size=3,
+        filter_stride=1,
+        bias_attr=True)
+    bn1 = fluid.layers.batch_norm(
+        input=seq_conv1,
+        act="sigmoid",
+        is_test=False,
+        momentum=0.9,
+        epsilon=1e-05,
+        data_layout='NCHW')
+
+    stack_input = bn1
+    for i in range(stacked_num):
+        fc = fluid.layers.fc(input=stack_input,
+                             size=hidden_dim * 4,
+                             bias_attr=True)
+        proj, cell = fluid.layers.dynamic_lstmp(
+            input=fc,
+            size=hidden_dim * 4,
+            proj_size=proj_dim,
+            bias_attr=True,
+            use_peepholes=True,
+            is_reverse=False,
+            cell_activation="tanh",
+            proj_activation="tanh")
+        bn = fluid.layers.batch_norm(
+            input=proj,
+            act="sigmoid",
+            is_test=False,
+            momentum=0.9,
+            epsilon=1e-05,
+            data_layout='NCHW')
+        stack_input = bn
+
+    prediction = fluid.layers.fc(input=stack_input,
+                                 size=class_num,
+                                 act='softmax')
+
+    if not is_train: return feature, prediction
+
+    label = fluid.layers.data(
+        name="label", shape=[-1, 1], dtype="int64", lod_level=1)
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    return prediction, label, avg_cost
+
+
+def train(args):
+    if args.use_cprof:
+        pr = cProfile.Profile()
+        pr.enable()
+
+    prediction, label, avg_cost = dynamic_lstmp_model(
+        args.hidden_dim, args.proj_dim, args.stacked_num)
+
+    adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
+    adam_optimizer.minimize(avg_cost)
+
+    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
+
+    # clone from default main program
+    inference_program = fluid.default_main_program().clone()
+    with fluid.program_guard(inference_program):
+        test_accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
+        test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
+        inference_program = fluid.io.get_inference_program(test_target)
+
+    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+
+    ltrans = [
+        trans_add_delta.TransAddDelta(2, 2),
+        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
+        trans_splice.TransSplice()
+    ]
+
+    data_reader = reader.DataRead(args.feature_lst, args.label_lst)
+    data_reader.set_trans(ltrans)
+
+    res_feature = fluid.LoDTensor()
+    res_label = fluid.LoDTensor()
+    for pass_id in xrange(args.pass_num):
+        pass_start_time = time.time()
+        words_seen = 0
+        accuracy.reset(exe)
+        batch_id = 0
+        while True:
+            # load one batch
+            one_batch = data_reader.get_one_batch(args.batch_size)
+            if one_batch is None:
+                break
+            (bat_feature, bat_label, lod) = one_batch
+            res_feature.set(bat_feature, place)
+            res_feature.set_lod([lod])
+            res_label.set(bat_label, place)
+            res_label.set_lod([lod])
+
+            batch_id += 1
+
+            words_seen += lod[-1]
+
+            loss, acc = exe.run(
+                fluid.default_main_program(),
+                feed={"feature": res_feature,
+                      "label": res_label},
+                fetch_list=[avg_cost] + accuracy.metrics,
+                return_numpy=False)
+            train_acc = accuracy.eval(exe)
+            loss_np, _ = lodtensor_to_ndarray(loss)
+            print("pass: %d, batch: %d, loss: %s, acc: %s" %
+                  (pass_id, batch_id, loss_np, train_acc))
+
+        pass_end_time = time.time()
+        time_consumed = pass_end_time - pass_start_time
+        words_per_sec = words_seen / time_consumed
+        print("pass: %d, time: %fs, words per second: %f" %
+              (pass_id, time_consumed, words_per_sec))
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    print_arguments(args)
+
+    if args.infer_only:
+        pass
+    else:
+        if args.use_nvprof and args.device == 'GPU':
+            with profiler.cuda_profiler("cuda_profiler.txt", 'csv'):
+                train(args)
+        else:
+            train(args)
diff --git a/fluid/DeepASR/test/test_data_trans.py b/fluid/DeepASR/test/test_data_trans.py
new file mode 100644
index 0000000000000000000000000000000000000000..7767d26ca6cb87c703e49642c7c83de2437a0b5e
--- /dev/null
+++ b/fluid/DeepASR/test/test_data_trans.py
@@ -0,0 +1,111 @@
+# by zhxfl 2018.01.31
+import sys
+import unittest
+import numpy as np
+sys.path.append("../")
+import data_utils.trans_mean_variance_norm as trans_mean_variance_norm
+import data_utils.trans_add_delta as trans_add_delta
+import data_utils.trans_splice as trans_splice
+
+
+class TestTransMeanVarianceNorm(unittest.TestCase):
+    """unit test for TransMeanVarianceNorm
+    """
+
+    def test(self):
+        feature = np.zeros((2, 120), dtype="float32")
+        feature.fill(1)
+        trans = trans_mean_variance_norm.TransMeanVarianceNorm(
+            "../data/global_mean_var_search26kHr")
+        (feature1, label1) = trans.perform_trans((feature, None))
+        (mean, var) = trans.get_mean_var()
+        feature_flat1 = feature1.flatten()
+        one = np.ones((1), dtype="float32")
+        for idx, val in enumerate(feature_flat1):
+            cur_idx = idx % 120
+            self.assertAlmostEqual(val, (one[0] - mean[cur_idx]) * var[cur_idx])
+
+
+class TestTransAddDelta(unittest.TestCase):
+    """unit test for TransAddDelta
+    """
+
+    def test_regress(self):
+        """test _regress
+        """
+        feature = np.zeros((14, 120), dtype="float32")
+        feature[0:5, 0:40].fill(1)
+        feature[0 + 5, 0:40].fill(1)
+        feature[1 + 5, 0:40].fill(2)
+        feature[2 + 5, 0:40].fill(3)
+        feature[3 + 5, 0:40].fill(4)
+        feature[8:14, 0:40].fill(4)
+        trans = trans_add_delta.TransAddDelta()
+        feature = feature.reshape((14 * 120))
+        trans._regress(feature, 5 * 120, feature, 5 * 120 + 40, 40, 4, 120)
+        trans._regress(feature, 5 * 120 + 40, feature, 5 * 120 + 80, 40, 4,
+                       120)
+        feature = feature.reshape((14, 120))
+        tmp_feature = feature[5:5 + 4, :]
+        self.assertAlmostEqual(1.0, tmp_feature[0][0])
+        self.assertAlmostEqual(0.24, tmp_feature[0][119])
+        self.assertAlmostEqual(2.0, tmp_feature[1][0])
+        self.assertAlmostEqual(0.13, tmp_feature[1][119])
+        self.assertAlmostEqual(3.0, tmp_feature[2][0])
+        self.assertAlmostEqual(-0.13, tmp_feature[2][119])
+        self.assertAlmostEqual(4.0, tmp_feature[3][0])
+        self.assertAlmostEqual(-0.24, tmp_feature[3][119])
+
+    def test_perform(self):
"""test perform + """ + feature = np.zeros((4, 40), dtype="float32") + feature[0, 0:40].fill(1) + feature[1, 0:40].fill(2) + feature[2, 0:40].fill(3) + feature[3, 0:40].fill(4) + trans = trans_add_delta.TransAddDelta() + (feature, label) = trans.perform_trans((feature, None)) + self.assertAlmostEqual(feature.shape[0], 4) + self.assertAlmostEqual(feature.shape[1], 120) + self.assertAlmostEqual(1.0, feature[0][0]) + self.assertAlmostEqual(0.24, feature[0][119]) + self.assertAlmostEqual(2.0, feature[1][0]) + self.assertAlmostEqual(0.13, feature[1][119]) + self.assertAlmostEqual(3.0, feature[2][0]) + self.assertAlmostEqual(-0.13, feature[2][119]) + self.assertAlmostEqual(4.0, feature[3][0]) + self.assertAlmostEqual(-0.24, feature[3][119]) + + +class TestTransSplict(unittest.TestCase): + """unit test Test TransSplict + """ + + def test_perfrom(self): + feature = np.zeros((8, 10), dtype="float32") + for i in xrange(feature.shape[0]): + feature[i, :].fill(i) + + trans = trans_splice.TransSplice() + (feature, label) = trans.perform_trans((feature, None)) + self.assertEqual(feature.shape[1], 110) + + for i in xrange(8): + nzero_num = 5 - i + cur_val = 0.0 + if nzero_num < 0: + cur_val = i - 5 - 1 + for j in xrange(11): + if j <= nzero_num: + for k in xrange(10): + self.assertAlmostEqual(feature[i][j * 10 + k], cur_val) + else: + if cur_val < 7: + cur_val += 1.0 + for k in xrange(10): + self.assertAlmostEqual(feature[i][j * 10 + k], cur_val) + + +if __name__ == '__main__': + unittest.main()