# model.py — DeepASR stacked-LSTMP acoustic model (PaddlePaddle Fluid).
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.v2 as paddle
import paddle.v2.fluid as fluid


def stacked_lstmp_model(hidden_dim,
                        proj_dim,
                        stacked_num,
                        class_num,
                        parallel=False,
                        is_train=True):
    """The model for DeepASR. The main structure is composed of stacked
    identical LSTMP (LSTM with recurrent projection) layers.

    When running in training and validation phase, the feeding dictionary
    is {'feature', 'label'}, fed by the LodTensor for feature data and
    label data respectively. And in inference, only `feature` is needed.

    Args:
        hidden_dim(int): The hidden state's dimension of the LSTMP layer.
        proj_dim(int): The projection size of the LSTMP layer.
        stacked_num(int): The number of stacked LSTMP layers.
        class_num(int): The number of output classes.
        parallel(bool): Run in parallel or not, default `False`.
        is_train(bool): Run in training phase or not, default `True`.

    Returns:
        tuple: (prediction, avg_cost, acc) — the softmax output layer,
        the mean cross-entropy loss, and the classification accuracy.
    """

    # network configuration
    def _net_conf(feature, label):
        # Front end: 1-D sequence convolution over the input features,
        # followed by batch norm with a sigmoid activation.
        seq_conv1 = fluid.layers.sequence_conv(
            input=feature,
            num_filters=1024,
            filter_size=3,
            filter_stride=1,
            bias_attr=True)
        bn1 = fluid.layers.batch_norm(
            input=seq_conv1,
            act="sigmoid",
            is_test=not is_train,  # freeze running stats outside training
            momentum=0.9,
            epsilon=1e-05,
            data_layout='NCHW')

        stack_input = bn1
        # Stacked identical fc -> LSTMP -> batch-norm layers; each layer's
        # normalized projection output feeds the next layer's input.
        for i in range(stacked_num):
            # fc produces the 4*hidden_dim pre-gate input expected by LSTMP.
            fc = fluid.layers.fc(input=stack_input,
                                 size=hidden_dim * 4,
                                 bias_attr=True)
            proj, cell = fluid.layers.dynamic_lstmp(
                input=fc,
                size=hidden_dim * 4,
                proj_size=proj_dim,
                bias_attr=True,
                use_peepholes=True,
                is_reverse=False,
                cell_activation="tanh",
                proj_activation="tanh")
            bn = fluid.layers.batch_norm(
                input=proj,
                act="sigmoid",
                is_test=not is_train,
                momentum=0.9,
                epsilon=1e-05,
                data_layout='NCHW')
            stack_input = bn

        # Output layer: softmax over the target classes.
        prediction = fluid.layers.fc(input=stack_input,
                                     size=class_num,
                                     act='softmax')

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc = fluid.layers.accuracy(input=prediction, label=label)
        return prediction, avg_cost, acc

    # data feeder
    # NOTE(review): 120 * 11 looks like 120-dim frames with an 11-frame
    # context window — confirm against the feature pipeline.
    feature = fluid.layers.data(
        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)

    if parallel:
        # When the execution place is specified to CUDAPlace, the program will
        # run on all $CUDA_VISIBLE_DEVICES GPUs. Otherwise the program will
        # run on all CPU devices.
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            feat_ = pd.read_input(feature)
            label_ = pd.read_input(label)
            prediction, avg_cost, acc = _net_conf(feat_, label_)
            # Only loss and accuracy are gathered across devices; the
            # returned `prediction` is the per-device layer built above.
            for out in [avg_cost, acc]:
                pd.write_output(out)

        # get mean loss and acc through every devices.
        avg_cost, acc = pd()
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc = fluid.layers.mean(x=acc)
    else:
        prediction, avg_cost, acc = _net_conf(feature, label)

    return prediction, avg_cost, acc