model.py 4.0 KB
Newer Older
Y
Yibing Liu 已提交
1 2 3 4 5
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.v2 as paddle
L
Luo Tao 已提交
6
import paddle.fluid as fluid
Y
Yibing Liu 已提交
7 8


9 10
def stacked_lstmp_model(frame_dim,
                        hidden_dim,
Y
Yibing Liu 已提交
11 12
                        proj_dim,
                        stacked_num,
13
                        class_num,
14
                        parallel=False,
15
                        is_train=True):
16 17 18
    """ The model for DeepASR. The main structure is composed of stacked 
        identical LSTMP (LSTM with recurrent projection) layers.

19 20 21 22
        When running in training and validation phase, the feeding dictionary
        is {'feature', 'label'}, fed by the LodTensor for feature data and 
        label data respectively. And in inference, only `feature` is needed.

23
    Args:
Y
Yibing Liu 已提交
24 25 26 27 28 29 30
        frame_dim(int): The frame dimension of feature data.
        hidden_dim(int): The hidden state's dimension of the LSTMP layer.
        proj_dim(int): The projection size of the LSTMP layer.
        stacked_num(int): The number of stacked LSTMP layers.
        parallel(bool): Run in parallel or not, default `False`.
        is_train(bool): Run in training phase or not, default `True`.
        class_dim(int): The number of output classes.
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
    """

    # network configuration
    def _net_conf(feature, label):
        seq_conv1 = fluid.layers.sequence_conv(
            input=feature,
            num_filters=1024,
            filter_size=3,
            filter_stride=1,
            bias_attr=True)
        bn1 = fluid.layers.batch_norm(
            input=seq_conv1,
            act="sigmoid",
            is_test=not is_train,
            momentum=0.9,
            epsilon=1e-05,
            data_layout='NCHW')

        stack_input = bn1
        for i in range(stacked_num):
            fc = fluid.layers.fc(input=stack_input,
                                 size=hidden_dim * 4,
                                 bias_attr=True)
            proj, cell = fluid.layers.dynamic_lstmp(
                input=fc,
                size=hidden_dim * 4,
                proj_size=proj_dim,
                bias_attr=True,
                use_peepholes=True,
                is_reverse=False,
                cell_activation="tanh",
                proj_activation="tanh")
            bn = fluid.layers.batch_norm(
                input=proj,
                act="sigmoid",
                is_test=not is_train,
                momentum=0.9,
                epsilon=1e-05,
                data_layout='NCHW')
            stack_input = bn

        prediction = fluid.layers.fc(input=stack_input,
                                     size=class_num,
                                     act='softmax')

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc = fluid.layers.accuracy(input=prediction, label=label)
        return prediction, avg_cost, acc

    # data feeder
Y
Yibing Liu 已提交
82
    feature = fluid.layers.data(
83
        name="feature", shape=[-1, frame_dim], dtype="float32", lod_level=1)
84 85 86 87
    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)

    if parallel:
88 89 90
        # When the execution place is specified to CUDAPlace, the program will
        # run on all $CUDA_VISIBLE_DEVICES GPUs. Otherwise the program will 
        # run on all CPU devices.
91 92 93 94 95
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            feat_ = pd.read_input(feature)
            label_ = pd.read_input(label)
96
            prediction, avg_cost, acc = _net_conf(feat_, label_)
97
            for out in [prediction, avg_cost, acc]:
98 99 100
                pd.write_output(out)

        # get mean loss and acc through every devices.
101 102
        prediction, avg_cost, acc = pd()
        prediction.stop_gradient = True
103 104 105
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc = fluid.layers.mean(x=acc)
    else:
106
        prediction, avg_cost, acc = _net_conf(feature, label)
Y
Yibing Liu 已提交
107

108
    return prediction, avg_cost, acc