from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.v2 as paddle
import paddle.fluid as fluid
def stacked_lstmp_model(frame_dim,
                        hidden_dim,
                        proj_dim,
                        stacked_num,
                        class_num,
                        parallel=False,
                        is_train=True):
    """The model for DeepASR. The main structure is composed of stacked
    identical LSTMP (LSTM with recurrent projection) layers on top of a
    single conv + max-pool front end.

    When running in training and validation phase, the feeding dictionary
    is {'feature', 'label'}, fed by the LodTensor for feature data and
    label data respectively. And in inference, only `feature` is needed.

    Args:
        frame_dim(int): The frame dimension of feature data. The feature
            input is consumed as a 4-D tensor of shape
            [-1, 3, 11, frame_dim] (batch, channels, context, frame_dim).
        hidden_dim(int): The hidden state's dimension of the LSTMP layer.
        proj_dim(int): The projection size of the LSTMP layer.
        stacked_num(int): The number of stacked LSTMP layers.
        class_num(int): The number of output classes.
        parallel(bool): Run in parallel or not, default `False`.
        is_train(bool): Run in training phase or not, default `True`.
            Controls batch-norm's `is_test` flag.

    Returns:
        tuple: (prediction, avg_cost, acc) — softmax output, mean
        cross-entropy loss and accuracy, all graph variables.
    """

    # network configuration
    def _net_conf(feature, label):
        # Conv front end: 32 3x3 filters, stride 1, "same" padding.
        conv1 = fluid.layers.conv2d(
            input=feature,
            num_filters=32,
            filter_size=3,
            stride=1,
            padding=1,
            bias_attr=True,
            act="relu")

        # 3x3 max-pool with stride 2 halves the spatial resolution.
        pool1 = fluid.layers.pool2d(
            conv1, pool_size=3, pool_type="max", pool_stride=2, pool_padding=0)

        stack_input = pool1
        for i in range(stacked_num):
            # fc projects to 4*hidden_dim: the gate-concatenated input
            # expected by dynamic_lstmp.
            fc = fluid.layers.fc(input=stack_input,
                                 size=hidden_dim * 4,
                                 bias_attr=None)
            proj, cell = fluid.layers.dynamic_lstmp(
                input=fc,
                size=hidden_dim * 4,
                proj_size=proj_dim,
                bias_attr=True,
                use_peepholes=True,
                is_reverse=False,
                cell_activation="tanh",
                proj_activation="tanh")
            # Batch-norm in inference mode (is_test=True) when not training.
            bn = fluid.layers.batch_norm(
                input=proj,
                is_test=not is_train,
                momentum=0.9,
                epsilon=1e-05,
                data_layout='NCHW')
            stack_input = bn

        prediction = fluid.layers.fc(input=stack_input,
                                     size=class_num,
                                     act='softmax')

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc = fluid.layers.accuracy(input=prediction, label=label)
        return prediction, avg_cost, acc

    # data feeder
    feature = fluid.layers.data(
        name="feature",
        shape=[-1, 3, 11, frame_dim],
        dtype="float32",
        lod_level=1)
    label = fluid.layers.data(
        name="label", shape=[-1, 1], dtype="int64", lod_level=1)

    if parallel:
        # When the execution place is specified to CUDAPlace, the program will
        # run on all $CUDA_VISIBLE_DEVICES GPUs. Otherwise the program will
        # run on all CPU devices.
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            feat_ = pd.read_input(feature)
            label_ = pd.read_input(label)
            prediction, avg_cost, acc = _net_conf(feat_, label_)
            for out in [prediction, avg_cost, acc]:
                pd.write_output(out)

        # get mean loss and acc through every devices.
        prediction, avg_cost, acc = pd()
        prediction.stop_gradient = True
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc = fluid.layers.mean(x=acc)
    else:
        prediction, avg_cost, acc = _net_conf(feature, label)

    return prediction, avg_cost, acc