#coding=utf-8

import collections

import paddle.v2 as paddle
from paddle.v2.layer import parse_network

__all__ = [
    "stacked_bidirectional_lstm",
    "stacked_bidirectional_lstm_by_nested_seq",
    "lstm_by_nested_sequence",
]


def stacked_bidirectional_lstm(inputs,
                               hidden_dim,
                               depth,
                               drop_rate=0.,
                               prefix=""):
    """ The stacked bi-directional LSTM.

    In PaddlePaddle, recurrent layers have two different implementations:
    1. a recurrent layer implemented by recurrent_group: every intermediate
       state a recurrent unit computes during one time step, such as hidden
       states, the input-to-hidden mapping, memory cells and so on, is
       accessible;
    2. a recurrent layer as a whole: only the outputs of the recurrent layer
       are accessible.

    The second type (the recurrent layer as a whole) is more computationally
    efficient, because recurrent_group is composed of many basic layers
    (additions, element-wise multiplications, matrix multiplications and so
    on).

    This function uses the second type to implement the stacked bi-directional
    LSTM.

    Arguments:
        - inputs:      The input layer, or a list of input layers, to the
                       bi-directional LSTM.
        - hidden_dim:  The dimension of the hidden state of the LSTM.
        - depth:       Depth of the stacked bi-directional LSTM.
        - drop_rate:   The dropout rate applied to the LSTM output states.
        - prefix:      A string prepended to the name of every layer created
                       in this function. Each layer in a network must have a
                       unique name; the prefix makes it possible to call this
                       function multiple times.
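
    Example (a minimal sketch; the data layer, vocabulary size and all
    sizes below are illustrative and not part of this module):

        word = paddle.layer.data(
            name="word",
            type=paddle.data_type.integer_value_sequence(10000))
        emb = paddle.layer.embedding(input=word, size=128)
        final_states, lstm_outs = stacked_bidirectional_lstm(
            inputs=emb, hidden_dim=128, depth=2, drop_rate=0.1, prefix="enc")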
    """

    if not isinstance(inputs, collections.Sequence):
        inputs = [inputs]

    lstm_last = []
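    # For each direction, stack `depth` LSTM layers: layer 0 projects the
    # function inputs, while layer i > 0 projects the previous LSTM's output.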
    for dirt in ["fwd", "bwd"]:
        for i in range(depth):
            input_proj = paddle.layer.mixed(
                name="%s_in_proj_%0d_%s__" % (prefix, i, dirt),
                size=hidden_dim * 4,
                bias_attr=paddle.attr.Param(initial_std=0.),
                input=[paddle.layer.full_matrix_projection(lstm)] if i else [
                    paddle.layer.full_matrix_projection(in_layer)
                    for in_layer in inputs
                ])
            lstm = paddle.layer.lstmemory(
                input=input_proj,
                bias_attr=paddle.attr.Param(initial_std=0.),
                param_attr=paddle.attr.Param(initial_std=5e-4),
                reverse=(dirt == "bwd"))
        lstm_last.append(lstm)

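    # Summarize each direction by its final state in time: the last step of
    # the forward sequence and the first step of the reversed (backward) one.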
    final_states = paddle.layer.concat(input=[
        paddle.layer.last_seq(input=lstm_last[0]),
        paddle.layer.first_seq(input=lstm_last[1]),
    ])

    lstm_outs = paddle.layer.concat(
        input=lstm_last,
        layer_attr=paddle.attr.ExtraLayerAttribute(drop_rate=drop_rate))
    return final_states, lstm_outs


def lstm_by_nested_sequence(input_layer, hidden_dim, name="", reverse=False):
    """This is a LSTM implemended by nested recurrent_group.

    A paragraph is a natural nested sequence:
    1. each paragraph is a sequence of sentences;
    2. each sentence is a sequence of words.

    This function uses a nested recurrent_group to implement the LSTM:
    1. The outer group iterates over the sentences in a paragraph.
    2. The inner group iterates over the words in a sentence.
    3. An LSTM encodes each sentence, and its final output is used to
       initialize the memory of the LSTM that encodes the next sentence.
    4. Parameters are shared among these sentence-encoding LSTMs.
    5. Consequently, this function is equivalent to concatenating all
       sentences in a paragraph into one (long) sentence and encoding this
       new long sentence with a single LSTM.

    Arguments:
        - input_layer:    The input layer to the LSTM.
        - hidden_dim:     The dimension of the hidden state of the LSTM.
        - name:           The name of the LSTM.
        - reverse:        A boolean indicating whether to process the input
                          sequence in reverse order.
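
    Example (a minimal sketch; the data layer and all sizes are illustrative
    and not part of this module):

        paragraph = paddle.layer.data(
            name="paragraph",
            type=paddle.data_type.integer_value_sub_sequence(10000))
        emb = paddle.layer.embedding(input=paragraph, size=128)
        lstm_out = lstm_by_nested_sequence(
            input_layer=emb, hidden_dim=128, name="nested_lstm")

    One sample for the paragraph layer above is a list of word-id lists,
    for example [[2, 45, 7], [9, 12, 3, 8], [5, 30]].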
    """

    def lstm_outer_step(lstm_group_input, hidden_dim, reverse, name=''):
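        # outer_memory takes the previous-step value of the layer named
        # "__inner_<name>_last__" (created below), i.e. the final LSTM state
        # of the sentence encoded at the previous outer step.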
        outer_memory = paddle.layer.memory(
            name="__inner_%s_last__" % name, size=hidden_dim)

        def lstm_inner_step(input_layer, hidden_dim, reverse, name):
            inner_memory = paddle.layer.memory(
                name="__inner_state_%s__" % name,
                size=hidden_dim,
                boot_layer=outer_memory)
            input_proj = paddle.layer.fc(size=hidden_dim * 4,
                                         bias_attr=False,
                                         input=input_layer)
            return paddle.networks.lstmemory_unit(
                input=input_proj,
                name="__inner_state_%s__" % name,
                out_memory=inner_memory,
                size=hidden_dim,
                act=paddle.activation.Tanh(),
                gate_act=paddle.activation.Sigmoid(),
                state_act=paddle.activation.Tanh())

        inner_out = paddle.layer.recurrent_group(
            name="__inner_%s__" % name,
            step=lstm_inner_step,
            reverse=reverse,
            input=[lstm_group_input, hidden_dim, reverse, name])

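        # The aggregation below is not returned: it only creates the layer
        # named "__inner_<name>_last__" that outer_memory reads, carrying each
        # sentence's final LSTM state over to the next sentence.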
        if reverse:
            inner_last_output = paddle.layer.first_seq(
                input=inner_out,
                name="__inner_%s_last__" % name,
                agg_level=paddle.layer.AggregateLevel.TO_NO_SEQUENCE)
        else:
            inner_last_output = paddle.layer.last_seq(
                input=inner_out,
                name="__inner_%s_last__" % name,
                agg_level=paddle.layer.AggregateLevel.TO_NO_SEQUENCE)
        return inner_out

    return paddle.layer.recurrent_group(
        input=[
            paddle.layer.SubsequenceInput(input_layer), hidden_dim, reverse,
            name
        ],
        step=lstm_outer_step,
        name="__outter_%s__" % name,
        reverse=reverse)


def stacked_bidirectional_lstm_by_nested_seq(input_layer,
                                             depth,
                                             hidden_dim,
                                             prefix=""):
    """ The stacked bi-directional LSTM to process a nested sequence.

    The module defined in this function is exactly equivalent to the one
    defined in stacked_bidirectional_lstm; the only differences are that the
    bi-directional LSTM here is implemented with recurrent_group in
    PaddlePaddle, and that it receives a nested sequence as its input.

    Arguments:
        - input_layer: The input layer to the bi-directional LSTM.
        - depth:       Depth of the stacked bi-directional LSTM.
        - hidden_dim:  The dimension of the hidden state of the LSTM.
        - prefix:      A string prepended to the name of every layer created
                       in this function. Each layer in a network must have a
                       unique name; the prefix makes it possible to call this
                       function multiple times.
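
    Example (a minimal sketch; the data layer and all sizes are illustrative
    and not part of this module):

        paragraph = paddle.layer.data(
            name="paragraph",
            type=paddle.data_type.integer_value_sub_sequence(10000))
        emb = paddle.layer.embedding(input=paragraph, size=128)
        lstm_outs = stacked_bidirectional_lstm_by_nested_seq(
            input_layer=emb, depth=2, hidden_dim=128, prefix="nested_enc")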
    """

    lstm_final_outs = []
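    # Mirror stacked_bidirectional_lstm: layer 0 of each direction reads the
    # function input, deeper layers read the previous LSTM's output, and only
    # the top layer of each direction is kept for the final concatenation.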
    for dirt in ["fwd", "bwd"]:
        for i in range(depth):
            lstm_out = lstm_by_nested_sequence(
                input_layer=(lstm_out if i else input_layer),
                hidden_dim=hidden_dim,
                name="__%s_%s_%02d__" % (prefix, dirt, i),
                reverse=(dirt == "bwd"))
        lstm_final_outs.append(lstm_out)
    return paddle.layer.concat(input=lstm_final_outs)
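

if __name__ == "__main__":
    # A minimal smoke test; the data layer, vocabulary size and dimensions
    # below are illustrative and not part of the original module. It builds
    # the stacked bi-directional LSTM on a toy embedded input and prints the
    # parsed topology with parse_network (imported above); the nested-sequence
    # variants can be inspected the same way with a sub-sequence data layer.
    paddle.init(use_gpu=False, trainer_count=1)
    word = paddle.layer.data(
        name="word", type=paddle.data_type.integer_value_sequence(1024))
    emb = paddle.layer.embedding(input=word, size=32)
    final_states, lstm_outs = stacked_bidirectional_lstm(
        emb, hidden_dim=32, depth=2, prefix="smoke")
    print(parse_network(final_states, lstm_outs))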