# model_v2.py
import math

import paddle.v2 as paddle


def db_lstm(word_dict_len, label_dict_len, pred_len):
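    """Deep bidirectional LSTM (db_lstm) topology for semantic role labeling.

    Eight integer-sequence features (the word, the five-word predicate
    context window ctx_n2..ctx_p2, the predicate, and a binary
    predicate-region mark) are embedded, projected into a shared hidden
    layer, and passed through `depth` stacked LSTM layers whose direction
    alternates level by level. Returns unnormalized per-label scores of
    size `label_dict_len`, typically consumed by a CRF layer.
    """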
    mark_dict_len = 2  # the mark feature is binary (in/out of the predicate region)
    word_dim = 32  # word / context / predicate embedding size
    mark_dim = 5  # mark embedding size
    hidden_dim = 512  # size of the mixed hidden layers and LSTM layers
    depth = 8  # number of stacked LSTM layers

    # 8 features: the word, a five-word context window around the predicate,
    # the predicate itself, and the predicate-region mark. All inputs are
    # integer id sequences.
    seq = paddle.data_type.integer_value_sequence
    word = paddle.layer.data(name='word_data', type=seq(word_dict_len))
    predicate = paddle.layer.data(name='verb_data', type=seq(pred_len))

    ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=seq(word_dict_len))
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=seq(word_dict_len))
    ctx_0 = paddle.layer.data(name='ctx_0_data', type=seq(word_dict_len))
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=seq(word_dict_len))
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=seq(word_dict_len))
    mark = paddle.layer.data(name='mark_data', type=seq(mark_dict_len))

    # Initialization std shared by most learnable parameters below.
    default_std = 1 / math.sqrt(hidden_dim) / 3.0

    # 'emb' is shared by the six word/context inputs and frozen
    # (learning_rate=0.); it is typically loaded with pre-trained word
    # vectors before training.
    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(
            name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        name='word_ctx-in_embedding',
        size=mark_dim,
        input=mark,
        param_attr=std_0)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        paddle.layer.embedding(
            size=word_dim, input=x, param_attr=emb_para) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    # Project all eight feature embeddings into the first hidden layer.
    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    # Projections out of mixed/hidden layers learn with a much smaller
    # learning rate (mix_hidden_lr) than projections out of LSTM layers.
    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    # Stack L-LSTM and R-LSTM with direct edges: each level mixes the previous
    # hidden layer and LSTM output, and the LSTM direction alternates
    # (reverse=True on odd-numbered levels).
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        input_tmp = [mix_hidden, lstm]

    # Project the top hidden layer and the top LSTM output to one score per
    # label; these unnormalized scores are what a CRF layer consumes.
    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ])

    return feature_out
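

# --- Minimal usage sketch (not part of the original file) ---
# A hedged illustration of how feature_out is typically wired into a CRF
# cost for training. The dictionary sizes below are hypothetical
# placeholders; a real semantic-role-labeling pipeline derives them from
# its dataset.
if __name__ == '__main__':
    paddle.init(use_gpu=False, trainer_count=1)

    word_dict_len = 10000  # hypothetical vocabulary size
    label_dict_len = 100  # hypothetical number of role labels
    pred_len = 3000  # hypothetical number of distinct predicates

    feature_out = db_lstm(word_dict_len, label_dict_len, pred_len)

    # Gold label sequence; paddle.layer.crf turns the per-label scores into
    # a sequence-level negative log-likelihood cost.
    target = paddle.layer.data(
        name='target',
        type=paddle.data_type.integer_value_sequence(label_dict_len))
    crf_cost = paddle.layer.crf(
        size=label_dict_len, input=feature_out, label=target)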