network_conf.py 2.2 KB
Newer Older
C
caoying03 已提交
1
import math
C
caoying03 已提交
2

C
caoying03 已提交
3
import paddle.v2 as paddle
4
from paddle.v2.layer import parse_network
C
caoying03 已提交
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23


def ngram_lm(hidden_size, embed_size, dict_size, gram_num=4, is_train=True):
    """Build the topology of an N-gram language model with an hsigmoid output.

    The network declares ``gram_num`` integer data layers named
    ``__word00__``, ``__word01__``, ... plus one named ``__target_word__``,
    embeds every context word with a shared embedding parameter, concatenates
    the embeddings and feeds them through one sigmoid fully connected layer
    with dropout.

    Args:
        hidden_size: Size of the fully connected hidden layer.
        embed_size: Size of each word embedding.
        dict_size: Vocabulary size; used as the value range of every integer
            data layer and as ``num_classes`` of the hsigmoid layer.
        gram_num: Number of context-word data layers to declare.
        is_train: Any truthy value selects the training topology, any falsy
            value selects the inference topology.

    Returns:
        When ``is_train`` is truthy, the ``paddle.layer.hsigmoid`` layer
        built on the hidden layer with ``__target_word__`` as label.
        Otherwise a ``paddle.layer.mixed`` layer of size ``dict_size - 1``
        with sigmoid activation that reuses the parameters named
        ``"sigmoid_w"`` and ``"sigmoid_b"``.
    """
    # All context words share one embedding table: every embedding layer is
    # given the same parameter name "_proj".
    embed_param_attr = paddle.attr.Param(
        name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)

    emb_layers = []
    for i in range(gram_num):
        word = paddle.layer.data(
            name="__word%02d__" % i,
            type=paddle.data_type.integer_value(dict_size))
        emb_layers.append(
            paddle.layer.embedding(
                input=word, size=embed_size, param_attr=embed_param_attr))

    # Declared in both modes; only the training branch consumes it.
    target_word = paddle.layer.data(
        name="__target_word__", type=paddle.data_type.integer_value(dict_size))

    embed_context = paddle.layer.concat(input=emb_layers)

    # NOTE(review): the factor 8 in the initial std is a hard-coded constant
    # that does not follow gram_num -- confirm it is intentional.
    hidden_layer = paddle.layer.fc(
        input=embed_context,
        size=hidden_size,
        act=paddle.activation.Sigmoid(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5),
        bias_attr=paddle.attr.Param(learning_rate=2),
        param_attr=paddle.attr.Param(
            initial_std=1. / math.sqrt(embed_size * 8), learning_rate=1))

    if is_train:
        return paddle.layer.hsigmoid(
            input=hidden_layer,
            label=target_word,
            num_classes=dict_size,
            param_attr=paddle.attr.Param(name="sigmoid_w"),
            bias_attr=paddle.attr.Param(name="sigmoid_b"))

    # Inference: reuse the weights and bias that the training branch names
    # "sigmoid_w" / "sigmoid_b".
    # Presumably the dict_size - 1 outputs correspond to the internal nodes
    # of the hsigmoid tree -- TODO confirm against the hsigmoid layer docs.
    return paddle.layer.mixed(
        size=dict_size - 1,
        input=paddle.layer.trans_full_matrix_projection(
            hidden_layer, param_attr=paddle.attr.Param(name="sigmoid_w")),
        act=paddle.activation.Sigmoid(),
        bias_attr=paddle.attr.Param(name="sigmoid_b"))


if __name__ == "__main__":
    # Test/debug entry point for the network topology definition.
    # Adjust the hyper-parameters below as needed.
    debug_topology = ngram_lm(
        hidden_size=512,
        embed_size=512,
        dict_size=1024,
        gram_num=4,
        is_train=False)
    parsed_topology = parse_network(debug_topology)
    print(parsed_topology)