ngram.py 2.4 KB
Newer Older
Z
zhangruiqing01 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

import math

#################### Data Configure ####################
Z
zhangruiqing01 已提交
20 21 22 23
# Extra arguments passed to the data provider through
# define_py_data_sources2(args=...): the source text path and the
# dictionary file path.
args = dict(
    srcText='data/simple-examples/data/ptb.train.txt',
    dictfile='data/vocabulary.txt')
Z
zhangruiqing01 已提交
24 25 26 27 28 29 30 31
# Register the train/test file lists with the Python data provider.
# NOTE(review): per PaddlePaddle's define_py_data_sources2, `module` and `obj`
# name the provider module and the object inside it that yields samples;
# `args` is forwarded to that provider -- confirm against dataprovider.py.
define_py_data_sources2(
    train_list="data/train.list",
    test_list="data/test.list",
    module="dataprovider",
    obj="process",
    args=args)

# Global training settings: mini-batch size, L2 regularization and the
# base learning rate.
settings(
    batch_size=100,
    regularization=L2Regularization(8e-4),
    learning_rate=3e-3)
Z
zhangruiqing01 已提交
33 34 35 36 37

# Layer sizes used by the network definition in this file.
dictsize = 1953  # size of every word data layer and of the softmax output layer
embsize = 32  # size of the table projection (embedding) built for each word
hiddensize = 256  # size of the sigmoid hidden layer

Z
zhangruiqing01 已提交
38 39 40 41 42 43
# One data layer of size dictsize per word of the 5-gram: the first four
# words are embedded as context, and the fifth ("fifthw") is used as the
# label of the classification cost.
firstword = data_layer(name="firstw", size=dictsize)
secondword = data_layer(name="secondw", size=dictsize)
thirdword = data_layer(name="thirdw", size=dictsize)
fourthword = data_layer(name="fourthw", size=dictsize)
nextword = data_layer(name="fifthw", size=dictsize)

Z
zhangruiqing01 已提交
44 45 46 47

# construct word embedding for each datalayer
def wordemb(inlayer):
    """Build the embedding projection for one input word layer.

    Every call uses the same parameter name ("_proj").
    NOTE(review): in PaddlePaddle, parameters with the same name are shared,
    so all word positions presumably use one embedding table -- confirm
    against the ParamAttr documentation.

    Args:
        inlayer: the data layer holding one word of the n-gram.

    Returns:
        The ``table_projection`` of size ``embsize`` built on ``inlayer``.
    """
    # Returned directly so no local name shadows this function.
    return table_projection(
        input=inlayer,
        size=embsize,
        param_attr=ParamAttr(
            name="_proj",
            initial_std=0.001,
            learning_rate=1,
            l2_rate=0))

Z
zhangruiqing01 已提交
57

Z
zhangruiqing01 已提交
58 59 60 61 62 63
# Embed each of the four context words.
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)

# concatenate N-gram embeddings into context embedding
contextemb = concat_layer(input=[Efirst, Esecond, Ethird, Efourth])
Z
zhangruiqing01 已提交
65
# Hidden layer over the context embedding: sigmoid units with a drop rate of
# 0.5; the bias uses a learning-rate multiplier of 2, the weights use 1.
hidden1 = fc_layer(
    input=contextemb,
    size=hiddensize,
    act=SigmoidActivation(),
    layer_attr=ExtraAttr(drop_rate=0.5),
    bias_attr=ParamAttr(learning_rate=2),
    param_attr=ParamAttr(
        initial_std=1. / math.sqrt(embsize * 8), learning_rate=1))
Z
zhangruiqing01 已提交
73 74 75

# use context embedding to predict nextword
predictword = fc_layer(
    input=hidden1,
    size=dictsize,  # one softmax output per dictionary entry
    bias_attr=ParamAttr(learning_rate=2),
    act=SoftmaxActivation())
Z
zhangruiqing01 已提交
80

Z
zhangruiqing01 已提交
81
# Classification cost of the softmax prediction against the fifth word.
cost = classification_cost(input=predictword, label=nextword)

# network input and output
outputs(cost)