#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers


def network(items_num, hidden_size, step):
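    """Build the session-graph gated-GNN recommendation network.

    Runs `step` rounds of gated message passing over each session graph,
    pools the node states with a soft-attention readout, and scores the
    resulting session vector against every item embedding.

    Returns the mean cross-entropy loss, top-20 accuracy, the py_reader that
    feeds the inputs, and the list of tensors read from it.
    """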
    stdv = 1.0 / math.sqrt(hidden_size)

    items = layers.data(
        name="items",
        shape=[1, 1],
        dtype="int64")  # [batch_size, uniq_max, 1]
    seq_index = layers.data(
        name="seq_index",
        shape=[1],
        dtype="int32")  # [batch_size, seq_max]
    last_index = layers.data(
        name="last_index",
        shape=[1],
        dtype="int32")  # [batch_size, 1]
    adj_in = layers.data(
        name="adj_in",
        shape=[1, 1],
        dtype="float32")  # [batch_size, seq_max, seq_max]
    adj_out = layers.data(
        name="adj_out",
        shape=[1, 1],
        dtype="float32")  # [batch_size, seq_max, seq_max]
    mask = layers.data(
        name="mask",
        shape=[1, 1],
        dtype="float32")  # [batch_size, seq_max, 1]
    label = layers.data(
        name="label",
        shape=[1],
        dtype="int64")  # [batch_size, 1]

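    # Bundle the declared inputs into a py_reader so batches can be fed
    # asynchronously from Python with double buffering.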
    datas = [items, seq_index, last_index, adj_in, adj_out, mask, label]
    py_reader = fluid.layers.create_py_reader_by_data(
        capacity=256, feed_list=datas, name='py_reader', use_double_buffer=True)
    feed_datas = fluid.layers.read_file(py_reader)
    items, seq_index, last_index, adj_in, adj_out, mask, label = feed_datas

    items_emb = layers.embedding(
        input=items,
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
        size=[items_num, hidden_size])  # [batch_size, uniq_max, h]
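    # Capture the runtime shape [batch_size, uniq_max, h]; it is reused as
    # actual_shape to restore pre_state's 3-D layout at every propagation step.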
    items_emb_shape = layers.shape(items_emb)

    pre_state = items_emb
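    # Gated graph propagation: each step projects the node states, aggregates
    # them through the in/out adjacency matrices, and updates them with a GRU
    # cell.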
    for i in range(step):
        pre_state = layers.reshape(
            x=pre_state, shape=[-1, 1, hidden_size], actual_shape=items_emb_shape)
        state_in = layers.fc(
            input=pre_state,
            name="state_in",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  #[batch_size, uniq_max, h]
        state_out = layers.fc(
            input=pre_state,
            name="state_out",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  #[batch_size, uniq_max, h]

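        # Message passing: aggregate neighbor states through the incoming and
        # outgoing adjacency matrices of the session graph.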
        state_adj_in = layers.matmul(adj_in, state_in)  #[batch_size, uniq_max, h]
        state_adj_out = layers.matmul(adj_out, state_out)   #[batch_size, uniq_max, h]

        gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

        gru_input = layers.reshape(x=gru_input, shape=[-1, hidden_size * 2])
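        # GRU update: gru_fc produces the 3*h gate pre-activations consumed by
        # gru_unit, which combines them with the previous node states.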
        gru_fc = layers.fc(
            input=gru_input,
            name="gru_fc",
            size=3 * hidden_size,
            bias_attr=False)
        pre_state, _, _ = fluid.layers.gru_unit(
            input=gru_fc,
            hidden=layers.reshape(
                x=pre_state, shape=[-1, hidden_size]),
            size=3 * hidden_size)

    final_state = pre_state #[batch_size * uniq_max, h]

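    # seq_index maps every position of the padded sequences back to its node in
    # final_state. seq_origin_shape evaluates to [batch_size, seq_max, h] (the
    # shape of the unsqueezed seq_index plus hidden_size - 1 on the last axis)
    # and is only used as actual_shape when restoring the padded layout below.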
    seq_origin_shape = layers.assign(
        np.array([0, 0, hidden_size - 1]).astype("int32"))
    seq_origin_shape += layers.shape(
        layers.unsqueeze(seq_index, [2]))  # value: [batch_size, seq_max, h]
    seq_origin_shape.stop_gradient = True

    seq_index = layers.reshape(seq_index, shape=[-1])
    seq = layers.gather(final_state, seq_index)  #[batch_size * seq_max, h]
    last = layers.gather(final_state, last_index)  #[batch_size, h]

    seq = layers.reshape(
        seq, shape=[-1, 1, hidden_size], actual_shape=seq_origin_shape)  #[batch_size, seq_max, h]
    last = layers.reshape(
        last, shape=[-1, hidden_size])  #[batch_size, h]

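    # Soft-attention readout: score each position against the last-clicked item
    # and pool the sequence into a single global session vector.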
    seq_fc = layers.fc(
        input=seq,
        name="seq_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, seq_max, h]
    last_fc = layers.fc(
        input=last,
        name="last_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=1,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  # [batch_size, h]

    seq_fc_t = layers.transpose(
        seq_fc, perm=[1, 0, 2])  # [seq_max, batch_size, h]
    add = layers.elementwise_add(
        seq_fc_t, last_fc)  # [seq_max, batch_size, h]
    b = layers.create_parameter(
        shape=[hidden_size],
        dtype='float32',
        default_initializer=fluid.initializer.Constant(value=0.0))  #[h]
    add = layers.elementwise_add(add, b)  #[seq_max, batch_size, h]

    add_sigmoid = layers.sigmoid(add)  # [seq_max, batch_size, h]
    add_sigmoid = layers.transpose(
        add_sigmoid, perm=[1, 0, 2])  # [batch_size, seq_max, h]

    weight = layers.fc(
        input=add_sigmoid,
        name="weight_fc",
        size=1,
        act=None,
        num_flatten_dims=2,
        bias_attr=False,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, seq_max, 1]
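    # Zero out the attention weights at padded positions.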
    weight *= mask
    weight_mask = layers.elementwise_mul(seq, weight, axis=0) #[batch_size, seq_max, h]
    global_attention = layers.reduce_sum(weight_mask, dim=1) #[batch_size, h]

    final_attention = layers.concat(
        [global_attention, last], axis=1)  # [batch_size, 2*h]
    final_attention_fc = layers.fc(
        input=final_attention,
        name="final_attention_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  # [batch_size, h]

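    # Persistable variable holding the ids of all candidate items; it is
    # expected to be populated by the caller and shares the "emb" parameter
    # with the input embedding.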
    all_vocab = layers.create_global_var(
        shape=[items_num - 1, 1],
        value=0,
        dtype="int64",
        persistable=True,
        name="all_vocab")

    all_emb = layers.embedding(
        input=all_vocab,
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
        size=[items_num, hidden_size])  # [all_vocab, h]

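    # Score the session vector against every candidate item embedding; the
    # cross entropy is computed from the raw logits and accuracy is reported
    # at top-20.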
    logits = layers.matmul(
        x=final_attention_fc, y=all_emb,
        transpose_y=True)  #[batch_size, all_vocab]
    cross_entropy = layers.softmax_with_cross_entropy(
        logits=logits, label=label)  # [batch_size, 1]
    loss = layers.reduce_mean(cross_entropy)  # [1]
    acc = layers.accuracy(input=logits, label=label, k=20)
    return loss, acc, py_reader, feed_datas
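
# ---------------------------------------------------------------------------
# Minimal usage sketch (an illustration, not part of the original training
# script). It shows one way to build the program, attach an optimizer, and
# drive the py_reader returned by network(). ITEMS_NUM, the hyper-parameters,
# and batch_generator (a reader yielding the seven declared inputs) are
# hypothetical placeholders.
# ---------------------------------------------------------------------------
# loss, acc, py_reader, _ = network(items_num=ITEMS_NUM, hidden_size=100, step=1)
# fluid.optimizer.Adam(learning_rate=0.001).minimize(loss)
# exe = fluid.Executor(fluid.CPUPlace())
# exe.run(fluid.default_startup_program())
# py_reader.decorate_paddle_reader(batch_generator)
# py_reader.start()
# try:
#     while True:
#         loss_val, acc_val = exe.run(fetch_list=[loss.name, acc.name])
# except fluid.core.EOFException:
#     py_reader.reset()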