# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

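# A session-based recommendation model built as a graph neural network; the
# structure follows SR-GNN ("Session-based Recommendation with Graph Neural
# Networks", Wu et al., AAAI 2019): each session is converted into an item
# graph, node states are propagated with a gated GNN, and an attention
# readout produces a session embedding scored against all item embeddings.
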
import math
import numpy as np

import paddle.fluid as fluid
import paddle.fluid.layers as layers

from paddlerec.core.utils import envs
from paddlerec.core.model import ModelBase
from paddlerec.core.metrics import RecallK


class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)

    def _init_hyper_parameters(self):
        self.learning_rate = envs.get_global_env(
            "hyper_parameters.optimizer.learning_rate")
        self.decay_steps = envs.get_global_env(
            "hyper_parameters.optimizer.decay_steps")
        self.decay_rate = envs.get_global_env(
            "hyper_parameters.optimizer.decay_rate")
        self.l2 = envs.get_global_env("hyper_parameters.optimizer.l2")

        self.dict_size = envs.get_global_env(
            "hyper_parameters.sparse_feature_number")
        self.corpus_size = envs.get_global_env("hyper_parameters.corpus_size")

        self.train_batch_size = envs.get_global_env(
            "dataset.dataset_train.batch_size")
        self.evaluate_batch_size = envs.get_global_env(
            "dataset.dataset_infer.batch_size")

        self.hidden_size = envs.get_global_env(
            "hyper_parameters.sparse_feature_dim")
        self.step = envs.get_global_env(
            "hyper_parameters.gnn_propogation_steps")

    def input_data(self, is_infer=False, **kwargs):
        if is_infer:
            bs = self.evaluate_batch_size
        else:
            bs = self.train_batch_size
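        # Session-graph inputs: `items` holds the deduplicated item ids of a
        # session graph; `adj_in`/`adj_out` are its in/out adjacency matrices
        # (typically degree-normalized by the data reader); `seq_index` and
        # `last_index` are gather_nd indices mapping sequence positions (and
        # the last click) back into the node tensor; `mask` marks valid
        # positions.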
        items = fluid.data(
            name="items", shape=[bs, -1],
            dtype="int64")  # [batch_size, uniq_max]
        seq_index = fluid.data(
            name="seq_index", shape=[bs, -1, 2],
            dtype="int32")  # [batch_size, seq_max, 2]
        last_index = fluid.data(
            name="last_index", shape=[bs, 2], dtype="int32")  # [batch_size, 2]
        adj_in = fluid.data(
            name="adj_in", shape=[bs, -1, -1],
            dtype="float32")  # [batch_size, seq_max, seq_max]
        adj_out = fluid.data(
            name="adj_out", shape=[bs, -1, -1],
            dtype="float32")  # [batch_size, seq_max, seq_max]
        mask = fluid.data(
            name="mask", shape=[bs, -1, 1],
            dtype="float32")  # [batch_size, seq_max, 1]
        label = fluid.data(
            name="label", shape=[bs, 1], dtype="int64")  # [batch_size, 1]

        res = [items, seq_index, last_index, adj_in, adj_out, mask, label]
        return res

    def net(self, inputs, is_infer=False):
        if is_infer:
            bs = self.evaluate_batch_size
        else:
            bs = self.train_batch_size

        stdv = 1.0 / math.sqrt(self.hidden_size)
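        # stdv is used below to initialize every learned parameter from
        # U(-1/sqrt(hidden_size), 1/sqrt(hidden_size)).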

        def embedding_layer(input,
                            table_name,
                            emb_dim,
                            initializer_instance=None):
            emb = fluid.embedding(
                input=input,
                size=[self.dict_size, emb_dim],
                param_attr=fluid.ParamAttr(
                    name=table_name, initializer=initializer_instance))
            return emb

        sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
        items_emb = embedding_layer(inputs[0], "emb", self.hidden_size,
                                    sparse_initializer)
        pre_state = items_emb
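        # Gated GNN propagation: at each step, node states are exchanged along
        # incoming and outgoing edges, and the two messages are fused into the
        # next node state by a GRU cell.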
        for i in range(self.step):
            pre_state = layers.reshape(
                x=pre_state, shape=[bs, -1, self.hidden_size])
            state_in = layers.fc(
                input=pre_state,
                name="state_in",
                size=self.hidden_size,
                act=None,
                num_flatten_dims=2,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]
            state_out = layers.fc(
                input=pre_state,
                name="state_out",
                size=self.hidden_size,
                act=None,
                num_flatten_dims=2,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]

            state_adj_in = layers.matmul(inputs[3],
                                         state_in)  # [batch_size, uniq_max, h]
            state_adj_out = layers.matmul(
                inputs[4], state_out)  # [batch_size, uniq_max, h]

            gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

            gru_input = layers.reshape(
                x=gru_input, shape=[-1, self.hidden_size * 2])
            gru_fc = layers.fc(input=gru_input,
                               name="gru_fc",
                               size=3 * self.hidden_size,
                               bias_attr=False)
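            # gru_unit expects the input projection for all three gates
            # (3 * hidden_size) to be precomputed, hence the separate gru_fc.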
            pre_state, _, _ = fluid.layers.gru_unit(
                input=gru_fc,
                hidden=layers.reshape(
                    x=pre_state, shape=[-1, self.hidden_size]),
                size=3 * self.hidden_size)

        final_state = layers.reshape(
            pre_state, shape=[bs, -1, self.hidden_size])
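        # Gather each clicked position's final node state (seq) and the last
        # click's state (last) back out of the node tensor via the index
        # inputs.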
        seq = layers.gather_nd(final_state, inputs[1])
        last = layers.gather_nd(final_state, inputs[2])

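        # Soft-attention readout: score position i by a linear map of
        # sigmoid(W1 * h_i + W2 * h_last + b), mask padded positions, and sum
        # the weighted states into a global session vector.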
        seq_fc = layers.fc(
            input=seq,
            name="seq_fc",
            size=self.hidden_size,
            bias_attr=False,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, seq_max, h]
        last_fc = layers.fc(input=last,
                            name="last_fc",
                            size=self.hidden_size,
                            bias_attr=False,
                            act=None,
                            num_flatten_dims=1,
                            param_attr=fluid.ParamAttr(
                                initializer=fluid.initializer.Uniform(
                                    low=-stdv, high=stdv)))  # [batch_size, h]

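        # Transpose to [seq_max, batch_size, h] so that adding last_fc
        # broadcasts the last-click term across all sequence positions.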
        seq_fc_t = layers.transpose(
            seq_fc, perm=[1, 0, 2])  # [seq_max, batch_size, h]
        add = layers.elementwise_add(seq_fc_t,
                                     last_fc)  # [seq_max, batch_size, h]
        b = layers.create_parameter(
            shape=[self.hidden_size],
            dtype='float32',
            default_initializer=fluid.initializer.Constant(value=0.0))  # [h]
        add = layers.elementwise_add(add, b)  # [seq_max, batch_size, h]

        add_sigmoid = layers.sigmoid(add)  # [seq_max, batch_size, h]
        add_sigmoid = layers.transpose(
            add_sigmoid, perm=[1, 0, 2])  # [batch_size, seq_max, h]

        weight = layers.fc(
            input=add_sigmoid,
            name="weight_fc",
            size=1,
            act=None,
            num_flatten_dims=2,
            bias_attr=False,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, seq_max, 1]
        weight *= inputs[5]
        weight_mask = layers.elementwise_mul(
            seq, weight, axis=0)  # [batch_size, seq_max, h]
        global_attention = layers.reduce_sum(
            weight_mask, dim=1)  # [batch_size, h]

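        # The session representation concatenates the attentive global vector
        # with the last-click (local) vector, then projects back to h.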
        final_attention = layers.concat(
            [global_attention, last], axis=1)  # [batch_size, 2*h]
        final_attention_fc = layers.fc(
            input=final_attention,
            name="final_attention_fc",
            size=self.hidden_size,
            bias_attr=False,
            act=None,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, h]

        # all_vocab = layers.create_global_var(
        #     shape=[items_num - 1],
        #     value=0,
        #     dtype="int64",
        #     persistable=True,
        #     name="all_vocab")
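        # Score the session vector against every item embedding (the "emb"
        # table is shared with the input embedding layer) and train with
        # softmax cross-entropy over the full vocabulary.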
        all_vocab = np.arange(1, self.dict_size).reshape((-1)).astype('int32')
        all_vocab = fluid.layers.cast(
            x=fluid.layers.assign(all_vocab), dtype='int64')

        all_emb = fluid.embedding(
            input=all_vocab,
            param_attr=fluid.ParamAttr(
                name="emb",
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
            size=[self.dict_size, self.hidden_size])  # [all_vocab, h]

        logits = layers.matmul(
            x=final_attention_fc, y=all_emb,
            transpose_y=True)  # [batch_size, all_vocab]
        softmax = layers.softmax_with_cross_entropy(
            logits=logits, label=inputs[6])  # [batch_size, 1]
        self.loss = layers.reduce_mean(softmax)  # [1]
        acc = RecallK(input=logits, label=inputs[6], k=20)
        self._cost = self.loss

        if is_infer:
            self._infer_results['P@20'] = acc
            self._infer_results['LOSS'] = self.loss
            return

        self._metrics["LOSS"] = self.loss
        self._metrics["Train_P@20"] = acc

    def optimizer(self):
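        # Adam with exponential learning-rate decay and L2 regularization;
        # decay_steps is configured in epochs, so it is scaled by the number
        # of batches per epoch.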
        step_per_epoch = self.corpus_size // self.train_batch_size
        optimizer = fluid.optimizer.Adam(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=self.learning_rate,
                decay_steps=self.decay_steps * step_per_epoch,
                decay_rate=self.decay_rate),
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=self.l2))
        return optimizer