# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
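
"""SR-GNN style session-based recommendation model (the "gnn" model in PaddleRec).

Each training instance is a user session, represented as a small directed
graph over the unique items clicked in that session. A gated graph neural
network propagates node states along the in/out adjacency matrices for a
configurable number of steps, a soft-attention readout builds a session
embedding, and the next click is predicted with a softmax over the whole
item vocabulary (cf. Wu et al., "Session-based Recommendation with Graph
Neural Networks", AAAI 2019).
"""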

import math
import numpy as np

import paddle.fluid as fluid
import paddle.fluid.layers as layers

from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase


class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)
        self.init_config()

    def init_config(self):
        self._fetch_interval = 1
        self.items_num, self.ins_num = self.config_read(
            envs.get_global_env("hyper_parameters.config_path", None, self._namespace))
        self.train_batch_size = envs.get_global_env("batch_size", None, "train.reader")
        self.evaluate_batch_size = envs.get_global_env("batch_size", None, "evaluate.reader")
        self.hidden_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
        self.step = envs.get_global_env("hyper_parameters.gnn_propogation_steps", None, self._namespace)

    def config_read(self, config_path=None):
        if config_path is None:
            raise ValueError("please set train.model.hyper_parameters.config_path first")
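        # The config file is expected to contain two integers on separate
        # lines: the number of distinct items, then the number of training
        # instances (e.g. "43098" and "719470" for a DIGINETICA-style setup).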
        with open(config_path, "r") as fin:
            item_nums = int(fin.readline().strip())
            ins_nums = int(fin.readline().strip())
        return item_nums, ins_nums

    def input(self, bs):
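        """Declare the feed variables for one batch of session graphs.

        items:      ids of the unique nodes in each session graph
        seq_index:  (batch, node) index pairs mapping every click in the padded
                    sequence back to its node slot, consumed by gather_nd
        last_index: (batch, node) index pair of the last click in each session
        adj_in/adj_out: weight-normalized incoming/outgoing adjacency matrices
        mask:       1.0 for real clicks, 0.0 for padding positions
        label:      id of the item clicked next (the prediction target)
        """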
        self.items = fluid.data(
            name="items",
            shape=[bs, -1],
            dtype="int64")  # [batch_size, uniq_max]
        self.seq_index = fluid.data(
            name="seq_index",
            shape=[bs, -1, 2],
            dtype="int32")  # [batch_size, seq_max, 2]
        self.last_index = fluid.data(
            name="last_index",
            shape=[bs, 2],
            dtype="int32")  # [batch_size, 2]
        self.adj_in = fluid.data(
            name="adj_in",
            shape=[bs, -1, -1],
            dtype="float32")  # [batch_size, seq_max, seq_max]
        self.adj_out = fluid.data(
            name="adj_out",
            shape=[bs, -1, -1],
            dtype="float32")  # [batch_size, seq_max, seq_max]
        self.mask = fluid.data(
            name="mask",
            shape=[bs, -1, 1],
            dtype="float32")  # [batch_size, seq_max, 1]
        self.label = fluid.data(
            name="label",
            shape=[bs, 1],
            dtype="int64")  # [batch_size, 1]

        res = [self.items, self.seq_index, self.last_index, self.adj_in, self.adj_out, self.mask, self.label]
        return res

    def train_input(self):
        res = self.input(self.train_batch_size)
        self._data_var = res

        use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace)

        if self._platform != "LINUX" or use_dataloader:
            self._data_loader = fluid.io.DataLoader.from_generator(
                feed_list=self._data_var, capacity=256, use_double_buffer=False, iterable=False)

    def net(self, items_num, hidden_size, step, bs):
        stdv = 1.0 / math.sqrt(hidden_size)

        def embedding_layer(input, table_name, emb_dim, initializer_instance=None):
            emb = fluid.embedding(
                input=input,
                size=[items_num, emb_dim],
                param_attr=fluid.ParamAttr(
                    name=table_name,
                    initializer=initializer_instance),
            )
            return emb

        sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
        items_emb = embedding_layer(self.items, "emb", hidden_size, sparse_initializer)
        pre_state = items_emb
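        # Gated graph propagation: in each step every node aggregates its
        # neighbors' states through the normalized in/out adjacency matrices
        # (via two separate linear maps), and a GRU cell fuses the aggregated
        # message with the node's previous state.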
        for i in range(step):
            pre_state = layers.reshape(x=pre_state, shape=[bs, -1, hidden_size])
            state_in = layers.fc(
                input=pre_state,
                name="state_in",
                size=hidden_size,
                act=None,
                num_flatten_dims=2,
                param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]
            state_out = layers.fc(
                input=pre_state,
                name="state_out",
                size=hidden_size,
                act=None,
                num_flatten_dims=2,
                param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]

            state_adj_in = layers.matmul(self.adj_in, state_in)  # [batch_size, uniq_max, h]
            state_adj_out = layers.matmul(self.adj_out, state_out)  # [batch_size, uniq_max, h]

            gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

            gru_input = layers.reshape(x=gru_input, shape=[-1, hidden_size * 2])
            gru_fc = layers.fc(
                input=gru_input,
                name="gru_fc",
                size=3 * hidden_size,
                bias_attr=False)
            pre_state, _, _ = fluid.layers.gru_unit(
                input=gru_fc,
                hidden=layers.reshape(x=pre_state, shape=[-1, hidden_size]),
                size=3 * hidden_size)

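        # Map the propagated node states back to sequence order: `seq` holds
        # one state per click position, `last` the state of the final click.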
        final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
        seq = layers.gather_nd(final_state, self.seq_index)
        last = layers.gather_nd(final_state, self.last_index)

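        # Soft-attention readout: score each position against the last click,
        # alpha_i = W_q * sigmoid(W_1 * s_i + W_2 * s_last + b), then fuse the
        # weighted sum of states with the last click into a session embedding.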
        seq_fc = layers.fc(
            input=seq,
            name="seq_fc",
            size=hidden_size,
            bias_attr=False,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, seq_max, h]
        last_fc = layers.fc(
            input=last,
            name="last_fc",
            size=hidden_size,
            bias_attr=False,
            act=None,
            num_flatten_dims=1,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, h]

        seq_fc_t = layers.transpose(
            seq_fc, perm=[1, 0, 2])  # [seq_max, batch_size, h]
        add = layers.elementwise_add(
            seq_fc_t, last_fc)  # [seq_max, batch_size, h]
        b = layers.create_parameter(
            shape=[hidden_size],
            dtype='float32',
            default_initializer=fluid.initializer.Constant(value=0.0))  # [h]
        add = layers.elementwise_add(add, b)  # [seq_max, batch_size, h]

        add_sigmoid = layers.sigmoid(add)  # [seq_max, batch_size, h]
        add_sigmoid = layers.transpose(
            add_sigmoid, perm=[1, 0, 2])  # [batch_size, seq_max, h]

        weight = layers.fc(
            input=add_sigmoid,
            name="weight_fc",
            size=1,
            act=None,
            num_flatten_dims=2,
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, seq_max, 1]
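        # Zero the attention weights at padded positions so that padding does
        # not contribute to the session embedding.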
        weight *= self.mask
        weight_mask = layers.elementwise_mul(seq, weight, axis=0)  # [batch_size, seq_max, h]
        global_attention = layers.reduce_sum(weight_mask, dim=1)  # [batch_size, h]

        final_attention = layers.concat(
            [global_attention, last], axis=1)  # [batch_size, 2*h]
        final_attention_fc = layers.fc(
            input=final_attention,
            name="final_attention_fc",
            size=hidden_size,
            bias_attr=False,
            act=None,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, h]

        # all_vocab = layers.create_global_var(
        #     shape=[items_num - 1],
        #     value=0,
        #     dtype="int64",
        #     persistable=True,
        #     name="all_vocab")
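        # Score the session embedding against the embeddings of all real item
        # ids (id 0, presumably the padding id, is skipped) by reusing the
        # shared "emb" table, and train with softmax cross-entropy.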
        all_vocab = np.arange(1, items_num).reshape((-1)).astype('int32')
        all_vocab = fluid.layers.cast(x=fluid.layers.assign(all_vocab), dtype='int64')

        all_emb = fluid.embedding(
            input=all_vocab,
            param_attr=fluid.ParamAttr(
                name="emb",
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
            size=[items_num, hidden_size])  # [all_vocab, h]

        logits = layers.matmul(
            x=final_attention_fc, y=all_emb,
            transpose_y=True)  # [batch_size, all_vocab]
        cross_entropy = layers.softmax_with_cross_entropy(
            logits=logits, label=self.label)  # [batch_size, 1]
        self.loss = layers.reduce_mean(cross_entropy)  # [1]
        self.acc = layers.accuracy(input=logits, label=self.label, k=20)  # top-20 hit rate

    def avg_loss(self):
        self._cost = self.loss

    def metrics(self):
        self._metrics["LOSS"] = self.loss
        self._metrics["train_acc"] = self.acc

    def train_net(self):
        self.train_input()
        self.net(self.items_num, self.hidden_size, self.step, self.train_batch_size)
        self.avg_loss()
        self.metrics()

    def optimizer(self):
        learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
        step_per_epoch = self.ins_num // self.train_batch_size
        decay_steps = envs.get_global_env("hyper_parameters.decay_steps", None, self._namespace)
        decay_rate = envs.get_global_env("hyper_parameters.decay_rate", None, self._namespace)
        l2 = envs.get_global_env("hyper_parameters.l2", None, self._namespace)
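        # Adam with exponential learning-rate decay (multiplied by decay_rate
        # every `decay_steps` epochs, converted to batch steps) and global L2
        # regularization.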
        optimizer = fluid.optimizer.Adam(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=learning_rate,
                decay_steps=decay_steps * step_per_epoch,
                decay_rate=decay_rate),
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=l2))

        return optimizer

    def infer_input(self):
        self._reader_namespace = "evaluate.reader"
        res = self.input(self.evaluate_batch_size)
        self._infer_data_var = res

        self._infer_data_loader = fluid.io.DataLoader.from_generator(
            feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)

    def infer_net(self):
        self.infer_input()
        self.net(self.items_num, self.hidden_size, self.step, self.evaluate_batch_size)
        self._infer_results['acc'] = self.acc
        self._infer_results['loss'] = self.loss
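
# A rough sketch of how a PaddleRec trainer is expected to drive this model
# (illustrative only; the actual entry points live in paddlerec.core):
#
#     model = Model(config)
#     model.train_net()                        # inputs, network, loss, metrics
#     model.optimizer().minimize(model._cost)  # built from hyper_parameters
#     # ...run the startup program, then feed batches via the DataLoader...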