model.py 4.6 KB
Newer Older
F
frankwhzhang 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import paddle.fluid as fluid

from paddlerec.core.utils import envs
C
Chengmo 已提交
19
from paddlerec.core.model import ModelBase
F
frankwhzhang 已提交
20 21 22 23 24 25 26
import numpy as np


class Model(ModelBase):
    """Two-branch user/item scoring network built with ``paddle.fluid``.

    The network combines:

    * an "MF" branch: the element-wise product of a user embedding and an
      item embedding, each of width ``latent_dim``;
    * an "MLP" branch: the concatenation of a second pair of user/item
      embeddings fed through a stack of ReLU fully-connected layers whose
      sizes come from ``fc_layers``.

    Both branches are concatenated and passed through a single sigmoid unit.
    NOTE(review): this layout looks like the NeuMF variant of neural
    collaborative filtering -- confirm against the model's README.
    """

    def __init__(self, config):
        ModelBase.__init__(self, config)

    def _init_hyper_parameters(self):
        """Load the hyper-parameters used by :meth:`net` from the global env.

        Sets ``num_users``, ``num_items``, ``latent_dim`` and ``layers`` (read
        from ``hyper_parameters.fc_layers``) on the instance.
        """
        self.num_users = envs.get_global_env("hyper_parameters.num_users")
        self.num_items = envs.get_global_env("hyper_parameters.num_items")
        self.latent_dim = envs.get_global_env("hyper_parameters.latent_dim")
        self.layers = envs.get_global_env("hyper_parameters.fc_layers")

    def input_data(self, is_infer=False, **kwargs):
        """Declare the input variables of the network.

        Args:
            is_infer: When true, the label variable is left out of the
                returned list.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``[user_input, item_input]`` when ``is_infer`` is true, otherwise
            ``[user_input, item_input, label]``. Every variable is declared
            as ``int64`` with shape ``[-1, 1]``.
        """
        user_input = fluid.data(
            name="user_input", shape=[-1, 1], dtype="int64", lod_level=0)
        item_input = fluid.data(
            name="item_input", shape=[-1, 1], dtype="int64", lod_level=0)
        # The label variable is declared in both modes (as before); it is
        # only omitted from the returned list when inferring.
        label = fluid.data(
            name="label", shape=[-1, 1], dtype="int64", lod_level=0)

        if is_infer:
            return [user_input, item_input]
        return [user_input, item_input, label]

    @staticmethod
    def _embed_ids(ids, vocab_size, dim):
        """Return a sparse embedding lookup of ``ids`` in a new table.

        The table has shape ``[vocab_size, dim]`` and is initialised from a
        normal distribution with mean 0.0 and standard deviation 0.01.
        """
        return fluid.embedding(
            input=ids,
            size=[vocab_size, dim],
            param_attr=fluid.initializer.Normal(
                loc=0.0, scale=0.01),
            is_sparse=True)

    def net(self, inputs, is_infer=False):
        """Build the network and register its outputs on the instance.

        Args:
            inputs: The list produced by :meth:`input_data`; index 0 holds
                the user ids, index 1 the item ids and, when training,
                index 2 the labels.
            is_infer: When true, only the prediction is registered and no
                loss is built.

        Returns:
            None. In inference mode the prediction is stored in
            ``self._infer_results["prediction"]``; otherwise the mean log
            loss is stored in ``self._cost`` and ``self._metrics["cost"]``.
            NOTE(review): these attributes are not defined in this class;
            presumably ``ModelBase`` creates them -- confirm.
        """
        user_ids, item_ids = inputs[0], inputs[1]
        num_layer = len(self.layers)  # Number of layers in the MLP
        # Each MLP embedding gets half of the first configured layer size, so
        # the concatenated MLP input has width layers[0] when that is even.
        mlp_embed_dim = int(self.layers[0] / 2)

        # Creation order (MF user, MF item, MLP user, MLP item) is kept from
        # the original code so the order in which parameters are created
        # does not change.
        mf_user_emb = self._embed_ids(user_ids, self.num_users,
                                      self.latent_dim)
        mf_item_emb = self._embed_ids(item_ids, self.num_items,
                                      self.latent_dim)
        mlp_user_emb = self._embed_ids(user_ids, self.num_users,
                                       mlp_embed_dim)
        mlp_item_emb = self._embed_ids(item_ids, self.num_items,
                                       mlp_embed_dim)

        # MF part: element-wise product of the flattened embeddings.
        mf_user_latent = fluid.layers.flatten(x=mf_user_emb, axis=1)
        mf_item_latent = fluid.layers.flatten(x=mf_item_emb, axis=1)
        mf_vector = fluid.layers.elementwise_mul(mf_user_latent,
                                                 mf_item_latent)

        # MLP part
        # The 0-th layer is the concatenation of embedding layers
        mlp_user_latent = fluid.layers.flatten(x=mlp_user_emb, axis=1)
        mlp_item_latent = fluid.layers.flatten(x=mlp_item_emb, axis=1)
        mlp_vector = fluid.layers.concat(
            input=[mlp_user_latent, mlp_item_latent], axis=-1)

        # layers[0] only sizes the embeddings above, so the fully-connected
        # stack starts at index 1.
        for i in range(1, num_layer):
            # Initialiser scale is 1/sqrt(fan_in), where fan_in is the width
            # of the current layer input.
            fan_in = mlp_vector.shape[1]
            mlp_vector = fluid.layers.fc(
                input=mlp_vector,
                size=self.layers[i],
                act='relu',
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.TruncatedNormal(
                        loc=0.0, scale=1.0 / math.sqrt(fan_in)),
                    regularizer=fluid.regularizer.L2DecayRegularizer(
                        regularization_coeff=1e-4)),
                name='layer_' + str(i))

        # Concatenate MF and MLP parts
        predict_vector = fluid.layers.concat(
            input=[mf_vector, mlp_vector], axis=-1)

        # Final prediction layer
        prediction = fluid.layers.fc(
            input=predict_vector,
            size=1,
            act='sigmoid',
            param_attr=fluid.initializer.MSRAInitializer(uniform=True),
            name='prediction')
        if is_infer:
            self._infer_results["prediction"] = prediction
            return

        # log_loss is given the int64 label cast to float32.
        cost = fluid.layers.log_loss(
            input=prediction,
            label=fluid.layers.cast(
                x=inputs[2], dtype='float32'))
        avg_cost = fluid.layers.mean(cost)

        self._cost = avg_cost
        self._metrics["cost"] = avg_cost