model.py 7.2 KB
Newer Older
T
tangwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16
import paddle.fluid as fluid

17
from paddlerec.core.utils import envs
C
Chengmo 已提交
18
from paddlerec.core.model import ModelBase
19 20 21 22 23 24


class Model(ModelBase):
    """xDeepFM model assembled from ``paddle.fluid`` layers.

    The prediction is ``sigmoid(y_linear + y_cin + y_dnn)``: a first-order
    linear logit, a CIN logit and a DNN logit.  The CIN and DNN parts both
    consume the same value-scaled field embeddings.
    """

    # Hyper-parameters that define tensor shapes / layer counts.  ``net``
    # cannot build the graph when any of them is None.
    _REQUIRED_HYPER_PARAMETERS = (
        "sparse_feature_number",
        "sparse_feature_dim",
        "num_field",
        "layer_sizes_cin",
        "layer_sizes_dnn",
    )

    def __init__(self, config):
        ModelBase.__init__(self, config)

    def _init_hyper_parameters(self):
        """Load the ``hyper_parameters.*`` settings onto the instance.

        Each value is fetched with ``envs.get_global_env(key, None)``
        (presumably ``None`` is the fallback for an absent key -- confirm
        against ``paddlerec.core.utils.envs``).
        """
        for name in self._REQUIRED_HYPER_PARAMETERS:
            setattr(self, name,
                    envs.get_global_env("hyper_parameters." + name, None))
        # Activation for the hidden DNN layers; None is passed through to
        # ``fluid.layers.fc`` unchanged.
        self.act = envs.get_global_env("hyper_parameters.act", None)

    def _check_hyper_parameters(self):
        """Raise ``ValueError`` naming every required setting that is None."""
        missing = [
            name for name in self._REQUIRED_HYPER_PARAMETERS
            if getattr(self, name, None) is None
        ]
        if missing:
            raise ValueError(
                "xDeepFM requires hyper_parameters: {}".format(
                    ", ".join(missing)))

    def net(self, inputs, is_infer=False):
        """Build the xDeepFM graph and register its loss and metrics.

        Sets ``self.label``, ``self.predict``, ``self._cost`` and the
        ``"AUC"`` / ``"BATCH_AUC"`` entries of ``self._metrics``.

        Args:
            inputs: Not read by this method; the input variables are taken
                from ``self._sparse_data_var`` and ``self._dense_data_var``.
            is_infer: When true, the AUC variable is also stored in
                ``self._infer_results["AUC"]``.

        Raises:
            ValueError: If a required hyper-parameter is ``None``.
        """
        self._check_hyper_parameters()

        # NOTE(review): the slot layout (label first, feature ids second,
        # feature values as the first dense slot) is fixed by whoever fills
        # these lists outside this class -- confirm against the data reader.
        feat_idx = self._sparse_data_var[1]
        raw_feat_value = self._dense_data_var[0]
        self.label = self._sparse_data_var[0]

        initer = fluid.initializer.TruncatedNormalInitializer(
            loc=0.0, scale=0.1)

        # ------------------------- network input -------------------------
        feat_value = fluid.layers.reshape(
            raw_feat_value, [-1, self.num_field, 1])  # batch, num_field, 1
        feat_embeddings = self._embed_fields(feat_idx, feat_value, initer)

        # Layers are created in the order linear -> CIN -> DNN.  Keep this
        # order: auto-generated parameter names depend on creation order.
        y_linear = self._linear_logit(feat_idx, feat_value, initer)
        y_cin = self._cin_logit(feat_embeddings, initer)
        y_dnn = self._dnn_logit(feat_embeddings, initer)

        # ---------------------------- xDeepFM ----------------------------
        self.predict = fluid.layers.sigmoid(y_linear + y_cin + y_dnn)
        self._register_loss_and_metrics(is_infer)

    def _embed_fields(self, feat_idx, feat_value, initer):
        """Return value-scaled embeddings, batch x num_field x emb_dim."""
        embeddings = fluid.embedding(
            input=feat_idx,
            is_sparse=True,
            dtype='float32',
            size=[self.sparse_feature_number + 1, self.sparse_feature_dim],
            padding_idx=0,
            param_attr=fluid.ParamAttr(initializer=initer))
        embeddings = fluid.layers.reshape(
            embeddings, [-1, self.num_field, self.sparse_feature_dim])
        # Broadcast multiply: each field embedding is scaled by its value.
        return embeddings * feat_value

    def _linear_logit(self, feat_idx, feat_value, initer):
        """Return the first-order logit: sum_i(w_i * value_i) + bias."""
        weights = fluid.embedding(
            input=feat_idx,
            is_sparse=True,
            dtype='float32',
            size=[self.sparse_feature_number + 1, 1],
            padding_idx=0,
            param_attr=fluid.ParamAttr(initializer=initer))
        weights = fluid.layers.reshape(
            weights, [-1, self.num_field, 1])  # batch, num_field, 1
        bias = fluid.layers.create_parameter(
            shape=[1],
            dtype='float32',
            default_initializer=fluid.initializer.ConstantInitializer(value=0))
        return fluid.layers.reduce_sum(weights * feat_value, 1) + bias

    def _cin_logit(self, feat_embeddings, initer):
        """Return the CIN logit computed from the field embeddings.

        Each entry ``s`` of ``self.layer_sizes_cin`` adds one CIN layer with
        ``s`` output feature maps.  The outputs of all layers are
        concatenated, sum-pooled over the embedding axis and projected to a
        single logit.
        """
        emb_dim = self.sparse_feature_dim
        layer_outputs = []
        x_prev = feat_embeddings
        prev_size = self.num_field
        for size in self.layer_sizes_cin:
            # Z^(k+1): outer product of X^0 and X^k along the field axes,
            # computed independently for every embedding dimension.
            x_0 = fluid.layers.reshape(
                fluid.layers.transpose(feat_embeddings, [0, 2, 1]),
                [-1, emb_dim, self.num_field, 1])  # batch, emb, num_field, 1
            x_k = fluid.layers.reshape(
                fluid.layers.transpose(x_prev, [0, 2, 1]),
                [-1, emb_dim, 1, prev_size])  # batch, emb, 1, prev_size
            z_next = fluid.layers.matmul(
                x_0, x_k)  # batch, emb, num_field, prev_size

            # Compress Z^(k+1) to X^(k+1): lay the interactions out as conv
            # input channels and mix them with a 1x1 convolution.
            channels = prev_size * self.num_field
            z_next = fluid.layers.reshape(
                z_next, [-1, emb_dim, channels])  # batch, emb, channels
            z_next = fluid.layers.transpose(
                z_next, [0, 2, 1])  # batch, channels, emb
            z_next = fluid.layers.reshape(
                z_next, [-1, channels, 1, emb_dim])  # batch, channel_in, h, w
            x_next = fluid.layers.conv2d(
                z_next,
                num_filters=size,
                filter_size=(1, 1),
                act=None,
                bias_attr=False,
                param_attr=fluid.ParamAttr(
                    initializer=initer))  # batch, size, 1, emb
            x_next = fluid.layers.reshape(
                x_next, [-1, size, emb_dim])  # batch, size, emb

            layer_outputs.append(x_next)
            x_prev = x_next
            prev_size = size

        # Sum pooling over the embedding axis, then a linear projection.
        pooled = fluid.layers.concat(layer_outputs, 1)  # batch, sum(sizes), emb
        pooled = fluid.layers.reduce_sum(pooled, -1)  # batch, sum(sizes)
        logit = fluid.layers.fc(input=pooled,
                                size=1,
                                act=None,
                                param_attr=fluid.ParamAttr(initializer=initer),
                                bias_attr=None)
        return fluid.layers.reduce_sum(logit, dim=-1, keep_dim=True)

    def _dnn_logit(self, feat_embeddings, initer):
        """Return the DNN logit computed from the flattened embeddings."""
        hidden = fluid.layers.reshape(
            feat_embeddings, [-1, self.num_field * self.sparse_feature_dim])
        for size in self.layer_sizes_dnn:
            hidden = fluid.layers.fc(
                input=hidden,
                size=size,
                act=self.act,
                param_attr=fluid.ParamAttr(initializer=initer),
                bias_attr=None)
        return fluid.layers.fc(input=hidden,
                               size=1,
                               act=None,
                               param_attr=fluid.ParamAttr(initializer=initer),
                               bias_attr=None)

    def _register_loss_and_metrics(self, is_infer):
        """Attach the log-loss cost and AUC metrics for ``self.predict``."""
        cost = fluid.layers.log_loss(
            input=self.predict,
            label=fluid.layers.cast(self.label, "float32"),
            epsilon=1e-7)
        self._cost = fluid.layers.reduce_mean(cost)

        # The auc layer is fed a two-column [1 - p, p] tensor.
        predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
        label_int = fluid.layers.cast(self.label, 'int64')
        auc_var, batch_auc_var, _ = fluid.layers.auc(input=predict_2d,
                                                     label=label_int,
                                                     slide_steps=0)
        self._metrics["AUC"] = auc_var
        self._metrics["BATCH_AUC"] = batch_auc_var
        if is_infer:
            self._infer_results["AUC"] = auc_var