# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import paddle.fluid as fluid

from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase


class Model(ModelBase):
    """Embedding model with a sampled-negative training graph and an
    analogy-style evaluation graph.

    Training (``train_net``) looks up an input word in the ``emb`` table and
    the true / negative words in the ``emb_w`` and ``emb_b`` tables, then
    minimises a sigmoid cross-entropy loss where the true word is labelled 1
    and every negative word is labelled 0.

    Evaluation (``infer_net``) computes ``emb[b] - emb[a] + emb[c]`` and
    counts how often ``analogy_d`` appears among the top-scoring rows of the
    L2-normalised ``emb`` table.

    All hyper-parameters are read from the ``hyper_parameters.*`` entries of
    ``self._namespace`` through ``envs.get_global_env``.
    """

    def __init__(self, config):
        ModelBase.__init__(self, config)

    def _hyper_param(self, name):
        """Look up ``hyper_parameters.<name>`` in this model's namespace.

        ``None`` is passed to ``envs.get_global_env`` as the fallback value.
        """
        return envs.get_global_env("hyper_parameters." + name, None,
                                   self._namespace)

    def input(self):
        """Declare the training feed variables.

        Creates ``input_word`` and ``true_label`` (both ``[None, 1]`` int64)
        and, unless ``with_shuffle_batch`` is enabled, ``neg_label``
        (``[None, neg_num]`` int64). Each variable is appended to
        ``self._data_var``. When ``self._platform`` is not ``"LINUX"`` a
        non-iterable ``DataLoader`` over those variables is also created.
        """
        neg_num = int(self._hyper_param("neg_num"))

        self.input_word = fluid.data(
            name="input_word", shape=[None, 1], dtype='int64')
        self.true_word = fluid.data(
            name='true_label', shape=[None, 1], dtype='int64')
        self._data_var.append(self.input_word)
        self._data_var.append(self.true_word)

        with_shuffle_batch = bool(
            int(self._hyper_param("with_shuffle_batch")))
        if not with_shuffle_batch:
            # Negatives are fed explicitly only when they are not produced
            # in-graph by shuffling the batch (see ``net``).
            self.neg_word = fluid.data(
                name="neg_label", shape=[None, neg_num], dtype='int64')
            self._data_var.append(self.neg_word)

        if self._platform != "LINUX":
            self._data_loader = fluid.io.DataLoader.from_generator(
                feed_list=self._data_var,
                capacity=64,
                use_double_buffer=False,
                iterable=False)

    def net(self):
        """Build the training graph and store the mean loss in
        ``self.avg_cost``.

        Must run after ``input`` because it reads ``self.input_word``,
        ``self.true_word`` and (without shuffle-batch) ``self.neg_word``.
        """
        is_distributed = envs.get_trainer() == "CtrTrainer"
        neg_num = int(self._hyper_param("neg_num"))
        sparse_feature_number = self._hyper_param("sparse_feature_number")
        sparse_feature_dim = self._hyper_param("sparse_feature_dim")
        with_shuffle_batch = bool(
            int(self._hyper_param("with_shuffle_batch")))

        def embedding_layer(ids,
                            table_name,
                            emb_dim,
                            initializer_instance=None,
                            squeeze=False):
            """Sparse lookup of ``ids`` in the table named ``table_name``;
            optionally squeeze axis 1 of the result."""
            emb = fluid.embedding(
                input=ids,
                is_sparse=True,
                is_distributed=is_distributed,
                size=[sparse_feature_number, emb_dim],
                param_attr=fluid.ParamAttr(
                    name=table_name, initializer=initializer_instance))
            if squeeze:
                return fluid.layers.squeeze(input=emb, axes=[1])
            return emb

        init_width = 0.5 / sparse_feature_dim
        emb_initializer = fluid.initializer.Uniform(-init_width, init_width)
        # "emb_w" and "emb_b" both start at zero.
        zero_initializer = fluid.initializer.Constant(value=0.0)

        input_emb = embedding_layer(self.input_word, "emb",
                                    sparse_feature_dim, emb_initializer,
                                    True)
        true_emb_w = embedding_layer(self.true_word, "emb_w",
                                     sparse_feature_dim, zero_initializer,
                                     True)
        true_emb_b = embedding_layer(self.true_word, "emb_b", 1,
                                     zero_initializer, True)

        if with_shuffle_batch:
            # Negatives are obtained by shuffling the true-word embeddings
            # within the batch, ``neg_num`` times.
            neg_emb_w_list = [
                fluid.contrib.layers.shuffle_batch(true_emb_w)
                for _ in range(neg_num)
            ]
            neg_emb_w_concat = fluid.layers.concat(neg_emb_w_list, axis=0)
            neg_emb_w = fluid.layers.reshape(
                neg_emb_w_concat, shape=[-1, neg_num, sparse_feature_dim])

            neg_emb_b_list = [
                fluid.contrib.layers.shuffle_batch(true_emb_b)
                for _ in range(neg_num)
            ]
            neg_emb_b = fluid.layers.concat(neg_emb_b_list, axis=0)
        else:
            # Negatives come from the fed ``neg_label`` ids.
            neg_emb_w = embedding_layer(self.neg_word, "emb_w",
                                        sparse_feature_dim,
                                        zero_initializer)
            neg_emb_b = embedding_layer(self.neg_word, "emb_b", 1,
                                        zero_initializer)
        neg_emb_b_vec = fluid.layers.reshape(neg_emb_b, shape=[-1, neg_num])

        # True logit: <input_emb, true_emb_w> + bias.
        true_logits = fluid.layers.elementwise_add(
            fluid.layers.reduce_sum(
                fluid.layers.elementwise_mul(input_emb, true_emb_w),
                dim=1,
                keep_dim=True),
            true_emb_b)

        # Negative logits: input_emb against every negative weight row,
        # plus the matching biases.
        input_emb_re = fluid.layers.reshape(
            input_emb, shape=[-1, 1, sparse_feature_dim])
        neg_matmul = fluid.layers.matmul(
            input_emb_re, neg_emb_w, transpose_y=True)
        neg_logits = fluid.layers.elementwise_add(
            fluid.layers.reshape(neg_matmul, shape=[-1, neg_num]),
            neg_emb_b_vec)

        label_ones = fluid.layers.fill_constant_batch_size_like(
            true_logits, shape=[-1, 1], value=1.0, dtype='float32')
        label_zeros = fluid.layers.fill_constant_batch_size_like(
            true_logits, shape=[-1, neg_num], value=0.0, dtype='float32')

        true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(
            true_logits, label_ones)
        neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(
            neg_logits, label_zeros)
        cost = fluid.layers.elementwise_add(
            fluid.layers.reduce_sum(true_xent, dim=1),
            fluid.layers.reduce_sum(neg_xent, dim=1))
        self.avg_cost = fluid.layers.reduce_mean(cost)

        # These counters are not used by the training graph itself; the same
        # names are created again in ``infer_net``.
        # NOTE(review): presumably declared here so the persistable variables
        # exist in the training program as well - confirm before removing.
        global_right_cnt = fluid.layers.create_global_var(
            name="global_right_cnt",
            persistable=True,
            dtype='float32',
            shape=[1],
            value=0)
        global_total_cnt = fluid.layers.create_global_var(
            name="global_total_cnt",
            persistable=True,
            dtype='float32',
            shape=[1],
            value=0)
        global_right_cnt.stop_gradient = True
        global_total_cnt.stop_gradient = True

    def avg_loss(self):
        """Expose the mean loss built by ``net`` as ``self._cost``."""
        self._cost = self.avg_cost

    def metrics(self):
        """Register the mean loss under the ``"LOSS"`` metric key."""
        self._metrics["LOSS"] = self.avg_cost

    def train_net(self):
        """Build the full training program: inputs, network, loss, metrics."""
        self.input()
        self.net()
        self.avg_loss()
        self.metrics()

    def optimizer(self):
        """Return an SGD optimizer with staircase exponential LR decay.

        ``learning_rate``, ``decay_steps`` and ``decay_rate`` are read from
        the hyper-parameters.
        """
        learning_rate = self._hyper_param("learning_rate")
        decay_steps = self._hyper_param("decay_steps")
        decay_rate = self._hyper_param("decay_rate")
        return fluid.optimizer.SGD(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=learning_rate,
                decay_steps=decay_steps,
                decay_rate=decay_rate,
                staircase=True))

    def analogy_input(self):
        """Declare the four ``analogy_*`` feed variables and their loader.

        Each variable is an int64 tensor of shape ``[None]``. They are stored
        in ``self._infer_data_var`` and wrapped in a non-iterable
        ``DataLoader`` assigned to ``self._infer_data_loader``.
        """
        self.analogy_a = fluid.data(
            name="analogy_a", shape=[None], dtype='int64')
        self.analogy_b = fluid.data(
            name="analogy_b", shape=[None], dtype='int64')
        self.analogy_c = fluid.data(
            name="analogy_c", shape=[None], dtype='int64')
        self.analogy_d = fluid.data(
            name="analogy_d", shape=[None], dtype='int64')
        self._infer_data_var = [
            self.analogy_a, self.analogy_b, self.analogy_c, self.analogy_d
        ]

        self._infer_data_loader = fluid.io.DataLoader.from_generator(
            feed_list=self._infer_data_var,
            capacity=64,
            use_double_buffer=False,
            iterable=False)

    def infer_net(self):
        """Build the evaluation graph and publish a running accuracy.

        Scores ``emb[b] - emb[a] + emb[c]`` against every L2-normalised row
        of the ``emb`` table, takes the ``top_k`` best rows and counts a hit
        for each position equal to ``analogy_d``. Hits and sample counts are
        accumulated in the persistable ``global_right_cnt`` /
        ``global_total_cnt`` variables; their ratio is stored in
        ``self._infer_results['acc']``.
        """
        sparse_feature_dim = self._hyper_param("sparse_feature_dim")
        sparse_feature_number = self._hyper_param("sparse_feature_number")
        # Number of candidates kept per sample; topk and the label expansion
        # below must use the same value.
        top_k = 4

        def embedding_layer(ids, table_name):
            """Look up ``ids`` in the table parameter named ``table_name``."""
            return fluid.embedding(
                input=ids,
                size=[sparse_feature_number, sparse_feature_dim],
                param_attr=table_name)

        self.analogy_input()

        # Ids 0..sparse_feature_number-1, used to fetch the whole table.
        all_label = np.arange(sparse_feature_number).astype('int32')
        self.all_label = fluid.layers.cast(
            x=fluid.layers.assign(all_label), dtype='int64')
        emb_all_label = embedding_layer(self.all_label, "emb")
        emb_a = embedding_layer(self.analogy_a, "emb")
        emb_b = embedding_layer(self.analogy_b, "emb")
        emb_c = embedding_layer(self.analogy_c, "emb")

        target = fluid.layers.elementwise_add(
            fluid.layers.elementwise_sub(emb_b, emb_a), emb_c)

        emb_all_label_l2 = fluid.layers.l2_normalize(x=emb_all_label, axis=1)
        dist = fluid.layers.matmul(
            x=target, y=emb_all_label_l2, transpose_y=True)
        _, pred_idx = fluid.layers.topk(input=dist, k=top_k)

        # Repeat the expected id across the top_k columns so it can be
        # compared element-wise with the predictions.
        label = fluid.layers.expand(
            fluid.layers.unsqueeze(self.analogy_d, axes=[1]),
            expand_times=[1, top_k])
        label_ones = fluid.layers.fill_constant_batch_size_like(
            label, shape=[-1, 1], value=1.0, dtype='float32')
        right_cnt = fluid.layers.reduce_sum(input=fluid.layers.cast(
            fluid.layers.equal(pred_idx, label), dtype='float32'))
        # One entry of value 1.0 per sample, so the sum is the batch size.
        total_cnt = fluid.layers.reduce_sum(label_ones)

        global_right_cnt = fluid.layers.create_global_var(
            name="global_right_cnt",
            persistable=True,
            dtype='float32',
            shape=[1],
            value=0)
        global_total_cnt = fluid.layers.create_global_var(
            name="global_total_cnt",
            persistable=True,
            dtype='float32',
            shape=[1],
            value=0)
        global_right_cnt.stop_gradient = True
        global_total_cnt.stop_gradient = True

        # Fold this batch's counts into the persistable running totals.
        tmp1 = fluid.layers.elementwise_add(right_cnt, global_right_cnt)
        fluid.layers.assign(tmp1, global_right_cnt)
        tmp2 = fluid.layers.elementwise_add(total_cnt, global_total_cnt)
        fluid.layers.assign(tmp2, global_total_cnt)

        acc = fluid.layers.elementwise_div(
            global_right_cnt, global_total_cnt, name="total_acc")
        self._infer_results['acc'] = acc