# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import paddle.fluid as fluid

from paddlerec.core.utils import envs
from paddlerec.core.model import ModelBase


class Model(ModelBase):
    """Skip-gram word2vec model with negative sampling.

    Training optimizes a sampled NCE-style objective over one true
    (input_word, true_label) pair plus ``neg_num`` negative labels per
    example.  Inference evaluates word-analogy retrieval (a : b :: c : ?)
    against the full vocabulary and accumulates a global accuracy metric.
    All hyper-parameters are read from the global config through ``envs``.
    """

    def __init__(self, config):
        ModelBase.__init__(self, config)

    def _init_hyper_parameters(self):
        """Read model and optimizer hyper-parameters from the env config."""
        # PSLIB fleet mode implies a distributed (sharded) embedding table.
        self.is_distributed = True if envs.get_fleet_mode().upper(
        ) == "PSLIB" else False
        self.sparse_feature_number = envs.get_global_env(
            "hyper_parameters.sparse_feature_number")
        self.sparse_feature_dim = envs.get_global_env(
            "hyper_parameters.sparse_feature_dim")
        self.neg_num = envs.get_global_env("hyper_parameters.neg_num")
        self.with_shuffle_batch = envs.get_global_env(
            "hyper_parameters.with_shuffle_batch")
        self.learning_rate = envs.get_global_env(
            "hyper_parameters.optimizer.learning_rate")
        self.decay_steps = envs.get_global_env(
            "hyper_parameters.optimizer.decay_steps")
        self.decay_rate = envs.get_global_env(
            "hyper_parameters.optimizer.decay_rate")

    def input_data(self, is_infer=False, **kwargs):
        """Declare input placeholders.

        Infer mode: four 1-D int64 analogy word-id tensors (a, b, c, d).
        Train mode: center word and true label, each shaped [None, 1];
        when negatives are NOT generated in-graph via shuffle_batch, an
        explicit [None, neg_num] tensor of negative labels is added.
        """
        if is_infer:
            analogy_a = fluid.data(
                name="analogy_a", shape=[None], dtype='int64')
            analogy_b = fluid.data(
                name="analogy_b", shape=[None], dtype='int64')
            analogy_c = fluid.data(
                name="analogy_c", shape=[None], dtype='int64')
            analogy_d = fluid.data(
                name="analogy_d", shape=[None], dtype='int64')
            return [analogy_a, analogy_b, analogy_c, analogy_d]

        input_word = fluid.data(
            name="input_word", shape=[None, 1], dtype='int64')
        true_word = fluid.data(
            name='true_label', shape=[None, 1], dtype='int64')
        if self.with_shuffle_batch:
            # Negatives will be drawn in-graph by shuffling the batch.
            return [input_word, true_word]

        neg_word = fluid.data(
            name="neg_label", shape=[None, self.neg_num], dtype='int64')
        return [input_word, true_word, neg_word]

    def net(self, inputs, is_infer=False):
        """Build the training network (or delegate to ``infer_net``).

        Computes logits for the true pair and for ``neg_num`` negative
        pairs, then a summed sigmoid cross-entropy loss stored in
        ``self._cost`` and reported via ``self._metrics["LOSS"]``.
        """
        if is_infer:
            self.infer_net(inputs)
            return

        def embedding_layer(input,
                            table_name,
                            emb_dim,
                            initializer_instance=None,
                            squeeze=False):
            # Shared sparse embedding lookup; `squeeze` drops the extra
            # middle dim produced by [None, 1]-shaped id inputs.
            emb = fluid.embedding(
                input=input,
                is_sparse=True,
                is_distributed=self.is_distributed,
                size=[self.sparse_feature_number, emb_dim],
                param_attr=fluid.ParamAttr(
                    name=table_name, initializer=initializer_instance), )
            if squeeze:
                return fluid.layers.squeeze(input=emb, axes=[1])
            else:
                return emb

        # Standard word2vec init: input table uniform in +/-0.5/dim,
        # output (context) tables start at zero.
        init_width = 0.5 / self.sparse_feature_dim
        emb_initializer = fluid.initializer.Uniform(-init_width, init_width)
        emb_w_initializer = fluid.initializer.Constant(value=0.0)

        input_emb = embedding_layer(inputs[0], "emb", self.sparse_feature_dim,
                                    emb_initializer, True)
        true_emb_w = embedding_layer(inputs[1], "emb_w",
                                     self.sparse_feature_dim,
                                     emb_w_initializer, True)
        true_emb_b = embedding_layer(inputs[1], "emb_b", 1, emb_w_initializer,
                                     True)

        if self.with_shuffle_batch:
            # Generate negatives by shuffling the true-label embeddings
            # across the batch, once per requested negative.
            neg_emb_w_list = []
            for i in range(self.neg_num):
                neg_emb_w_list.append(
                    fluid.contrib.layers.shuffle_batch(
                        true_emb_w))  # shuffle true_word
            neg_emb_w_concat = fluid.layers.concat(neg_emb_w_list, axis=0)
            neg_emb_w = fluid.layers.reshape(
                neg_emb_w_concat,
                shape=[-1, self.neg_num, self.sparse_feature_dim])

            neg_emb_b_list = []
            for i in range(self.neg_num):
                neg_emb_b_list.append(
                    fluid.contrib.layers.shuffle_batch(
                        true_emb_b))  # shuffle true_word
            neg_emb_b = fluid.layers.concat(neg_emb_b_list, axis=0)
            neg_emb_b_vec = fluid.layers.reshape(
                neg_emb_b, shape=[-1, self.neg_num])
        else:
            # Explicit negatives supplied by the reader (inputs[2]).
            neg_emb_w = embedding_layer(
                inputs[2], "emb_w", self.sparse_feature_dim, emb_w_initializer)
            neg_emb_b = embedding_layer(inputs[2], "emb_b", 1,
                                        emb_w_initializer)
            neg_emb_b_vec = fluid.layers.reshape(
                neg_emb_b, shape=[-1, self.neg_num])

        # logit(true) = <input_emb, true_emb_w> + b
        true_logits = fluid.layers.elementwise_add(
            fluid.layers.reduce_sum(
                fluid.layers.elementwise_mul(input_emb, true_emb_w),
                dim=1,
                keep_dim=True),
            true_emb_b)

        # logit(neg) = input_emb x neg_emb_w^T + neg bias, per negative.
        input_emb_re = fluid.layers.reshape(
            input_emb, shape=[-1, 1, self.sparse_feature_dim])
        neg_matmul = fluid.layers.matmul(
            input_emb_re, neg_emb_w, transpose_y=True)
        neg_matmul_re = fluid.layers.reshape(
            neg_matmul, shape=[-1, self.neg_num])
        neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec)
        # nce loss

        label_ones = fluid.layers.fill_constant(
            shape=[fluid.layers.shape(true_logits)[0], 1],
            value=1.0,
            dtype='float32')
        label_zeros = fluid.layers.fill_constant(
            shape=[fluid.layers.shape(true_logits)[0], self.neg_num],
            value=0.0,
            dtype='float32')

        true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits,
                                                                   label_ones)
        neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(neg_logits,
                                                                  label_zeros)
        cost = fluid.layers.elementwise_add(
            fluid.layers.reduce_sum(
                true_xent, dim=1),
            fluid.layers.reduce_sum(
                neg_xent, dim=1))
        avg_cost = fluid.layers.reduce_mean(cost)

        self._cost = avg_cost
        # NOTE(review): these two counters are created but never read in the
        # training net; presumably they exist so the persistable variables
        # are present for the accuracy accumulation done in infer_net —
        # confirm before removing.
        global_right_cnt = fluid.layers.create_global_var(
            name="global_right_cnt",
            persistable=True,
            dtype='float32',
            shape=[1],
            value=0)
        global_total_cnt = fluid.layers.create_global_var(
            name="global_total_cnt",
            persistable=True,
            dtype='float32',
            shape=[1],
            value=0)
        global_right_cnt.stop_gradient = True
        global_total_cnt.stop_gradient = True
        self._metrics["LOSS"] = avg_cost

    def optimizer(self):
        """SGD with staircase exponential learning-rate decay."""
        optimizer = fluid.optimizer.SGD(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=self.learning_rate,
                decay_steps=self.decay_steps,
                decay_rate=self.decay_rate,
                staircase=True))
        return optimizer

    def infer_net(self, inputs):
        """Word-analogy evaluation network.

        Predicts d = nearest(emb(b) - emb(a) + emb(c)) over the whole
        vocabulary (cosine via L2-normalized table) and accumulates
        right/total counts in persistable global vars to report a running
        accuracy in ``self._infer_results['acc']``.
        """

        def embedding_layer(input, table_name, initializer_instance=None):
            # Reuses the trained "emb" table by parameter name.
            emb = fluid.embedding(
                input=input,
                size=[self.sparse_feature_number, self.sparse_feature_dim],
                param_attr=table_name)
            return emb

        # Ids of the entire vocabulary, used to score every candidate word.
        all_label = np.arange(self.sparse_feature_number).reshape(
            self.sparse_feature_number).astype('int32')
        self.all_label = fluid.layers.cast(
            x=fluid.layers.assign(all_label), dtype='int64')
        emb_all_label = embedding_layer(self.all_label, "emb")
        emb_a = embedding_layer(inputs[0], "emb")
        emb_b = embedding_layer(inputs[1], "emb")
        emb_c = embedding_layer(inputs[2], "emb")

        # Analogy arithmetic: target = b - a + c.
        target = fluid.layers.elementwise_add(
            fluid.layers.elementwise_sub(emb_b, emb_a), emb_c)

        emb_all_label_l2 = fluid.layers.l2_normalize(x=emb_all_label, axis=1)
        dist = fluid.layers.matmul(
            x=target, y=emb_all_label_l2, transpose_y=True)
        values, pred_idx = fluid.layers.topk(input=dist, k=1)
        label = fluid.layers.expand(
            fluid.layers.unsqueeze(
                inputs[3], axes=[1]), expand_times=[1, 1])
        label_ones = fluid.layers.fill_constant_batch_size_like(
            label, shape=[-1, 1], value=1.0, dtype='float32')
        right_cnt = fluid.layers.reduce_sum(input=fluid.layers.cast(
            fluid.layers.equal(pred_idx, label), dtype='float32'))
        total_cnt = fluid.layers.reduce_sum(label_ones)

        global_right_cnt = fluid.layers.create_global_var(
            name="global_right_cnt",
            persistable=True,
            dtype='float32',
            shape=[1],
            value=0)
        global_total_cnt = fluid.layers.create_global_var(
            name="global_total_cnt",
            persistable=True,
            dtype='float32',
            shape=[1],
            value=0)
        global_right_cnt.stop_gradient = True
        global_total_cnt.stop_gradient = True

        # Accumulate batch counts into the persistable globals.
        tmp1 = fluid.layers.elementwise_add(right_cnt, global_right_cnt)
        fluid.layers.assign(tmp1, global_right_cnt)
        tmp2 = fluid.layers.elementwise_add(total_cnt, global_total_cnt)
        fluid.layers.assign(tmp2, global_total_cnt)

        acc = fluid.layers.elementwise_div(
            global_right_cnt, global_total_cnt, name="total_acc")
        self._infer_results['acc'] = acc