# ps_dnn_model.py
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import paddle
from paddle import nn


class DNNLayer(nn.Layer):
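    """CTR-style ranking DNN: per-slot sparse embeddings are concatenated
    with the dense features and pushed through an MLP whose final layer
    emits 2 logits. `sync_mode` selects the parameter-server variant
    (e.g. 'gpubox' or 'heter')."""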
    def __init__(
        self,
        sparse_feature_number,
        sparse_feature_dim,
        dense_feature_dim,
        num_field,
        layer_sizes,
        sync_mode=None,
    ):
        super().__init__()
        self.sync_mode = sync_mode
        self.sparse_feature_number = sparse_feature_number
        self.sparse_feature_dim = sparse_feature_dim
        self.dense_feature_dim = dense_feature_dim
        self.num_field = num_field
        self.layer_sizes = layer_sizes

        self.embedding = paddle.nn.Embedding(
            self.sparse_feature_number,
            self.sparse_feature_dim,
            sparse=True,
            weight_attr=paddle.ParamAttr(
                name="SparseFeatFactors",
                initializer=paddle.nn.initializer.Uniform(),
            ),
        )

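        # MLP sizes: the input is every slot embedding plus the dense
        # features, the hidden widths come from layer_sizes, and the
        # output layer emits 2 logits.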
        sizes = (
            [sparse_feature_dim * num_field + dense_feature_dim]
            + self.layer_sizes
            + [2]
        )
        acts = ["relu" for _ in range(len(self.layer_sizes))] + [None]
        self._mlp_layers = []
        for i in range(len(layer_sizes) + 1):
            linear = paddle.nn.Linear(
                in_features=sizes[i],
                out_features=sizes[i + 1],
                weight_attr=paddle.ParamAttr(
                    initializer=paddle.nn.initializer.Normal(
                        std=1.0 / math.sqrt(sizes[i])
                    )
                ),
            )
            self.add_sublayer('linear_%d' % i, linear)
            self._mlp_layers.append(linear)
            if acts[i] == 'relu':
                act = paddle.nn.ReLU()
                self.add_sublayer('act_%d' % i, act)
                self._mlp_layers.append(act)

    def forward(self, sparse_inputs, dense_inputs):
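        # Embed each sparse slot, flatten to [batch, emb_dim], and run the
        # concatenated sparse + dense features through the MLP.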

        sparse_embs = []
        for s_input in sparse_inputs:
            if self.sync_mode == "gpubox":
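                # In 'gpubox' mode the lookup goes through the distributed
                # sparse-table op rather than the local Embedding layer.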
                emb = paddle.static.nn.sparse_embedding(
                    input=s_input,
                    size=[self.sparse_feature_number, self.sparse_feature_dim],
                    param_attr=paddle.ParamAttr(name="embedding"),
                )
            else:
                emb = self.embedding(s_input)
            emb = paddle.reshape(emb, shape=[-1, self.sparse_feature_dim])
            # emb.stop_gradient = True
            sparse_embs.append(emb)

        y_dnn = paddle.concat(x=sparse_embs + [dense_inputs], axis=1)

        if self.sync_mode == 'heter':
            with paddle.static.device_guard('gpu'):
                for n_layer in self._mlp_layers:
                    y_dnn = n_layer(y_dnn)
        else:
            for n_layer in self._mlp_layers:
                y_dnn = n_layer(y_dnn)

        return y_dnn


class FlDNNLayer(nn.Layer):
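    """Vertical (federated) split of the DNN: part A owns the sparse
    features, part B owns the dense features and the label. The
    *_DEVICE_FLAG strings pin each block of ops to a heter-pipeline
    stage via device_guard; activations meet at the 'joint point'."""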
    def __init__(
        self,
        sparse_feature_number,
        sparse_feature_dim,
        dense_feature_dim,
        sparse_number,
        sync_mode=None,
    ):
        super().__init__()

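        # Pipeline-stage placement flags: each block of ops below is pinned
        # to one of these devices via device_guard.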
        self.PART_A_DEVICE_FLAG = 'gpu:0'
        self.PART_A_JOINT_OP_DEVICE_FLAG = 'gpu:2'
        self.PART_B_DEVICE_FLAG = 'gpu:1'
        self.PART_B_JOINT_OP_DEVICE_FLAG = 'gpu:3'

        self.sync_mode = sync_mode
        self.sparse_feature_number = sparse_feature_number
        self.sparse_feature_dim = sparse_feature_dim
        self.slot_num = sparse_number
        self.dense_feature_dim = dense_feature_dim

        layer_sizes_a = [
            self.slot_num * self.sparse_feature_dim,
            5,
            7,
        ]  # for test
        layer_sizes_b = [self.dense_feature_dim, 6, 7]
        layer_sizes_top = [7, 2]

        self.embedding = paddle.nn.Embedding(
            self.sparse_feature_number,
            self.sparse_feature_dim,
            sparse=True,
            weight_attr=paddle.ParamAttr(
                name="SparseFeatFactors",
                initializer=paddle.nn.initializer.Uniform(),
            ),
        )

        # part_a fc
        self._mlp_layers_a = []
        for i in range(len(layer_sizes_a) - 1):
            linear = paddle.nn.Linear(
                in_features=layer_sizes_a[i],
                out_features=layer_sizes_a[i + 1],
                weight_attr=paddle.ParamAttr(
                    initializer=paddle.nn.initializer.Normal(
                        std=1.0 / math.sqrt(layer_sizes_a[i])
                    )
                ),
            )
            self.add_sublayer('linear_a_%d' % i, linear)
            self._mlp_layers_a.append(linear)
            act = paddle.nn.ReLU()
            self.add_sublayer('act_a_%d' % i, act)
            self._mlp_layers_a.append(act)

        # part_b fc
        self._mlp_layers_b = []
        for i in range(len(layer_sizes_b) - 1):
            linear = paddle.nn.Linear(
                in_features=layer_sizes_b[i],
                out_features=layer_sizes_b[i + 1],
                weight_attr=paddle.ParamAttr(
                    initializer=paddle.nn.initializer.Normal(
                        std=1.0 / math.sqrt(layer_sizes_b[i])
                    )
                ),
            )
            self.add_sublayer('linear_b_%d' % i, linear)
            self._mlp_layers_b.append(linear)
            act = paddle.nn.ReLU()
            self.add_sublayer('act_b_%d' % i, act)
            self._mlp_layers_b.append(act)

        # top fc
        self._mlp_layers_top = []
        for i in range(len(layer_sizes_top) - 1):
            linear = paddle.nn.Linear(
                in_features=layer_sizes_top[i],
                out_features=layer_sizes_top[i + 1],
                weight_attr=paddle.ParamAttr(
                    initializer=paddle.nn.initializer.Normal(
                        std=1.0 / math.sqrt(layer_sizes_top[i])
                    )
                ),
            )
            self.add_sublayer('linear_top_%d' % i, linear)
            self._mlp_layers_top.append(linear)
            act = paddle.nn.ReLU()
            self.add_sublayer('act_top_%d' % i, act)
            self._mlp_layers_top.append(act)

    def bottom_a_layer(self, sparse_inputs):
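        # Part A bottom model: embed the sparse slots and run the local MLP;
        # the final layer sits on the joint-op device so its output can be
        # handed to part B.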
        with paddle.static.device_guard(self.PART_A_DEVICE_FLAG):
            sparse_embs = []
            for s_input in sparse_inputs:
                emb = self.embedding(s_input)
                emb = paddle.reshape(emb, shape=[-1, self.sparse_feature_dim])
                sparse_embs.append(emb)

            y = paddle.concat(x=sparse_embs, axis=1)
            y = self._mlp_layers_a[0](y)
            y = self._mlp_layers_a[1](y)

            y = self._mlp_layers_a[2](y)
        with paddle.static.device_guard(
            self.PART_A_JOINT_OP_DEVICE_FLAG
        ):  # joint point
            bottom_a = self._mlp_layers_a[3](y)

        return bottom_a

    def bottom_b_layer(self, dense_inputs):
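        # Part B bottom model: run the dense features through the local MLP.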
        with paddle.static.device_guard(self.PART_B_DEVICE_FLAG):
            y = self._mlp_layers_b[0](dense_inputs)
            y = self._mlp_layers_b[1](y)

            y = self._mlp_layers_b[2](y)
            bottom_b = self._mlp_layers_b[3](y)

        return bottom_b

    def interactive_layer(self, bottom_a, bottom_b):
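        # Join point: the two parties' partial activations are summed on
        # part B's joint-op device.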
        with paddle.static.device_guard(
            self.PART_B_JOINT_OP_DEVICE_FLAG
        ):  # joint point
            interactive = paddle.add(bottom_a, bottom_b)
        return interactive

    def top_layer(self, interactive, label_input):
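        # Part B top model: logits, softmax for the AUC metric, and the
        # averaged cross-entropy loss, all on part B's device.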
        with paddle.static.device_guard(self.PART_B_DEVICE_FLAG):
            y = self._mlp_layers_top[0](interactive)
            y_top = self._mlp_layers_top[1](y)
            predict_2d = paddle.nn.functional.softmax(y_top)
            (
                auc,
                batch_auc,
                [
                    self.batch_stat_pos,
                    self.batch_stat_neg,
                    self.stat_pos,
                    self.stat_neg,
                ],
            ) = paddle.static.auc(
                input=predict_2d,
                label=label_input,
                num_thresholds=2**12,
                slide_steps=20,
            )

            cost = paddle.nn.functional.cross_entropy(
                input=y_top, label=label_input
            )
            avg_cost = paddle.mean(x=cost)

        return auc, avg_cost

    def forward(self, sparse_inputs, dense_inputs, label_input):
        bottom_a = self.bottom_a_layer(sparse_inputs)

        bottom_b = self.bottom_b_layer(dense_inputs)

        interactive = self.interactive_layer(bottom_a, bottom_b)

        auc, avg_cost = self.top_layer(interactive, label_input)

        return auc, avg_cost


class StaticModel:
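    """Config-driven builder for the static-graph program: creates the
    feed variables and wires up DNNLayer (net) or FlDNNLayer (fl_net)."""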
    def __init__(self, config):
        self._cost = None
        self.infer_target_var = None
        self.config = config
        self._init_hyper_parameters()
        self.sync_mode = config.get("runner.sync_mode")

    def _init_hyper_parameters(self):
        self.is_distributed = False
        self.distributed_embedding = False

        if self.config.get("hyper_parameters.distributed_embedding", 0) == 1:
            self.distributed_embedding = True

        self.sparse_feature_number = self.config.get(
            "hyper_parameters.sparse_feature_number"
        )
        self.sparse_feature_dim = self.config.get(
            "hyper_parameters.sparse_feature_dim"
        )
        self.sparse_inputs_slots = self.config.get(
            "hyper_parameters.sparse_inputs_slots"
        )
        self.dense_input_dim = self.config.get(
            "hyper_parameters.dense_input_dim"
        )
        self.learning_rate = self.config.get(
            "hyper_parameters.optimizer.learning_rate"
        )
        self.fc_sizes = self.config.get("hyper_parameters.fc_sizes")

    def create_feeds(self, is_infer=False):
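        # Feed layout consumed by net()/fl_net(): the label first, one int64
        # id tensor per sparse slot (named "1" .. str(sparse_inputs_slots - 1)),
        # and the dense input last.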
        dense_input = paddle.static.data(
            name="dense_input",
            shape=[None, self.dense_input_dim],
            dtype="float32",
        )

        sparse_input_ids = [
            paddle.static.data(name=str(i), shape=[None, 1], dtype="int64")
            for i in range(1, self.sparse_inputs_slots)
        ]

        label = paddle.static.data(name="label", shape=[None, 1], dtype="int64")

        feeds_list = [label] + sparse_input_ids + [dense_input]
        return feeds_list

    def net(self, input, is_infer=False):
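        # `input` follows the create_feeds() layout: [label, sparse ids..., dense].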
        self.label_input = input[0]
        self.sparse_inputs = input[1 : self.sparse_inputs_slots]
        self.dense_input = input[-1]
        sparse_number = self.sparse_inputs_slots - 1

        dnn_model = DNNLayer(
            self.sparse_feature_number,
            self.sparse_feature_dim,
            self.dense_input_dim,
            sparse_number,
            self.fc_sizes,
            sync_mode=self.sync_mode,
        )
        raw_predict_2d = dnn_model(self.sparse_inputs, self.dense_input)
        predict_2d = paddle.nn.functional.softmax(raw_predict_2d)
        self.predict = predict_2d
        (
            auc,
            batch_auc,
            [
                self.batch_stat_pos,
                self.batch_stat_neg,
                self.stat_pos,
                self.stat_neg,
            ],
        ) = paddle.static.auc(
            input=self.predict,
            label=self.label_input,
            num_thresholds=2**12,
            slide_steps=20,
        )
        self.inference_target_var = auc
        if is_infer:
            fetch_dict = {'auc': auc}
            return fetch_dict

        cost = paddle.nn.functional.cross_entropy(
            input=raw_predict_2d, label=self.label_input
        )
        avg_cost = paddle.mean(x=cost)
        self._cost = avg_cost

        fetch_dict = {'cost': avg_cost, 'auc': auc}
        return fetch_dict

    def fl_net(self, input, is_infer=False):
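        # Federated variant of net(): same feed layout, but the forward pass
        # is split across parties by FlDNNLayer.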
        self.label_input = input[0]
        self.sparse_inputs = input[1 : self.sparse_inputs_slots]
        self.dense_input = input[-1]
        self.sparse_number = self.sparse_inputs_slots - 1

        fl_dnn_model = FlDNNLayer(
            self.sparse_feature_number,
            self.sparse_feature_dim,
            self.dense_input_dim,
            self.sparse_number,
            sync_mode=self.sync_mode,
        )

        auc, avg_cost = fl_dnn_model(
            self.sparse_inputs, self.dense_input, self.label_input
        )
        fetch_dict = {'cost': avg_cost, 'auc': auc}
        self._cost = avg_cost
        return fetch_dict
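

# Minimal usage sketch, assuming static-graph mode and a plain dict standing
# in for the runner config object (the model only ever calls
# config.get("<dotted.key>")); the hyperparameter values below are
# illustrative, not prescribed by this file.
if __name__ == "__main__":
    paddle.enable_static()
    config = {
        "runner.sync_mode": None,
        "hyper_parameters.sparse_feature_number": 1000001,
        "hyper_parameters.sparse_feature_dim": 9,
        "hyper_parameters.sparse_inputs_slots": 27,
        "hyper_parameters.dense_input_dim": 13,
        "hyper_parameters.optimizer.learning_rate": 0.001,
        "hyper_parameters.fc_sizes": [512, 256, 128],
    }
    model = StaticModel(config)
    feeds = model.create_feeds()   # [label, 26 sparse id slots, dense input]
    fetch_dict = model.net(feeds)  # builds ops into the default main program
    print(fetch_dict)              # {'cost': ..., 'auc': ...}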