# model.py
import math
import paddle.fluid as fluid
from ...utils import envs


class Train(object):
    """Training model assembled from ``paddle.fluid`` layers.

    The network embeds every sparse id input through one shared embedding
    table, sum-pools each embedded sequence, concatenates the pooled
    embeddings with the dense input, runs the result through a stack of
    ReLU fully-connected layers and ends in a 2-way softmax.

    All hyper-parameters are read through ``envs.get_global_env``.
    ``input()`` must run before ``net()``, and ``net()`` before ``metric()``,
    because each step reads attributes the previous one sets.
    """

    def __init__(self):
        # Data-layer variables; populated by ``input()``.
        self.sparse_inputs = []
        self.dense_input = None
        self.label_input = None

        # Names of the variables above, in the same order.
        self.sparse_input_varnames = []
        self.dense_input_varname = None
        self.label_input_varname = None

    def input(self):
        """Declare the sparse, dense and label data layers.

        Stores the created variables and their names on the instance.
        Reads ``sparse_inputs_counts`` (number of sparse id inputs) and
        ``dense_input_dim`` (shape of the dense input) from the global env.
        """
        sparse_count = envs.get_global_env("sparse_inputs_counts")
        # One variable-length (lod_level=1) int64 id input per sparse slot,
        # named C0, C1, ...
        self.sparse_inputs = [
            fluid.layers.data(name="C" + str(i),
                              shape=[1],
                              lod_level=1,
                              dtype="int64")
            for i in range(sparse_count)
        ]
        self.sparse_input_varnames = [var.name for var in self.sparse_inputs]

        dense_input_dim = envs.get_global_env("dense_input_dim")
        # ``fluid.layers.data`` needs a list/tuple shape; accept a bare int
        # from the config as well as an explicit shape list.
        if isinstance(dense_input_dim, int):
            dense_input_dim = [dense_input_dim]
        self.dense_input = fluid.layers.data(name="dense_input",
                                             shape=dense_input_dim,
                                             dtype="float32")
        self.dense_input_varname = self.dense_input.name

        self.label_input = fluid.layers.data(name="label",
                                             shape=[1],
                                             dtype="int64")
        self.label_input_varname = self.label_input.name

    def net(self):
        """Build the forward network and store its output in ``self.predict``.

        Reads ``sparse_feature_number``, ``sparse_feature_dim`` and
        ``fc_sizes`` (iterable of hidden layer widths) from the global env.
        """
        # The embedding table shape is the same for every sparse input, so
        # look it up once instead of once per input.
        sparse_feature_number = envs.get_global_env("sparse_feature_number")
        sparse_feature_dim = envs.get_global_env("sparse_feature_dim")

        def embedding_layer(sparse_ids):
            """Embed one sparse id input and sum-pool over its sequence."""
            emb = fluid.layers.embedding(
                input=sparse_ids,
                is_sparse=True,
                # Must be a list of two ints: [vocabulary size, embedding dim].
                size=[sparse_feature_number, sparse_feature_dim],
                # The fixed parameter name makes every sparse input share
                # the same embedding table.
                param_attr=fluid.ParamAttr(
                    name="SparseFeatFactors",
                    initializer=fluid.initializer.Uniform()),
            )
            return fluid.layers.sequence_pool(input=emb, pool_type='sum')

        def fc(features, output_size):
            """ReLU fully-connected layer with 1/sqrt(fan_in) Normal init."""
            return fluid.layers.fc(
                input=features,
                size=output_size,
                act='relu',
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Normal(
                        scale=1.0 / math.sqrt(features.shape[1]))))

        sparse_embed_seq = [embedding_layer(var) for var in self.sparse_inputs]
        concated = fluid.layers.concat(
            sparse_embed_seq + [self.dense_input], axis=1)

        # Chain the hidden layers; ``hidden`` always holds the latest output.
        hidden = concated
        for size in envs.get_global_env("fc_sizes"):
            hidden = fc(hidden, size)

        self.predict = fluid.layers.fc(
            input=hidden,
            size=2,
            act="softmax",
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                scale=1 / math.sqrt(hidden.shape[1]))),
        )

    def loss(self, predict):
        """Build the cross-entropy loss of ``predict`` against the label.

        The per-example cost is reduced with ``reduce_sum`` (a sum, not a
        mean). The result is stored in ``self.loss`` and also returned.

        NOTE: assigning ``self.loss`` replaces this bound method on the
        instance, so ``loss()`` can only be called once per instance. The
        attribute name is kept because callers may read ``self.loss``.
        """
        cost = fluid.layers.cross_entropy(input=predict,
                                          label=self.label_input)
        total_cost = fluid.layers.reduce_sum(cost)
        self.loss = total_cost
        return total_cost

    def metric(self):
        """Build the AUC metric ops over ``self.predict`` and the label.

        Returns:
            Tuple ``(auc, batch_auc)`` of the variables produced by
            ``fluid.layers.auc``; the third value it returns is discarded.
        """
        auc, batch_auc, _ = fluid.layers.auc(input=self.predict,
                                             label=self.label_input,
                                             num_thresholds=2 ** 12,
                                             slide_steps=20)
        return auc, batch_auc

    def optimizer(self):
        """Return an Adam optimizer using the global ``learning_rate``."""
        learning_rate = envs.get_global_env("learning_rate")
        return fluid.optimizer.Adam(learning_rate, lazy_mode=True)


class Evaluate(object):
    """Placeholder evaluation model; both hooks are currently no-ops."""

    def input(self):
        """Declare evaluation inputs (not implemented; does nothing)."""
        pass

    def net(self):
        """Build the evaluation network (not implemented; does nothing)."""
        pass