# network_conf.py
import paddle.fluid as fluid
import math

# Width of the "dense_input" data layer declared by ctr_dnn_model.
# ctr_deepfm_model takes its own `dense_feature_dim` argument, which shadows
# this module-level value inside that function.
dense_feature_dim = 13

def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, sparse_input):
    """Build the DeepFM click-through-rate network with fluid layers.

    The function declares one dense slot ("dense_input"), 26 sparse id slots
    ("C1" .. "C26") and a "label" slot, wraps them in a py_reader, and feeds
    the FM first/second-order terms together with a three-layer MLP into a
    final 2-way softmax.

    All layers that consume the dense slot, the 26 id slots or the label read
    the variables returned by ``read_file(py_reader)``; the
    ``fluid.layers.data`` placeholders are only used as the reader's
    ``feed_list``. This matches how ``ctr_dnn_model`` wires its inputs.

    Args:
        factor_size: Second dimension of the FM factor / embedding tables.
        sparse_feature_dim: Number of rows in the sparse embedding tables;
            also used to scale the initializer of "SparseFeatFactors".
        dense_feature_dim: Width of the "dense_input" slot and number of rows
            in the "DenseFeatFactors" table.
        sparse_input: Variable looked up by the sparse FM layer.
            NOTE(review): this is supplied by the caller and is not one of the
            py_reader outputs -- confirm the caller keeps it populated.

    Returns:
        A tuple ``(avg_cost, auc_var, batch_auc_var, py_reader)``. Despite its
        name, ``avg_cost`` is the ``reduce_sum`` of the per-example cross
        entropy, not a mean.
    """

    def dense_fm_layer(features, emb_dict_size, factor_size, fm_param_attr):
        """Return the FM first- and second-order terms for dense features."""
        first_order = fluid.layers.fc(input=features, size=1)
        emb_table = fluid.layers.create_parameter(
            shape=[emb_dict_size, factor_size], dtype='float32', attr=fm_param_attr)

        # second order = 0.5 * ((x . V)^2 - (x^2 . V^2))
        input_mul_factor = fluid.layers.matmul(features, emb_table)
        input_mul_factor_square = fluid.layers.square(input_mul_factor)
        input_square = fluid.layers.square(features)
        factor_square = fluid.layers.square(emb_table)
        input_square_mul_factor_square = fluid.layers.matmul(input_square, factor_square)

        second_order = 0.5 * (input_mul_factor_square - input_square_mul_factor_square)
        return first_order, second_order

    def sparse_fm_layer(ids, emb_dict_size, factor_size, fm_param_attr):
        """Return the FM first- and second-order terms for sparse ids."""
        first_embeddings = fluid.layers.embedding(
            input=ids, dtype='float32', size=[emb_dict_size, 1], is_sparse=True)
        first_order = fluid.layers.sequence_pool(input=first_embeddings, pool_type='sum')

        nonzero_embeddings = fluid.layers.embedding(
            input=ids, dtype='float32', size=[emb_dict_size, factor_size],
            param_attr=fm_param_attr, is_sparse=True)
        summed_features_emb = fluid.layers.sequence_pool(
            input=nonzero_embeddings, pool_type='sum')
        summed_features_emb_square = fluid.layers.square(summed_features_emb)

        squared_features_emb = fluid.layers.square(nonzero_embeddings)
        squared_sum_features_emb = fluid.layers.sequence_pool(
            input=squared_features_emb, pool_type='sum')

        # second order = 0.5 * ((sum of embeddings)^2 - sum of (embeddings^2))
        second_order = 0.5 * (summed_features_emb_square - squared_sum_features_emb)
        return first_order, second_order

    # Placeholders: they define the slots handed to the py_reader below.
    dense_input = fluid.layers.data(
        name="dense_input", shape=[dense_feature_dim], dtype='float32')

    sparse_input_ids = [
        fluid.layers.data(name="C" + str(i), shape=[1], lod_level=1, dtype='int64')
        for i in range(1, 27)]

    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    datas = [dense_input] + sparse_input_ids + [label]

    py_reader = fluid.layers.create_py_reader_by_data(capacity=64,
                                                      feed_list=datas,
                                                      name='py_reader',
                                                      use_double_buffer=True)
    words = fluid.layers.read_file(py_reader)

    # Reader outputs, in feed_list order: dense slot, 26 id slots, label.
    dense_var = words[0]
    sparse_id_vars = words[1:-1]
    label_var = words[-1]

    sparse_fm_param_attr = fluid.param_attr.ParamAttr(
        name="SparseFeatFactors",
        initializer=fluid.initializer.Normal(
            scale=1 / math.sqrt(sparse_feature_dim)))
    dense_fm_param_attr = fluid.param_attr.ParamAttr(
        name="DenseFeatFactors",
        initializer=fluid.initializer.Normal(
            scale=1 / math.sqrt(dense_feature_dim)))

    sparse_fm_first, sparse_fm_second = sparse_fm_layer(
        sparse_input, sparse_feature_dim, factor_size, sparse_fm_param_attr)
    dense_fm_first, dense_fm_second = dense_fm_layer(
        dense_var, dense_feature_dim, factor_size, dense_fm_param_attr)

    def embedding_layer(ids):
        """Look up ids in the shared "SparseFeatFactors" table and average-pool."""
        emb = fluid.layers.embedding(
            input=ids, dtype='float32', size=[sparse_feature_dim, factor_size],
            param_attr=sparse_fm_param_attr, is_sparse=True)
        return fluid.layers.sequence_pool(input=emb, pool_type='average')

    sparse_embed_seq = [embedding_layer(ids) for ids in sparse_id_vars]
    concated = fluid.layers.concat(sparse_embed_seq + [dense_var], axis=1)
    fc1 = fluid.layers.fc(input=concated, size=400, act='relu',
                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                              scale=1 / math.sqrt(concated.shape[1]))))
    fc2 = fluid.layers.fc(input=fc1, size=400, act='relu',
                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                              scale=1 / math.sqrt(fc1.shape[1]))))
    fc3 = fluid.layers.fc(input=fc2, size=400, act='relu',
                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                              scale=1 / math.sqrt(fc2.shape[1]))))
    predict = fluid.layers.fc(
        input=[sparse_fm_first, sparse_fm_second, dense_fm_first, dense_fm_second, fc3],
        size=2,
        act="softmax",
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Normal(scale=1 / math.sqrt(fc3.shape[1]))))

    cost = fluid.layers.cross_entropy(input=predict, label=label_var)
    avg_cost = fluid.layers.reduce_sum(cost)
    # The accuracy variable is not returned; the call is kept so the program
    # still contains the accuracy op.
    accuracy = fluid.layers.accuracy(input=predict, label=label_var)
    auc_var, batch_auc_var, _ = fluid.layers.auc(
        input=predict, label=label_var, num_thresholds=2 ** 12, slide_steps=20)

    return avg_cost, auc_var, batch_auc_var, py_reader


def ctr_dnn_model(embedding_size, sparse_feature_dim, use_py_reader=True):
    """Build the DNN click-through-rate network with fluid layers.

    One dense slot ("dense_input"), 26 sparse id slots ("C1" .. "C26") and a
    "label" slot are declared. Each id slot is looked up in the shared
    "SparseFeatFactors" embedding table, the results are concatenated with the
    dense slot, and the result passes through three 400-unit relu layers and
    a 2-way softmax.

    Args:
        embedding_size: Second dimension of the embedding table.
        sparse_feature_dim: Number of rows in the embedding table.
        use_py_reader: When true, the slots are wrapped in a py_reader and the
            network consumes the variables returned by ``read_file``;
            otherwise it consumes the data layers directly.

    Returns:
        A tuple ``(avg_cost, auc_var, batch_auc_var, py_reader, words)``.
        ``py_reader`` is ``None`` when ``use_py_reader`` is false, ``words``
        is the list of input variables the network reads from, and
        ``avg_cost`` is the ``reduce_sum`` of the per-example cross entropy.
    """

    def scaled_normal(fan_in):
        """ParamAttr with a Normal initializer scaled by 1 / sqrt(fan_in)."""
        return fluid.ParamAttr(
            initializer=fluid.initializer.Normal(scale=1 / math.sqrt(fan_in)))

    # Input slots, in the order they are handed to the reader.
    dense_slot = fluid.layers.data(
        name="dense_input", shape=[dense_feature_dim], dtype='float32')
    id_slots = []
    for slot_no in range(1, 27):
        id_slots.append(
            fluid.layers.data(
                name="C" + str(slot_no), shape=[1], lod_level=1, dtype='int64'))
    label_slot = fluid.layers.data(name='label', shape=[1], dtype='int64')

    words = [dense_slot] + id_slots + [label_slot]

    py_reader = None
    if use_py_reader:
        py_reader = fluid.layers.create_py_reader_by_data(
            capacity=64,
            feed_list=words,
            name='py_reader',
            use_double_buffer=True)
        words = fluid.layers.read_file(py_reader)

    dense_part = words[0:1]
    id_parts = words[1:-1]
    target = words[-1]

    slot_embeddings = []
    for ids in id_parts:
        table_attr = fluid.ParamAttr(name="SparseFeatFactors",
                                     initializer=fluid.initializer.Uniform())
        slot_embeddings.append(
            fluid.layers.embedding(
                input=ids,
                is_sparse=True,
                # you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190
                # if you want to set is_distributed to True
                is_distributed=False,
                size=[sparse_feature_dim, embedding_size],
                param_attr=table_attr))

    # Three identical hidden layers, each initialised from its own fan-in.
    hidden = fluid.layers.concat(slot_embeddings + dense_part, axis=1)
    for _ in range(3):
        hidden = fluid.layers.fc(input=hidden, size=400, act='relu',
                                 param_attr=scaled_normal(hidden.shape[1]))

    predict = fluid.layers.fc(input=hidden, size=2, act='softmax',
                              param_attr=scaled_normal(hidden.shape[1]))

    cost = fluid.layers.cross_entropy(input=predict, label=target)
    avg_cost = fluid.layers.reduce_sum(cost)
    # Result is not returned; the call still adds the accuracy op to the program.
    fluid.layers.accuracy(input=predict, label=target)
    auc_outputs = fluid.layers.auc(
        input=predict, label=target, num_thresholds=2 ** 12, slide_steps=20)
    auc_var, batch_auc_var, auc_states = auc_outputs

    return avg_cost, auc_var, batch_auc_var, py_reader, words