# deepmf_conf.py
import paddle.fluid as fluid
import math

dense_feature_dim = 13

user_dense_feature_dim = 13
item_dense_feature_dim = 13

## text cnn conf
WORD_SIZE = 100000
EMBED_SIZE = 64
CNN_DIM = 128
CNN_FILTER_SIZE = 5
# use sparse updates for the embedding tables
IS_SPARSE = True


def text_cnn(word):
    """Embed a word-id sequence and apply a conv-pool (text CNN) layer."""
    embed = fluid.layers.embedding(
        input=word,
        size=[WORD_SIZE, EMBED_SIZE],
        dtype='float32',
        param_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.Normal(scale=1/math.sqrt(WORD_SIZE))),
        is_sparse=IS_SPARSE,
        is_distributed=False)
    cnn = fluid.nets.sequence_conv_pool(
        input=embed,
        num_filters=CNN_DIM,
        filter_size=CNN_FILTER_SIZE,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Normal(
                scale=1 / math.sqrt(CNN_FILTER_SIZE * embed.shape[1]))),
        act='tanh',
        pool_type="max")
    return cnn
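

# A minimal usage sketch for text_cnn (illustrative, not part of the original
# model): it expects a LoD (lod_level=1) int64 tensor of word ids. The data
# layer name "query_words" is an assumed placeholder.
def text_cnn_example():
    query_words = fluid.layers.data(
        name="query_words", shape=[1], lod_level=1, dtype='int64')
    # returns a [batch_size, CNN_DIM] pooled text feature
    return text_cnn(query_words)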


def deepmf_ctr_model(embedding_size, sparse_feature_dim,
                     user_sparse_slot_num, item_sparse_slot_num):
    """Two-tower (user/item) DNN matched by cosine similarity for CTR prediction.

    user_sparse_slot_num / item_sparse_slot_num are the number of sparse
    feature slots on the user and item side respectively.
    """

    def embedding_layer(input):
        return fluid.layers.embedding(
            input=input,
            is_sparse=True,
            # you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190
            # if you want to set is_distributed to True
            is_distributed=False,
            size=[sparse_feature_dim, embedding_size],
            param_attr=fluid.ParamAttr(name="SparseFeatFactors",
                                       initializer=fluid.initializer.Uniform()))

    user_dense_input = fluid.layers.data(
        name="user_dense_input", shape=[user_dense_feature_dim], dtype='float32')

    user_sparse_input_ids = [
        fluid.layers.data(name="USER" + str(i), shape=[1], lod_level=1, dtype='int64')
        for i in range(1, user_sparse_slot_num + 1)]

    item_dense_input = fluid.layers.data(
        name="item_dense_input", shape=[item_dense_feature_dim], dtype='float32')

    item_sparse_input_ids = [
        fluid.layers.data(name="ITEM" + str(i), shape=[1], lod_level=1, dtype='int64')
        for i in range(1, item_sparse_slot_num + 1)]

    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    datas = [user_dense_input] + [item_dense_input] + user_sparse_input_ids + item_sparse_input_ids + [label]

    py_reader = fluid.layers.create_py_reader_by_data(capacity=64,
                                                      feed_list=datas,
                                                      name='py_reader',
                                                      use_double_buffer=True)
    words = fluid.layers.read_file(py_reader)
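    # words layout: [user_dense, item_dense, user_sparse slots..., item_sparse slots..., label]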

    user_sparse_embed_seq = list(map(embedding_layer, words[2: user_sparse_slot_num + 2]))
    item_sparse_embed_seq = list(map(embedding_layer, words[user_sparse_slot_num + 2: user_sparse_slot_num + item_sparse_slot_num + 2]))

    user_concated = fluid.layers.concat(user_sparse_embed_seq + words[0:1], axis=1)
    item_concated = fluid.layers.concat(item_sparse_embed_seq + words[1:2], axis=1)

    # user tower: three fully-connected layers on top of the concatenated user features
    user_fc1 = fluid.layers.fc(input=user_concated, size=400, act='relu',
                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                   scale=1 / math.sqrt(user_concated.shape[1]))))
    user_fc2 = fluid.layers.fc(input=user_fc1, size=128, act='relu',
                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                   scale=1 / math.sqrt(user_fc1.shape[1]))))
    user_fc3 = fluid.layers.fc(input=user_fc2, size=64, act='tanh',
                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                   scale=1 / math.sqrt(user_fc2.shape[1]))))

    # item tower: mirrors the user tower on the concatenated item features
    item_fc1 = fluid.layers.fc(input=item_concated, size=400, act='relu',
                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                   scale=1 / math.sqrt(item_concated.shape[1]))))
    item_fc2 = fluid.layers.fc(input=item_fc1, size=128, act='relu',
                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                   scale=1 / math.sqrt(item_fc1.shape[1]))))
    item_fc3 = fluid.layers.fc(input=item_fc2, size=64, act='tanh',
                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                   scale=1 / math.sqrt(item_fc2.shape[1]))))

    # cosine similarity between the user and item representations
    sim = fluid.layers.cos_sim(X=user_fc3, Y=item_fc3)

    predict = fluid.layers.fc(input=sim, size=2, act='softmax',
                              param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                                  scale=1 / math.sqrt(sim.shape[1]))))

    cost = fluid.layers.cross_entropy(input=predict, label=words[-1])
    avg_cost = fluid.layers.reduce_sum(cost)
    accuracy = fluid.layers.accuracy(input=predict, label=words[-1])
    auc_var, batch_auc_var, auc_states = \
        fluid.layers.auc(input=predict, label=words[-1], num_thresholds=2 ** 12, slide_steps=20)

    return avg_cost, auc_var, batch_auc_var, py_reader
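

# Illustrative training-loop sketch (assumptions, not part of the original conf):
# `train_batch_reader` is assumed to be an already-batched Python reader yielding
# samples in the same order as `datas` above; the default slot counts, feature
# dims and the Adam optimizer below are placeholders, not values from this file.
def train_example(train_batch_reader, embedding_size=10, sparse_feature_dim=1000001,
                  user_sparse_slot_num=10, item_sparse_slot_num=10):
    avg_cost, auc_var, batch_auc_var, py_reader = deepmf_ctr_model(
        embedding_size, sparse_feature_dim,
        user_sparse_slot_num, item_sparse_slot_num)
    fluid.optimizer.Adam(learning_rate=1e-3).minimize(avg_cost)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # feed the py_reader from the Python-side batched reader
    py_reader.decorate_paddle_reader(train_batch_reader)
    py_reader.start()
    try:
        while True:
            loss_val, auc_val = exe.run(fluid.default_main_program(),
                                        fetch_list=[avg_cost, auc_var])
            print("loss: {}, auc: {}".format(loss_val, auc_val))
    except fluid.core.EOFException:
        py_reader.reset()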