add deepmf nn

305de2d0 · wangsijiang · 6494c6fc · 305de2d0 · 305de2d0
显示空白变更内容
内联并排

Showing 2 changed files with 172 additions and 0 deletions

fluid/PaddleRec/ctr/deepmf_conf.py fluid/PaddleRec/ctr/deepmf_conf.py +117 -0

fluid/PaddleRec/ctr/network_conf.py fluid/PaddleRec/ctr/network_conf.py +55 -0

未找到文件。
--- a/fluid/PaddleRec/ctr/deepmf_conf.py
+++ b/fluid/PaddleRec/ctr/deepmf_conf.py
+import paddle.fluid as fluid
+import math
+
+dense_feature_dim = 13  # not referenced elsewhere in deepmf_conf.py
+
+user_dense_feature_dim = 13  # width of the user-side dense input in deepmf_ctr_model
+item_dense_feature_dim = 13  # width of the item-side dense input in deepmf_ctr_model
+
+## text_cnn configuration
+WORD_SIZE = 100000  # row count of the word-embedding table built in text_cnn
+EMBED_SIZE = 64  # column count (vector width) of that embedding table
+CNN_DIM = 128  # num_filters passed to sequence_conv_pool in text_cnn
+CNN_FILTER_SIZE = 5  # filter_size passed to sequence_conv_pool in text_cnn
+
+
+def text_cnn(word):
+    """
+    """
+    embed = fluid.layers.embedding(
+        input=word,
+        size=[WORD_SIZE, EMBED_SIZE],
+        dtype='float32',
+        param_attr=fluid.ParamAttr(
+                        initializer=fluid.initializer.Normal(scale=1/math.sqrt(WORD_SIZE))),
+        is_sparse=IS_SPARSE,
+        is_distributed=False)
+    cnn = fluid.nets.sequence_conv_pool(
+         input = embed,
+         num_filters = CNN_DIM,
+         filter_size = CNN_FILTER_SIZE,
+         param_attr=fluid.ParamAttr(
+                         initializer=fluid.initializer.Normal(scale=1/math.sqrt(CNN_FILTER_SIZE * embed.shape[1]))),
+         act='tanh',
+         pool_type = "max")
+    return cnn
+
+
+def deepmf_ctr_model(embedding_size, sparse_feature_dim):
+
+    def embedding_layer(input):
+        return fluid.layers.embedding(
+            input=input,
+            is_sparse=True,
+            # you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190
+            # if you want to set is_distributed to True
+            is_distributed=False,
+            size=[sparse_feature_dim, embedding_size],
+            param_attr=fluid.ParamAttr(name="SparseFeatFactors",
+                                       initializer=fluid.initializer.Uniform()))
+
+    user_dense_input = fluid.layers.data(
+        name="dense_input", shape=[user_dense_feature_dim], dtype='float32')
+
+    user_sparse_input_ids = [
+        fluid.layers.data(name="USER" + str(i), shape=[1], lod_level=1, dtype='int64')
+        for i in range(1, user_sparse_slot_num)]
+
+    item_dense_input = fluid.layers.data(
+        name="dense_input", shape=[item_dense_feature_dim], dtype='float32')
+
+    item_sparse_input_ids = [
+        fluid.layers.data(name="ITEM" + str(i), shape=[1], lod_level=1, dtype='int64')
+        for i in range(1, item_sparse_slot_num)]
+
+
+
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+    datas = [user_dense_input] + [item_dense_input] + user_sparse_input_ids  + item_sparse_input_ids + [label]
+
+    py_reader = fluid.layers.create_py_reader_by_data(capacity=64,
+                                                      feed_list=datas,
+                                                      name='py_reader',
+                                                      use_double_buffer=True)
+    words = fluid.layers.read_file(py_reader)
+
+    user_sparse_embed_seq = list(map(embedding_layer, words[2: user_sparse_slot_num + 2]))
+    item_sparse_embed_seq = list(map(embedding_layer, words[user_sparse_slot_num + 2: user_sparse_slot_num + item_sparse_slot_num + 2]))
+    
+    
+    user_concated = fluid.layers.concat(user_sparse_embed_seq + words[0:1], axis=1)
+    item_concated = fluid.layers.concat(item_sparse_embed_seq + words[1:2], axis=1)
+
+    user_fc1 = fluid.layers.fc(input=user_concated, size=400, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(concated.shape[1]))))
+    user_fc2 = fluid.layers.fc(input=fc1, size=128, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(fc1.shape[1]))))
+    user_fc3 = fluid.layers.fc(input=fc2, size=64, act='tanh',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(fc2.shape[1]))))
+
+    item_fc1 = fluid.layers.fc(input=user_concated, size=400, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(concated.shape[1]))))
+    item_fc2 = fluid.layers.fc(input=fc1, size=128, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(fc1.shape[1]))))
+    item_fc3 = fluid.layers.fc(input=fc2, size=64, act='tanh',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(fc2.shape[1]))))
+
+    sim = fluid.layers.cos_sim(X = user_fc3, Y = item_fc3)
+
+    predict = fluid.layers.fc(input=sim, size=2, act='softmax',
+                              param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                  scale=1 / math.sqrt(fc3.shape[1]))))
+
+    cost = fluid.layers.cross_entropy(input=predict, label=words[-1])
+    avg_cost = fluid.layers.reduce_sum(cost)
+    accuracy = fluid.layers.accuracy(input=predict, label=words[-1])
+    auc_var, batch_auc_var, auc_states = \
+        fluid.layers.auc(input=predict, label=words[-1], num_thresholds=2 ** 12, slide_steps=20)
+
+    return avg_cost, auc_var, batch_auc_var, py_reader
+
--- a/fluid/PaddleRec/ctr/network_conf.py
+++ b/fluid/PaddleRec/ctr/network_conf.py
@@ -33,6 +33,61 @@ def text_cnn(word):
    return cnn


+def deepmf_ctr_model(embedding_size, sparse_feature_dim):
+
+    def embedding_layer(input):
+        return fluid.layers.embedding(
+            input=input,
+            is_sparse=True,
+            # you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190
+            # if you want to set is_distributed to True
+            is_distributed=False,
+            size=[sparse_feature_dim, embedding_size],
+            param_attr=fluid.ParamAttr(name="SparseFeatFactors",
+                                       initializer=fluid.initializer.Uniform()))
+
+    dense_input = fluid.layers.data(
+        name="dense_input", shape=[dense_feature_dim], dtype='float32')
+
+    sparse_input_ids = [
+        fluid.layers.data(name="C" + str(i), shape=[1], lod_level=1, dtype='int64')
+        for i in range(1, 27)]
+
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+    datas = [dense_input] + sparse_input_ids + [label]
+
+    py_reader = fluid.layers.create_py_reader_by_data(capacity=64,
+                                                      feed_list=datas,
+                                                      name='py_reader',
+                                                      use_double_buffer=True)
+    words = fluid.layers.read_file(py_reader)
+
+    sparse_embed_seq = list(map(embedding_layer, words[1:-1]))
+    concated = fluid.layers.concat(sparse_embed_seq + words[0:1], axis=1)
+
+    fc1 = fluid.layers.fc(input=concated, size=400, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(concated.shape[1]))))
+    fc2 = fluid.layers.fc(input=fc1, size=400, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(fc1.shape[1]))))
+    fc3 = fluid.layers.fc(input=fc2, size=400, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(fc2.shape[1]))))
+    predict = fluid.layers.fc(input=fc3, size=2, act='softmax',
+                              param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                  scale=1 / math.sqrt(fc3.shape[1]))))
+
+    cost = fluid.layers.cross_entropy(input=predict, label=words[-1])
+    avg_cost = fluid.layers.reduce_sum(cost)
+    accuracy = fluid.layers.accuracy(input=predict, label=words[-1])
+    auc_var, batch_auc_var, auc_states = \
+        fluid.layers.auc(input=predict, label=words[-1], num_thresholds=2 ** 12, slide_steps=20)
+
+    return avg_cost, auc_var, batch_auc_var, py_reader
+
+

 def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim, sparse_input):
    def dense_fm_layer(input, emb_dict_size, factor_size, fm_param_attr):