From 305de2d09556a24e7673f19275ab1b926880592a Mon Sep 17 00:00:00 2001
From: wangsijiang
Date: Thu, 24 Jan 2019 17:35:53 +0800
Subject: [PATCH] add deepmf nn

---
 fluid/PaddleRec/ctr/deepmf_conf.py  | 117 ++++++++++++++++++++++++++++
 fluid/PaddleRec/ctr/network_conf.py |  55 +++++++++++++
 2 files changed, 172 insertions(+)
 create mode 100644 fluid/PaddleRec/ctr/deepmf_conf.py

diff --git a/fluid/PaddleRec/ctr/deepmf_conf.py b/fluid/PaddleRec/ctr/deepmf_conf.py
new file mode 100644
index 00000000..7b175acf
--- /dev/null
+++ b/fluid/PaddleRec/ctr/deepmf_conf.py
@@ -0,0 +1,117 @@
+import paddle.fluid as fluid
+import math
+
+dense_feature_dim = 13
+
+user_dense_feature_dim = 13
+item_dense_feature_dim = 13
+
+## text cnn conf
+WORD_SIZE = 100000
+EMBED_SIZE = 64
+CNN_DIM = 128
+CNN_FILTER_SIZE = 5
+IS_SPARSE = True
+
+
+def text_cnn(word):
+    """Text CNN: embedding lookup followed by sequence conv + max pooling.
+    """
+    embed = fluid.layers.embedding(
+        input=word,
+        size=[WORD_SIZE, EMBED_SIZE],
+        dtype='float32',
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Normal(scale=1 / math.sqrt(WORD_SIZE))),
+        is_sparse=IS_SPARSE,
+        is_distributed=False)
+    cnn = fluid.nets.sequence_conv_pool(
+        input=embed,
+        num_filters=CNN_DIM,
+        filter_size=CNN_FILTER_SIZE,
+        param_attr=fluid.ParamAttr(
+            initializer=fluid.initializer.Normal(scale=1 / math.sqrt(CNN_FILTER_SIZE * embed.shape[1]))),
+        act='tanh',
+        pool_type="max")
+    return cnn
+
+
+def deepmf_ctr_model(embedding_size, sparse_feature_dim,
+                     user_sparse_slot_num, item_sparse_slot_num):
+    """Two-tower (deep MF) CTR model: a user tower and an item tower are
+    matched by cosine similarity; user_sparse_slot_num / item_sparse_slot_num
+    give the number of USER* / ITEM* sparse slots."""
+
+    def embedding_layer(input):
+        return fluid.layers.embedding(
+            input=input,
+            is_sparse=True,
+            # you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190
+            # if you want to set is_distributed to True
+            is_distributed=False,
+            size=[sparse_feature_dim, embedding_size],
+            param_attr=fluid.ParamAttr(name="SparseFeatFactors",
+                                       initializer=fluid.initializer.Uniform()))
+
+    user_dense_input = fluid.layers.data(
+        name="user_dense_input", shape=[user_dense_feature_dim], dtype='float32')
+
+    user_sparse_input_ids = [
+        fluid.layers.data(name="USER" + str(i), shape=[1], lod_level=1, dtype='int64')
+        for i in range(1, user_sparse_slot_num + 1)]
+
+    item_dense_input = fluid.layers.data(
+        name="item_dense_input", shape=[item_dense_feature_dim], dtype='float32')
+
+    item_sparse_input_ids = [
+        fluid.layers.data(name="ITEM" + str(i), shape=[1], lod_level=1, dtype='int64')
+        for i in range(1, item_sparse_slot_num + 1)]
+
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+    datas = [user_dense_input] + [item_dense_input] + user_sparse_input_ids + item_sparse_input_ids + [label]
+
+    py_reader = fluid.layers.create_py_reader_by_data(capacity=64,
+                                                      feed_list=datas,
+                                                      name='py_reader',
+                                                      use_double_buffer=True)
+    words = fluid.layers.read_file(py_reader)
+
+    # words layout: [user_dense, item_dense, user sparse slots..., item sparse slots..., label]
+    user_sparse_embed_seq = list(map(embedding_layer, words[2: user_sparse_slot_num + 2]))
+    item_sparse_embed_seq = list(map(embedding_layer, words[user_sparse_slot_num + 2: user_sparse_slot_num + item_sparse_slot_num + 2]))
+
+    user_concated = fluid.layers.concat(user_sparse_embed_seq + words[0:1], axis=1)
+    item_concated = fluid.layers.concat(item_sparse_embed_seq + words[1:2], axis=1)
+
+    # user tower
+    user_fc1 = fluid.layers.fc(input=user_concated, size=400, act='relu',
+                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                   scale=1 / math.sqrt(user_concated.shape[1]))))
+    user_fc2 = fluid.layers.fc(input=user_fc1, size=128, act='relu',
+                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                   scale=1 / math.sqrt(user_fc1.shape[1]))))
+    user_fc3 = fluid.layers.fc(input=user_fc2, size=64, act='tanh',
+                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                   scale=1 / math.sqrt(user_fc2.shape[1]))))
+
+    # item tower
+    item_fc1 = fluid.layers.fc(input=item_concated, size=400, act='relu',
+                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                   scale=1 / math.sqrt(item_concated.shape[1]))))
+    item_fc2 = fluid.layers.fc(input=item_fc1, size=128, act='relu',
+                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                   scale=1 / math.sqrt(item_fc1.shape[1]))))
+    item_fc3 = fluid.layers.fc(input=item_fc2, size=64, act='tanh',
+                               param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                   scale=1 / math.sqrt(item_fc2.shape[1]))))
+
+    # cosine similarity between the two towers feeds the click/no-click softmax
+    sim = fluid.layers.cos_sim(X=user_fc3, Y=item_fc3)
+
+    predict = fluid.layers.fc(input=sim, size=2, act='softmax',
+                              param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                  scale=1 / math.sqrt(sim.shape[1]))))
+
+    cost = fluid.layers.cross_entropy(input=predict, label=words[-1])
+    avg_cost = fluid.layers.reduce_sum(cost)
+    accuracy = fluid.layers.accuracy(input=predict, label=words[-1])
+    auc_var, batch_auc_var, auc_states = \
+        fluid.layers.auc(input=predict, label=words[-1], num_thresholds=2 ** 12, slide_steps=20)
+
+    return avg_cost, auc_var, batch_auc_var, py_reader
+
diff --git a/fluid/PaddleRec/ctr/network_conf.py b/fluid/PaddleRec/ctr/network_conf.py
index c51f892e..7b30631e 100644
--- a/fluid/PaddleRec/ctr/network_conf.py
+++ b/fluid/PaddleRec/ctr/network_conf.py
@@ -33,6 +33,61 @@ def text_cnn(word):
     return cnn
 
 
+def deepmf_ctr_model(embedding_size, sparse_feature_dim):
+
+    def embedding_layer(input):
+        return fluid.layers.embedding(
+            input=input,
+            is_sparse=True,
+            # you need to patch https://github.com/PaddlePaddle/Paddle/pull/14190
+            # if you want to set is_distributed to True
+            is_distributed=False,
+            size=[sparse_feature_dim, embedding_size],
+            param_attr=fluid.ParamAttr(name="SparseFeatFactors",
+                                       initializer=fluid.initializer.Uniform()))
+
+    dense_input = fluid.layers.data(
+        name="dense_input", shape=[dense_feature_dim], dtype='float32')
+
+    sparse_input_ids = [
+        fluid.layers.data(name="C" + str(i), shape=[1], lod_level=1, dtype='int64')
+        for i in range(1, 27)]
+
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+    datas = [dense_input] + sparse_input_ids + [label]
+
+    py_reader = fluid.layers.create_py_reader_by_data(capacity=64,
+                                                      feed_list=datas,
+                                                      name='py_reader',
+                                                      use_double_buffer=True)
+    words = fluid.layers.read_file(py_reader)
+
+    sparse_embed_seq = list(map(embedding_layer, words[1:-1]))
+    concated = fluid.layers.concat(sparse_embed_seq + words[0:1], axis=1)
+
+    fc1 = fluid.layers.fc(input=concated, size=400, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(concated.shape[1]))))
+    fc2 = fluid.layers.fc(input=fc1, size=400, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(fc1.shape[1]))))
+    fc3 = fluid.layers.fc(input=fc2, size=400, act='relu',
+                          param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                              scale=1 / math.sqrt(fc2.shape[1]))))
+    predict = fluid.layers.fc(input=fc3, size=2, act='softmax',
+                              param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
+                                  scale=1 / math.sqrt(fc3.shape[1]))))
+
+    cost = fluid.layers.cross_entropy(input=predict, label=words[-1])
+    avg_cost = fluid.layers.reduce_sum(cost)
+    accuracy = fluid.layers.accuracy(input=predict, label=words[-1])
+    auc_var, batch_auc_var, auc_states = \
+        fluid.layers.auc(input=predict, label=words[-1], num_thresholds=2 ** 12, slide_steps=20)
+
+    return avg_cost, auc_var, batch_auc_var, py_reader
+
+
 def ctr_deepfm_model(factor_size, sparse_feature_dim, dense_feature_dim,
                      sparse_input):
     def dense_fm_layer(input, emb_dict_size, factor_size, fm_param_attr):
-- 
GitLab
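
Reviewer note (not part of the patch): a minimal local-training sketch for the new deepmf_ctr_model, following the usual Fluid 1.x py_reader workflow of this ctr example. The slot counts, embedding/vocabulary sizes, learning rate, and the synthetic fake_reader below are illustrative assumptions only, not values defined by this patch.

import paddle
import paddle.fluid as fluid

from deepmf_conf import deepmf_ctr_model


def fake_reader(user_slots=13, item_slots=13, samples=1000):
    # synthetic samples in the same order as the model's `datas` list:
    # user dense, item dense, user sparse ids, item sparse ids, label
    def reader():
        for _ in range(samples):
            sample = [[0.1] * 13, [0.2] * 13]
            sample += [[1] for _ in range(user_slots + item_slots)]
            sample.append([0])
            yield tuple(sample)
    return reader


# hypothetical hyper-parameters; adjust to the actual dataset
loss, auc_var, batch_auc_var, py_reader = deepmf_ctr_model(
    embedding_size=10, sparse_feature_dim=1000001,
    user_sparse_slot_num=13, item_sparse_slot_num=13)
fluid.optimizer.Adam(learning_rate=1e-4).minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
py_reader.decorate_paddle_reader(paddle.batch(fake_reader(), batch_size=100))

for pass_id in range(2):
    py_reader.start()
    try:
        while True:
            loss_val, auc_val = exe.run(fluid.default_main_program(),
                                        fetch_list=[loss, auc_var])
    except fluid.core.EOFException:
        py_reader.reset()

In a real run the synthetic reader would be replaced by the example's existing dataset reader, keeping the per-sample field order expected by the model's feed list.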