提交 a87a3c96 编写于 作者: D dzhwinter

"update comment"

上级 561b6c82
......@@ -8,6 +8,15 @@ import functools
def lambdaRank(input_dim):
"""
LambdaRank is a listwise ranking model; the input data and the label must both be sequences.
https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf
parameters :
input_dim, one document's dense feature vector dimension
dense_vector_sequence format
[[f, ...], [f, ...], ...], where f represents a float or int number
"""
label = paddle.layer.data("label",
paddle.data_type.dense_vector_sequence(1))
data = paddle.layer.data("data",
......@@ -16,14 +25,24 @@ def lambdaRank(input_dim):
# hidden layer
hd1 = paddle.layer.fc(
input=data,
size=128,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))
hd2 = paddle.layer.fc(
input=hd1,
size=10,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01))
output = paddle.layer.fc(
input=hd1,
input=hd2,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01))
# evaluator
evaluator = paddle.evaluator.auc(input=output, label=label)
# cost layer
cost = paddle.layer.lambda_cost(
input=output, score=label, NDCG_num=6, max_sort_size=-1)
return cost, output
......@@ -39,7 +58,7 @@ def train_lambdaRank(num_passes):
paddle.reader.shuffle(fill_default_train, buf_size=100), batch_size=32)
test_reader = paddle.batch(fill_default_test, batch_size=32)
# mq2007 input_dim = 46, dense format
# mq2007 input_dim = 46, dense format
input_dim = 46
cost, output = lambdaRank(input_dim)
parameters = paddle.parameters.create(cost)
......@@ -83,20 +102,23 @@ def lambdaRank_infer(pass_id):
infer_query_id = None
infer_data = []
infer_data_num = 1000
infer_data_num = 1
fill_default_test = functools.partial(
paddle.dataset.mq2007.test, format="listwise")
for label, querylist in fill_default_test():
infer_data.append(querylist)
if len(infer_data) == infer_data_num:
break
# Predict the score of each document in infer_data. Re-sorting the documents by
# predicted score in descending order then gives the ranked document list.
predicitons = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data)
for i, score in enumerate(predicitons):
print score
print i, score
if __name__ == '__main__':
paddle.init(use_gpu=False, trainer_count=4)
train_lambdaRank(100)
lambdaRank_infer(pass_id=2)
train_lambdaRank(2)
lambdaRank_infer(pass_id=1)
import os, sys
import os
import sys
import gzip
import functools
import paddle.v2 as paddle
......@@ -37,7 +38,7 @@ def half_ranknet(name_prefix, input_dim):
def ranknet(input_dim):
# label layer
label = paddle.layer.data("label", paddle.data_type.integer_value(1))
label = paddle.layer.data("label", paddle.data_type.dense_vector(1))
# reuse the parameter in half_ranknet
output_left = half_ranknet("left", input_dim)
......@@ -56,7 +57,7 @@ def train_ranknet(num_passes):
batch_size=100)
test_reader = paddle.batch(paddle.dataset.mq2007.test, batch_size=100)
# mq2007 feature_dim = 46, dense format
# mq2007 feature_dim = 46, dense format
# fc hidden_dim = 128
feature_dim = 46
cost = ranknet(feature_dim)
......@@ -106,10 +107,9 @@ def ranknet_infer(pass_id):
gzip.open("ranknet_params_%d.tar.gz" % (pass_id - 1)))
# Load the data of the same query and its relevant documents; ranknet is needed to rank these candidates.
infer_query_id = None
infer_query_id = []
infer_data = []
infer_score_list = []
infer_data_num = 1000
infer_doc_index = []
# convert to mq2007 built-in data format
# <query_id> <relevance_score> <feature_vector>
......@@ -117,17 +117,19 @@ def ranknet_infer(pass_id):
paddle.dataset.mq2007.test, format="plain_txt")
for query_id, relevance_score, feature_vector in plain_txt_test():
if infer_query_id == None:
infer_query_id = query_id
elif infer_query_id != query_id:
break
infer_query_id.append(query_id)
infer_data.append(feature_vector)
predicitons = paddle.infer(
# Predict the score of each document in infer_data. Re-sorting the documents by
# predicted score in descending order then gives the ranked document list.
scores = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data)
for query_id, score in zip(infer_query_id, scores):
print "query_id : ", query_id, " ranknet rank document order : ", score
if __name__ == '__main__':
paddle.init(use_gpu=False, trainer_count=4)
pass_num = 10
pass_num = 2
train_ranknet(pass_num)
ranknet_infer(pass_id=pass_num - 1)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册