提交 47ba3c29 编写于 作者: P peterzhang2029

fix lambda_rank

上级 b8256825
@@ -8,25 +8,25 @@ import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
def lambda_rank(input_dim): def lambda_rank(input_dim, is_infer):
""" """
lambda_rank is a Listwise rank model, the input data and label Lambda_rank is a Listwise rank model, the input data and label
must be sequences. must be sequences.
https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf
parameters : parameters :
input_dim, one document's dense feature vector dimension input_dim, one document's dense feature vector dimension
format of the dense_vector_sequence: Format of the dense_vector_sequence:
[[f, ...], [f, ...], ...], f is a float or an int number [[f, ...], [f, ...], ...], f is a float or an int number
""" """
if not is_infer:
label = paddle.layer.data("label", label = paddle.layer.data("label",
paddle.data_type.dense_vector_sequence(1)) paddle.data_type.dense_vector_sequence(1))
data = paddle.layer.data("data", data = paddle.layer.data("data",
paddle.data_type.dense_vector_sequence(input_dim)) paddle.data_type.dense_vector_sequence(input_dim))
# hidden layer # Define hidden layer.
hd1 = paddle.layer.fc( hd1 = paddle.layer.fc(
input=data, input=data,
size=128, size=128,
@@ -44,27 +44,30 @@ def lambda_rank(input_dim):
act=paddle.activation.Linear(), act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01)) param_attr=paddle.attr.Param(initial_std=0.01))
# evaluator if not is_infer:
evaluator = paddle.evaluator.auc(input=output, label=label) # Define evaluator.
# cost layer evaluator = paddle.evaluator.auc(input=output, label=label)
cost = paddle.layer.lambda_cost( # Define cost layer.
input=output, score=label, NDCG_num=6, max_sort_size=-1) cost = paddle.layer.lambda_cost(
return cost, output input=output, score=label, NDCG_num=6, max_sort_size=-1)
return cost, output
return output
def train_lambda_rank(num_passes): def train_lambda_rank(num_passes):
# listwise input sequence # Listwise input sequence.
fill_default_train = functools.partial( fill_default_train = functools.partial(
paddle.dataset.mq2007.train, format="listwise") paddle.dataset.mq2007.train, format="listwise")
fill_default_test = functools.partial( fill_default_test = functools.partial(
paddle.dataset.mq2007.test, format="listwise") paddle.dataset.mq2007.test, format="listwise")
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle(fill_default_train, buf_size=100), batch_size=32) paddle.reader.shuffle(fill_default_train, buf_size=100), batch_size=32)
test_reader = paddle.batch(fill_default_test, batch_size=32) test_reader = paddle.batch(fill_default_test, batch_size=32)
# mq2007 input_dim = 46, dense format # Training dataset: mq2007, input_dim = 46, dense format.
input_dim = 46 input_dim = 46
cost, output = lambda_rank(input_dim) cost, output = lambda_rank(input_dim, is_infer=False)
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD( trainer = paddle.trainer.SGD(
@@ -72,7 +75,7 @@ def train_lambda_rank(num_passes):
parameters=parameters, parameters=parameters,
update_equation=paddle.optimizer.Adam(learning_rate=1e-4)) update_equation=paddle.optimizer.Adam(learning_rate=1e-4))
# Define end batch and end pass event handler # Define end batch and end pass event handler.
def event_handler(event): def event_handler(event):
if isinstance(event, paddle.event.EndIteration): if isinstance(event, paddle.event.EndIteration):
print "Pass %d Batch %d Cost %.9f" % (event.pass_id, event.batch_id, print "Pass %d Batch %d Cost %.9f" % (event.pass_id, event.batch_id,
@@ -93,30 +96,31 @@ def train_lambda_rank(num_passes):
def lambda_rank_infer(pass_id): def lambda_rank_infer(pass_id):
"""lambda_rank model inference interface """Lambda rank model inference interface.
parameters: Parameters:
pass_id : inference model in pass_id pass_id : inference model in pass_id
""" """
print "Begin to Infer..." print "Begin to Infer..."
input_dim = 46 input_dim = 46
output = lambda_rank(input_dim) output = lambda_rank(input_dim, is_infer=True)
parameters = paddle.parameters.Parameters.from_tar( parameters = paddle.parameters.Parameters.from_tar(
gzip.open("lambda_rank_params_%d.tar.gz" % (pass_id - 1))) gzip.open("lambda_rank_params_%d.tar.gz" % (pass_id - 1)))
infer_query_id = None infer_query_id = None
infer_data = [] infer_data = []
infer_data_num = 1 infer_data_num = 1
fill_default_test = functools.partial( fill_default_test = functools.partial(
paddle.dataset.mq2007.test, format="listwise") paddle.dataset.mq2007.test, format="listwise")
for label, querylist in fill_default_test(): for label, querylist in fill_default_test():
infer_data.append(querylist) infer_data.append([querylist])
if len(infer_data) == infer_data_num: if len(infer_data) == infer_data_num:
break break
# predict score of infer_data document. # Predict score of infer_data document.
# Re-sort the document base on predict score # Re-sort the document base on predict score.
# in descending order. then we build the ranking documents # In descending order. then we build the ranking documents.
predicitons = paddle.infer( predicitons = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data) output_layer=output, parameters=parameters, input=infer_data)
for i, score in enumerate(predicitons): for i, score in enumerate(predicitons):
@@ -129,7 +133,7 @@ if __name__ == '__main__':
parser.add_argument( parser.add_argument(
"--num_passes", "--num_passes",
type=int, type=int,
help="num of passes in train| infer pass number of model") help="The Num of passes in train| infer pass number of model.")
args = parser.parse_args() args = parser.parse_args()
paddle.init(use_gpu=False, trainer_count=1) paddle.init(use_gpu=False, trainer_count=1)
if args.run_type == "train": if args.run_type == "train":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册