TypeError: reduce() of empty sequence with no initial value
Created by: JessieMeng
import os, sys
import gzip
import paddle.v2 as paddle
import numpy as np
import functools
import argparse
from data_input import *
def lambda_rank(title_input_dim,label_input_dim):
"""
lambda_rank is a Listwise rank model, the input data and label must be sequences.
https://papers.nips.cc/paper/2971-learning-to-rank-with-nonsmooth-cost-functions.pdf
parameters :
input_dim, one document's dense feature vector dimension
format of the dense_vector_sequence:
[[f, ...], [f, ...], ...], f is a float or an int number
"""
label = paddle.layer.data("label",
paddle.data_type.dense_vector_sequence(1))
title_data = paddle.layer.data("title_data",
paddle.data_type.integer_value(title_input_dim))
title_emb = paddle.layer.embedding(input=title_data,size=256)
label_data = paddle.layer.data("label_data",
paddle.data_type.integer_value(label_input_dim))
label_emb = paddle.layer.embedding(input=label_data,size=256)
combine_data = paddle.layer.fc(input=[title_emb,label_emb],size=128,act=paddle.activation.Relu())
# hidden layer
title_hd1 = paddle.layer.fc(
input=combine_data,
size=10,
act=paddle.activation.Tanh())
output = paddle.layer.fc(
input=title_hd1,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01))
# evaluator
evaluator = paddle.evaluator.auc(input=output, label=label)
# cost layer
cost = paddle.layer.lambda_cost(
input=output, score=label, NDCG_num=6, max_sort_size=-1)
return cost, output
def train_lambda_rank(num_passes):
# listwise input sequence
fill_default_train = functools.partial(
train)
fill_default_test = functools.partial(
test)
train_reader = paddle.batch(
paddle.reader.shuffle(fill_default_train, buf_size=1000), batch_size=32)
test_reader = paddle.batch(fill_default_test, batch_size=32)
# mq2007 input_dim = 46, dense format
title_input_dim = 358561
label_input_dim = 6116
cost, output = lambda_rank(title_input_dim,label_input_dim)
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(
cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(learning_rate=1e-4))
# Define end batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
print "Pass %d Batch %d Cost %.9f" % (event.pass_id, event.batch_id,
event.cost)
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
with gzip.open("lambda_rank_params_%d.tar.gz" % (event.pass_id),
"w") as f:
parameters.to_tar(f)
feeding = {"label": 0, "title_data": 1,"label_data":2}
trainer.train(
reader=paddle.batch(paddle.reader.shuffle(train(),buf_size=100),batch_size=32),
event_handler=event_handler,
feeding=feeding,
num_passes=num_passes)
def lambda_rank_infer(pass_id):
"""
lambda_rank model inference interface
parameters:
pass_id : inference model in pass_id
"""
print "Begin to Infer..."
input_dim = 46
output = lambda_rank(input_dim)
parameters = paddle.parameters.Parameters.from_tar(
gzip.open("lambda_rank_params_%d.tar.gz" % (pass_id - 1)))
infer_query_id = None
infer_data = []
infer_data_num = 1
fill_default_test = functools.partial(
paddle.dataset.mq2007.test, format="listwise")
for label, querylist in fill_default_test():
infer_data.append(querylist)
if len(infer_data) == infer_data_num:
break
# predict score of infer_data document. Re-sort the document base on predict score
# in descending order. then we build the ranking documents
predicitons = paddle.infer(
output_layer=output, parameters=parameters, input=infer_data)
for i, score in enumerate(predicitons):
print i, score
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='LambdaRank demo')
parser.add_argument("--run_type", type=str, help="run type is train|infer")
parser.add_argument(
"--num_passes",
type=int,
help="num of passes in train| infer pass number of model")
args = parser.parse_args()
paddle.init(use_gpu=False, trainer_count=1)
if args.run_type == "train":
train_lambda_rank(args.num_passes)
elif args.run_type == "infer":
lambda_rank_infer(pass_id=args.num_passes - 1)
#-*- coding:utf-8
import sys
import random
import functools
__all__=['train','test']
def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
rand = random.Random(x=rand_seed)
with open("data_seg_id_result") as f:
for line in f:
if (rand.random() < test_ratio) == is_test:
tokens = line.strip().split("\t")
print "all:",len(tokens)
score = int(tokens[0])
words = tokens[1].split(" ")
new_word_list=[]
print "word:",len(words)
for word in words:
if word=='':
continue
new_word_list.append(int(word))
lable=int(tokens[2])
#print [[score]]+[new_word_list]+[lable]
yield [[score]]+[new_word_list]+[lable]
def __reader_creator__(**kwargs):
return lambda: __reader__(**kwargs)
train = functools.partial(__reader_creator__,is_test=False)
test = functools.partial(__reader_creator__,is_test=True)