提交 f3ebf87c 编写于 作者: D dong zhihong

ltr case done.

上级 f5724efa
import os, sys
import gzip
import paddle.v2 as paddle
import numpy as np
import functools
#lambdaRank is listwise learning to rank algorithm
def lambdaRank(feature_dim):
label = paddle.layer.data("label", paddle.data_type.integer_value_sequence(1))
data = paddle.layer.data("data", paddle.data_type.dense_vector(feature_dim))
# two hidden layers
hd1 = paddle.layer.fc(
name="/hidden_1",
input=data,
size=256,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01, name="hidden_w1"))
hd2 = paddle.layer.fc(
name="/hidden_2",
input=hd1,
size=256,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01, name="hidden_w2"))
output = paddle.layer.fc(
name="/output",
input=hd2,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01, name="output"))
cost = paddle.layer.lambda_cost(input=output,
score=label,
NDCG_num=10)
return cost, output
def train_lambdaRank(num_passes):
fill_default_train = functools.partial(paddle.dataset.mq2007.train, format="listwise")
fill_default_test = functools.partial(paddle.dataset.mq2007.test, format="listwise")
train_reader = paddle.batch(
paddle.reader.shuffle(fill_default_train, buf_size=1000), batch_size=1000)
test_reader = paddle.batch(
paddle.reader.shuffle(fill_default_test, buf_size=1000), batch_size=1000)
# mq2007 feature_dim = 46, dense format
# fc hidden_dim = 128
feature_dim = 46
cost, output = lambdaRank(feature_dim)
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(
cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(learning_rate=1e-4)
)
# Define end batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.9f" % (
event.pass_id, event.batch_id, event.cost)
else:
sys.stdout.write(".")
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" %(event.pass_id, result.metrics)
with gzip.open("lambdaRank_params_%d.tar.gz" %(event.pass_id), "w") as f:
parameters.to_tar(f)
feeding = {"label":0,
"data": 1}
trainer.train(reader=train_reader,
event_handler=event_handler,
feeding=feeding,
num_passes=num_passes)
def lambdaRank_infer(pass_id):
print "Begin to Infer..."
feature_dim = 46
output = lambdaRnak(feature_dim)
parameters = paddle.parameters.Parameters.from_tar(gzip.open("lambdaRank_params_%d.tar.gz" %(pass_id-1)))
infer_data = []
infer_data_num = 1000
for label, left, right in paddle.dataset.mq2007.test():
infer_data.append(left)
if len(infer_data) == infer_data_num:
break
predicitons = paddle.infer(output_layer=output,
parameters=parameters,
input=infer_data)
for i, score in enumerate(predicitons):
print score
if __name__ == '__main__':
paddle.init(use_gpu=False, trainer_count=4)
train_lambdaRank(2)
lambdaRank_infer(pass_id=2)
import os, sys
import gzip
import paddle.v2 as paddle
import numpy as np
# ranknet is the classic pairwise learning to rank algorithm
# http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf
def half_ranknet(name_prefix, input_dim):
data = paddle.layer.data(name_prefix+"/data", paddle.data_type.dense_vector(input_dim))
# two hidden layers
hd1 = paddle.layer.fc(
name=name_prefix+"/hidden_1",
input=data,
size=32,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01, name="hidden_w1"))
hd2 = paddle.layer.fc(
name=name_prefix+"/hidden_2",
input=hd1,
size=16,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(initial_std=0.01, name="hidden_w2"))
output = paddle.layer.fc(
name=name_prefix+"/output",
input=hd2,
size=1,
act=paddle.activation.Linear(),
param_attr=paddle.attr.Param(initial_std=0.01, name="output"))
return output
def ranknet(input_dim):
label = paddle.layer.data("label", paddle.data_type.integer_value(1))
output_left = half_ranknet("left", input_dim)
output_right = half_ranknet("right", input_dim)
cost = paddle.layer.rank_cost(name="cost", left=output_left, right=output_right, label=label)
return cost
def train_ranknet(num_passes):
train_reader = paddle.batch(
paddle.reader.shuffle(paddle.dataset.mq2007.train, buf_size=1000), batch_size=1000)
test_reader = paddle.batch(
paddle.reader.shuffle(paddle.dataset.mq2007.test, buf_size=1000), batch_size=1000)
# mq2007 feature_dim = 46, dense format
# fc hidden_dim = 128
feature_dim = 46
cost = ranknet(feature_dim)
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(
cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(learning_rate=2e-4)
)
# Define end batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.9f" % (
event.pass_id, event.batch_id, event.cost)
else:
sys.stdout.write(".")
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" %(event.pass_id, result.metrics)
with gzip.open("ranknet_params_%d.tar.gz" %(event.pass_id), "w") as f:
parameters.to_tar(f)
feeding = {"label":0,
"left/data" :1,
"right/data":2}
trainer.train(reader=train_reader,
event_handler=event_handler,
feeding=feeding,
num_passes=num_passes)
def ranknet_infer(pass_id):
print "Begin to Infer..."
feature_dim = 46
output = half_ranknet("left", feature_dim)
parameters = paddle.parameters.Parameters.from_tar(gzip.open("ranknet_params_%d.tar.gz" %(pass_id-1)))
infer_data = []
infer_data_num = 1000
for label, left, right in paddle.dataset.mq2007.test():
infer_data.append(left)
if len(infer_data) == infer_data_num:
break
predicitons = paddle.infer(output_layer=output,
parameters=parameters,
input=infer_data)
for i, score in enumerate(predicitons):
print score
if __name__ == '__main__':
paddle.init(use_gpu=False, trainer_count=4)
train_ranknet(2)
ranknet_infer(pass_id=2)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册