提交 d9b2e7d0 编写于 作者: Y Yu Yang

Complete recommendation demo in API.v2

上级 4fd45928
......@@ -2,6 +2,7 @@ import paddle.v2 as paddle
def main():
paddle.init(use_gpu=False, trainer_count=3)
movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
uid = paddle.layer.data(
name='user_id',
......@@ -17,10 +18,17 @@ def main():
name='age_id',
type=paddle.data_type.integer_value(
len(paddle.dataset.movielens.age_table)))
usr_age_emb = paddle.embedding(input=usr_age_id, size=16)
usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
usr_combined_features = paddle.fc(
input=[usr_emb, usr_gender_emb, usr_age_emb],
usr_job_id = paddle.layer.data(
name='job_id',
type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
) + 1))
usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
usr_combined_features = paddle.layer.fc(
input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
size=200,
act=paddle.activation.Tanh())
......@@ -30,12 +38,59 @@ def main():
paddle.dataset.movielens.max_movie_id() + 1))
mov_emb = paddle.layer.embedding(input=mov_id, size=32)
mov_categories = paddle.layer.data(
name='category_id',
type=paddle.data_type.sparse_binary_vector(
len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
mov_title_id = paddle.layer.data(
name='movie_title',
type=paddle.data_type.integer_value(len(movie_title_dict)))
mov_title_emb = paddle.embedding(input=mov_title_id, size=32)
with paddle.layer.mixed() as mixed:
pass
type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
mov_title_conv = paddle.networks.sequence_conv_pool(
input=mov_title_emb, hidden_size=32, context_len=3)
mov_combined_features = paddle.layer.fc(
input=[mov_emb, mov_categories_hidden, mov_title_conv],
size=200,
act=paddle.activation.Tanh())
inference = paddle.layer.cos_sim(
a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
cost = paddle.layer.regression_cost(
input=inference,
label=paddle.layer.data(
name='score', type=paddle.data_type.dense_vector(1)))
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(
learning_rate=1e-4))
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.2f" % (
event.pass_id, event.batch_id, event.cost)
trainer.train(
reader=paddle.reader.batched(
paddle.dataset.movielens.train(), batch_size=256),
event_handler=event_handler,
reader_dict={
'user_id': 0,
'gender_id': 1,
'age_id': 2,
'job_id': 3,
'movie_id': 4,
'category_id': 5,
'movie_title': 6,
'score': 7
})
if __name__ == '__main__':
......
......@@ -6,7 +6,7 @@ import functools
__all__ = [
'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
'age_table'
'age_table', 'movie_categories', 'max_job_id'
]
age_table = [1, 18, 25, 35, 45, 50, 56]
......@@ -135,6 +135,23 @@ def max_user_id():
return reduce(__max_index_info__, USER_INFO.viewvalues()).index
def __max_job_id_impl__(a, b):
if a.job_id > b.job_id:
return a
else:
return b
def max_job_id():
__initialize_meta_info__()
return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id
def movie_categories():
__initialize_meta_info__()
return CATEGORIES_DICT
def unittest():
for train_count, _ in enumerate(train()()):
pass
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册