提交 c97b2ddd 编写于 作者: Qiao Longfei 提交者: GitHub

Merge pull request #375 from jacquesqiao/update-word2vector

Update word2vec to be compatible with the book's ipynb notebook
...@@ -12,17 +12,15 @@ def wordemb(inlayer): ...@@ -12,17 +12,15 @@ def wordemb(inlayer):
input=inlayer, input=inlayer,
size=embsize, size=embsize,
param_attr=paddle.attr.Param( param_attr=paddle.attr.Param(
name="_proj", name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0))
initial_std=0.001,
learning_rate=1,
l2_rate=0, ))
return wordemb return wordemb
def main(): def main():
paddle.init(use_gpu=False, trainer_count=1) paddle.init(use_gpu=False, trainer_count=3)
word_dict = paddle.dataset.imikolov.build_dict() word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict) dict_size = len(word_dict)
# Every layer takes integer value of range [0, dict_size)
firstword = paddle.layer.data( firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size)) name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data( secondword = paddle.layer.data(
...@@ -57,22 +55,26 @@ def main(): ...@@ -57,22 +55,26 @@ def main():
def event_handler(event): def event_handler(event):
if isinstance(event, paddle.event.EndIteration): if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0: if event.batch_id % 100 == 0:
result = trainer.test( print "Pass %d, Batch %d, Cost %f, %s" % (
paddle.batch( event.pass_id, event.batch_id, event.cost, event.metrics)
paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % ( if isinstance(event, paddle.event.EndPass):
event.pass_id, event.batch_id, event.cost, event.metrics, result = trainer.test(
result.metrics) paddle.batch(paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Testing metrics %s" % (event.pass_id,
result.metrics)
with open("model_%d.tar" % event.pass_id, 'w') as f:
parameters.to_tar(f)
cost = paddle.layer.classification_cost(input=predictword, label=nextword) cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
adam_optimizer = paddle.optimizer.Adam( adagrad = paddle.optimizer.AdaGrad(
learning_rate=3e-3, learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4)) regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer) trainer = paddle.trainer.SGD(cost, parameters, adagrad)
trainer.train( trainer.train(
paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32), paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
num_passes=30, num_passes=100,
event_handler=event_handler) event_handler=event_handler)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册