提交 43b2bccd 编写于 作者: Y yangyaming

Add network configuration and training script

上级 7629cf6d
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import math
import paddle.v2 as paddle
def network_conf(hidden_size, embed_size, dict_size):
    """Assemble the five-word network, its hsigmoid cost and an Adam trainer.

    Four context-word data layers are embedded with a table projection
    whose parameter is named "_proj", concatenated, and passed through a
    sigmoid fully-connected layer with dropout. A hierarchical-sigmoid
    cost is computed against the fifth (target) word, and a separate
    prediction layer is declared with the same parameter names
    ('sigmoid_w' / 'sigmoid_b') as that cost layer.

    Args:
        hidden_size: Size of the fully-connected hidden layer.
        embed_size: Size of each word's table projection.
        dict_size: Value range of every word data layer and the number of
            classes of the hsigmoid cost; the prediction layer has
            ``dict_size - 1`` outputs.

    Returns:
        A tuple ``(input_data_lst, trainer, prediction, parameters)``
        where ``input_data_lst`` lists the data-layer names in order.
    """
    context_names = ['firstw', 'secondw', 'thirdw', 'fourthw']
    target_name = 'fifthw'

    def word_input(layer_name):
        """Declare an integer-valued data layer for one word slot."""
        return paddle.layer.data(
            name=layer_name, type=paddle.data_type.integer_value(dict_size))

    def embed_word(in_layer):
        """Project a word layer through the "_proj" embedding table."""
        return paddle.layer.table_projection(
            input=in_layer,
            size=embed_size,
            param_attr=paddle.attr.Param(
                name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0))

    # Data layers are declared first (context words, then the target),
    # and only afterwards embedded, mirroring the original build order.
    context_words = [word_input(layer_name) for layer_name in context_names]
    target_word = word_input(target_name)
    embedded_context = [embed_word(word) for word in context_words]

    context_embed = paddle.layer.concat(input=embedded_context)

    hidden_init_std = 1. / math.sqrt(embed_size * 8)
    hidden_layer = paddle.layer.fc(
        input=context_embed,
        size=hidden_size,
        act=paddle.activation.Sigmoid(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5),
        bias_attr=paddle.attr.Param(learning_rate=2),
        param_attr=paddle.attr.Param(
            initial_std=hidden_init_std, learning_rate=1))

    # The prediction layer names its weight and bias exactly like the
    # hsigmoid cost layer below.
    with paddle.layer.mixed(
            size=dict_size - 1,
            act=paddle.activation.Sigmoid(),
            bias_attr=paddle.attr.Param(name='sigmoid_b')) as prediction:
        prediction += paddle.layer.trans_full_matrix_projection(
            input=hidden_layer,
            param_attr=paddle.attr.Param(name='sigmoid_w'))

    cost = paddle.layer.hsigmoid(
        input=hidden_layer,
        label=target_word,
        num_classes=dict_size,
        param_attr=paddle.attr.Param(name='sigmoid_w'),
        bias_attr=paddle.attr.Param(name='sigmoid_b'))

    parameters = paddle.parameters.create([cost, prediction])

    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))

    input_data_lst = context_names + [target_name]
    trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
    return input_data_lst, trainer, prediction, parameters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import gzip
import os

import paddle.v2 as paddle

from network_conf import network_conf
def main():
    """Train the network from ``network_conf`` on the imikolov dataset.

    Initializes Paddle on CPU with one trainer, builds the imikolov word
    dictionary, and trains for 30 passes on shuffled 5-gram batches of
    size 64. Every 100 batches the test set is evaluated and both the
    training and test cost are printed; at the end of every pass the
    parameters are written to ``./models/model_pass_<pass_id>.tar.gz``.
    """
    paddle.init(use_gpu=False, trainer_count=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    input_data_lst, trainer, _, parameters = network_conf(
        hidden_size=256, embed_size=32, dict_size=dict_size)

    # Map each data-layer name to its position in a sample tuple. Built
    # before the handler so training and testing share the same mapping.
    feeding = {name: index for index, name in enumerate(input_data_lst)}

    def event_handler(event):
        """Save the model after each pass; report costs every 100 batches."""
        if isinstance(event, paddle.event.EndPass):
            model_name = './models/model_pass_%05d.tar.gz' % event.pass_id
            # gzip.open does not create missing directories, so make sure
            # the output directory exists before the first save.
            model_dir = os.path.dirname(model_name)
            if not os.path.isdir(model_dir):
                os.makedirs(model_dir)
            print("Save model into %s ..." % model_name)
            with gzip.open(model_name, 'w') as f:
                parameters.to_tar(f)
        elif isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        paddle.dataset.imikolov.test(word_dict, 5), 32),
                    feeding=feeding)
                # Report the test cost too; evaluating the test set and
                # discarding the result would be wasted work.
                print("Pass %d, Batch %d, Cost %f, Test Cost %f" % (
                    event.pass_id, event.batch_id, event.cost, result.cost))

    trainer.train(
        paddle.batch(
            paddle.reader.shuffle(
                lambda: paddle.dataset.imikolov.train(word_dict, 5)(),
                buf_size=1000), 64),
        num_passes=30,
        event_handler=event_handler,
        feeding=feeding)
# Run training only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册