提交 3adc3ab1 编写于 作者: Y yangyaming

Change table_projection to an embedding layer; predict all test instances batch by batch

上级 347626a4
......@@ -5,16 +5,7 @@ import math
import paddle.v2 as paddle
def network_conf(is_train, hidden_size, embed_size, dict_size):
def word_embed(in_layer):
''' word embedding layer '''
word_embed = paddle.layer.table_projection(
input=in_layer,
size=embed_size,
param_attr=paddle.attr.Param(
name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0))
return word_embed
def network_conf(hidden_size, embed_size, dict_size, is_train=True):
first_word = paddle.layer.data(
name='firstw', type=paddle.data_type.integer_value(dict_size))
second_word = paddle.layer.data(
......@@ -26,17 +17,23 @@ def network_conf(is_train, hidden_size, embed_size, dict_size):
target_word = paddle.layer.data(
name='fifthw', type=paddle.data_type.integer_value(dict_size))
first_word_embed = word_embed(first_word)
second_word_embed = word_embed(second_word)
third_word_embed = word_embed(third_word)
fourth_word_embed = word_embed(fourth_word)
context_embed = paddle.layer.concat(input=[
first_word_embed, second_word_embed, third_word_embed, fourth_word_embed
embed_param_attr = paddle.attr.Param(
name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)
embed_first_word = paddle.layer.embedding(
input=first_word, size=embed_size, param_attr=embed_param_attr)
embed_second_word = paddle.layer.embedding(
input=second_word, size=embed_size, param_attr=embed_param_attr)
embed_third_word = paddle.layer.embedding(
input=third_word, size=embed_size, param_attr=embed_param_attr)
embed_fourth_word = paddle.layer.embedding(
input=fourth_word, size=embed_size, param_attr=embed_param_attr)
embed_context = paddle.layer.concat(input=[
embed_first_word, embed_second_word, embed_third_word, embed_fourth_word
])
hidden_layer = paddle.layer.fc(
input=context_embed,
input=embed_context,
size=hidden_size,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
......
......@@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
import paddle.v2 as paddle
from network_conf import network_conf
from hsigmoid_conf import network_conf
import gzip
......@@ -36,41 +36,47 @@ def decode_res(infer_res, dict_size):
return predict_lbls
def predict(batch_ins, idx_word_dict, dict_size, prediction_layer, parameters):
    """Run inference on one batch and print every instance with its prediction.

    Args:
        batch_ins: sequence of instances; the first four entries of each
            instance are looked up in ``idx_word_dict`` as the context words.
        idx_word_dict: mapping from word index to word.
        dict_size: dictionary size, forwarded unchanged to ``decode_res``.
        prediction_layer: output layer handed to ``paddle.infer``.
        parameters: model parameters handed to ``paddle.infer``.

    Returns:
        None. One line per instance is printed.
    """
    raw_scores = paddle.infer(
        output_layer=prediction_layer, parameters=parameters, input=batch_ins)
    # decode_res lives elsewhere in this file; it presumably yields one label
    # index per instance -- confirm against its definition.
    label_ids = decode_res(raw_scores, dict_size)
    # Resolve all predicted words up front, before anything is printed.
    predicted = [idx_word_dict[label_id] for label_id in label_ids]

    # Printed line layout: word1 word2 word3 word4  -> predicted word
    for position in range(len(batch_ins)):
        instance = batch_ins[position]
        context = [idx_word_dict[instance[slot]] for slot in range(4)]
        line = ' '.join(context) + ' ' + ' -> ' + predicted[position]
        print(line)
def main():
paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict(typo_freq=2)
word_dict = paddle.dataset.imikolov.build_dict(min_word_freq=2)
dict_size = len(word_dict)
prediction = network_conf(
prediction_layer = network_conf(
is_train=False, hidden_size=256, embed_size=32, dict_size=dict_size)
print('Load model ....')
with gzip.open('./models/model_pass_00000.tar.gz') as f:
parameters = paddle.parameters.Parameters.from_tar(f)
ins_num = 10 # total 10 instance for prediction
ins_lst = [] # input data
idx_word_dict = dict((v, k) for k, v in word_dict.items())
batch_size = 64
batch_ins = []
ins_iter = paddle.dataset.imikolov.test(word_dict, 5)
for ins in ins_iter():
ins_lst.append(ins[:-1])
if len(ins_lst) >= ins_num: break
infer_res = paddle.infer(
output_layer=prediction, parameters=parameters, input=ins_lst)
idx_word_dict = dict((v, k) for k, v in word_dict.items())
predict_lbls = decode_res(infer_res, dict_size)
predict_words = [idx_word_dict[lbl] for lbl in predict_lbls] # map to word
# Output format: word1 word2 word3 word4 -> predicted label
for i, ins in enumerate(ins_lst):
print idx_word_dict[ins[0]] + ' ' + \
idx_word_dict[ins[1]] + ' ' + \
idx_word_dict[ins[2]] + ' ' + \
idx_word_dict[ins[3]] + ' ' + \
' -> ' + predict_words[i]
batch_ins.append(ins[:-1])
if len(batch_ins) == batch_size:
predict(batch_ins, idx_word_dict, dict_size, prediction_layer,
parameters)
batch_ins = []
if len(batch_ins) > 0:
predict(batch_ins, idx_word_dict, dict_size, prediction_layer,
parameters)
if __name__ == '__main__':
......
......@@ -2,13 +2,13 @@
# -*- coding: utf-8 -*-
import paddle.v2 as paddle
from network_conf import network_conf
from hsigmoid_conf import network_conf
import gzip
def main():
paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict(typo_freq=2)
word_dict = paddle.dataset.imikolov.build_dict(min_word_freq=2)
dict_size = len(word_dict)
cost = network_conf(
is_train=True, hidden_size=256, embed_size=32, dict_size=dict_size)
......@@ -25,8 +25,8 @@ def main():
result = trainer.test(
paddle.batch(
paddle.dataset.imikolov.test(word_dict, 5), 32))
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)
print("Pass %d, Batch %d, Cost %f, Test Cost %f" %
(event.pass_id, event.batch_id, event.cost, result.cost))
feeding = {
'firstw': 0,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册