diff --git a/word_embedding/network_conf.py b/word_embedding/network_conf.py index 9cc39ebfb9657567dcb8594b56f89ef5d4137a85..e8c7b5cb153467e3de14c48e74c29246211833a8 100644 --- a/word_embedding/network_conf.py +++ b/word_embedding/network_conf.py @@ -44,13 +44,6 @@ def network_conf(hidden_size, embed_size, dict_size): param_attr=paddle.attr.Param( initial_std=1. / math.sqrt(embed_size * 8), learning_rate=1)) - with paddle.layer.mixed( - size=dict_size - 1, - act=paddle.activation.Sigmoid(), - bias_attr=paddle.attr.Param(name='sigmoid_b')) as prediction: - prediction += paddle.layer.trans_full_matrix_projection( - input=hidden_layer, param_attr=paddle.attr.Param(name='sigmoid_w')) - cost = paddle.layer.hsigmoid( input=hidden_layer, label=target_word, @@ -58,14 +51,13 @@ def network_conf(hidden_size, embed_size, dict_size): param_attr=paddle.attr.Param(name='sigmoid_w'), bias_attr=paddle.attr.Param(name='sigmoid_b')) - parameters = paddle.parameters.create([cost, prediction]) - - adam_optimizer = paddle.optimizer.Adam( - learning_rate=3e-3, - regularization=paddle.optimizer.L2Regularization(8e-4)) + with paddle.layer.mixed( + size=dict_size - 1, + act=paddle.activation.Sigmoid(), + bias_attr=paddle.attr.Param(name='sigmoid_b')) as prediction: + prediction += paddle.layer.trans_full_matrix_projection( + input=hidden_layer, param_attr=paddle.attr.Param(name='sigmoid_w')) input_data_lst = ['firstw', 'secondw', 'thirdw', 'fourthw', 'fifthw'] - trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer) - - return input_data_lst, trainer, prediction, parameters + return input_data_lst, cost, prediction diff --git a/word_embedding/predict_v2.py b/word_embedding/predict_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..5dfa928c9f9b0014790a60b687c25c3a39fa2dc4 --- /dev/null +++ b/word_embedding/predict_v2.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import paddle.v2 as paddle +from network_conf import network_conf +import 
def decode_res(infer_res, dict_size):
    """Decode per-node binary decisions into predicted class labels.

    Each row of ``infer_res`` is treated as an array-encoded binary tree:
    starting at node 0, a value greater than 0.5 moves to the right child
    (``2 * idx + 2``) and appends a 1 bit to the path code; otherwise the
    walk moves to the left child (``2 * idx + 1``) and appends a 0 bit.
    The walk stops once the node index runs past the end of the row, and
    the label is the accumulated code (seeded with a leading 1) minus
    ``dict_size``.

    Only the nodes on the walked path are compared against the threshold,
    so rows may be plain Python sequences as well as array rows.

    NOTE(review): rows are presumably the ``dict_size - 1`` sigmoid outputs
    of the prediction layer built in ``network_conf`` -- confirm.

    :param infer_res: iterable of rows, one per instance, each an indexable
        sequence of node outputs.
    :param dict_size: number of classes; subtracted from the path code.
    :return: list with one integer label per input row.
    """
    predict_lbls = []
    for probs in infer_res:
        num_nodes = len(probs)
        idx = 0
        code = 1  # leading 1 bit marks the root of the path code
        while idx < num_nodes:
            go_right = probs[idx] > 0.5
            code <<= 1
            if go_right:
                code |= 1
                idx = idx * 2 + 2  # right child
            else:
                idx = idx * 2 + 1  # left child
        predict_lbls.append(code - dict_size)
    return predict_lbls


def main(model_path='./models/model_pass_00000.tar.gz', ins_num=10):
    """Load a trained model and print predictions for a few samples.

    Builds the network through ``network_conf``, loads parameters from the
    gzip-compressed tar at ``model_path``, draws ``ins_num`` shuffled
    5-grams from the imikolov training reader, runs inference on the
    context words and prints one ``context -> predicted ( ground truth )``
    line per instance.

    :param model_path: path of the gzip-compressed parameter tarball.
    :param ins_num: number of instances to run inference on.
    """
    import gzip

    paddle.init(use_gpu=False, trainer_count=4)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    _, _, prediction = network_conf(
        hidden_size=256, embed_size=32, dict_size=dict_size)

    print('Load model ....')
    with gzip.open(model_path) as f:
        parameters = paddle.parameters.Parameters.from_tar(f)

    ins_lst = []
    ins_lbls = []

    ins_buffer = paddle.reader.shuffle(
        lambda: paddle.dataset.imikolov.train(word_dict, 5)(),
        buf_size=1000)

    # The last element of every instance is the target word; everything
    # before it is the context fed to the network.
    for ins in ins_buffer():
        ins_lst.append(ins[:-1])
        ins_lbls.append(ins[-1])
        if len(ins_lst) >= ins_num:
            break

    infer_res = paddle.infer(
        output_layer=prediction, parameters=parameters, input=ins_lst)

    idx_word_dict = {idx: word for word, idx in word_dict.items()}

    predict_lbls = decode_res(infer_res, dict_size)
    predict_words = [idx_word_dict[lbl] for lbl in predict_lbls]
    gt_words = [idx_word_dict[lbl] for lbl in ins_lbls]

    for ins, predict_word, gt_word in zip(ins_lst, predict_words, gt_words):
        # Show the whole context, not only its first two words.
        context = ' '.join(idx_word_dict[word_id] for word_id in ins)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(context + ' -> ' + predict_word + ' ( ' + gt_word + ' )')


if __name__ == '__main__':
    main()
input_data_lst, trainer, _, parameters = network_conf( + input_data_lst, cost, prediction = network_conf( hidden_size=256, embed_size=32, dict_size=dict_size) def event_handler(event): @@ -29,6 +29,11 @@ def main(): event.pass_id, event.batch_id, event.cost) feeding = dict(zip(input_data_lst, xrange(len(input_data_lst)))) + parameters = paddle.parameters.create([cost, prediction]) + adam_optimizer = paddle.optimizer.Adam( + learning_rate=3e-3, + regularization=paddle.optimizer.L2Regularization(8e-4)) + trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer) trainer.train( paddle.batch(