提交 4a94f8a4 编写于 作者: D dangqingqing

refine api training

上级 bca1fce6
import sys
import math
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
from model_v2 import db_lstm import paddle.v2.dataset.conll05 as conll05
UNK_IDX = 0 UNK_IDX = 0
word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
predicate_file = './data/verbDict.txt'
word_dict = dict() def db_lstm():
label_dict = dict() word_dict, verb_dict, label_dict = conll05.get_dict()
predicate_dict = dict() word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
with open(word_dict_file, 'r') as f_word, \ pred_len = len(verb_dict)
open(label_dict_file, 'r') as f_label, \ print 'word_dict_len,', word_dict_len
open(predicate_file, 'r') as f_pre: print 'label_dict_len,', label_dict_len
for i, line in enumerate(f_word): print 'pred_len,', pred_len
w = line.strip()
word_dict[w] = i mark_dict_len = 2
word_dim = 32
for i, line in enumerate(f_label): mark_dim = 5
w = line.strip() hidden_dim = 512
label_dict[w] = i depth = 8
for i, line in enumerate(f_pre): #8 features
w = line.strip() def d_type(size):
predicate_dict[w] = i return paddle.data_type.integer_value_sequence(size)
word_dict_len = len(word_dict) word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
label_dict_len = len(label_dict) predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
pred_len = len(predicate_dict)
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
def train_reader(file_name="data/feature"): ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
def reader(): ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
with open(file_name, 'r') as fdata: ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
for line in fdata: mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t') target = paddle.layer.data(name='target', type=d_type(label_dict_len))
words = sentence.split() default_std = 1 / math.sqrt(hidden_dim) / 3.0
sen_len = len(words)
word_slot = [word_dict.get(w, UNK_IDX) for w in words] emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
std_0 = paddle.attr.Param(initial_std=0.)
predicate_slot = [predicate_dict.get(predicate)] * sen_len std_default = paddle.attr.Param(initial_std=default_std)
ctx_n2_slot = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len predicate_embedding = paddle.layer.embedding(
ctx_0_slot = [word_dict.get(ctx_0, UNK_IDX)] * sen_len size=word_dim,
ctx_p1_slot = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len input=predicate,
ctx_p2_slot = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
marks = mark.split() mark_embedding = paddle.layer.embedding(
mark_slot = [int(w) for w in marks] size=mark_dim, input=mark, param_attr=std_0)
label_list = label.split() word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
label_slot = [label_dict.get(w) for w in label_list] emb_layers = [
yield word_slot, ctx_n2_slot, ctx_n1_slot, \ paddle.layer.embedding(
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot, label_slot size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
return reader emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = paddle.layer.lstmemory(
input=mix_hidden,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = paddle.layer.mixed(
size=label_dict_len,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
crf_cost = paddle.layer.crf(size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(
name='crfw',
initial_std=default_std,
learning_rate=mix_hidden_lr))
crf_dec = paddle.layer.crf_decoding(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
return crf_cost, crf_dec
def load_parameter(file_name, h, w): def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f: with open(file_name, 'rb') as f:
f.read(16) # skip header for float type. f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w) return np.fromfile(f, dtype=np.float32).reshape(h, w)
...@@ -71,44 +144,36 @@ def main(): ...@@ -71,44 +144,36 @@ def main():
paddle.init(use_gpu=False, trainer_count=1) paddle.init(use_gpu=False, trainer_count=1)
# define network topology # define network topology
crf_cost, crf_dec = db_lstm(word_dict_len, label_dict_len, pred_len) crf_cost, crf_dec = db_lstm()
# create parameters
parameters = paddle.parameters.create([crf_cost, crf_dec]) parameters = paddle.parameters.create([crf_cost, crf_dec])
optimizer = paddle.optimizer.Momentum(momentum=0.01, learning_rate=2e-2)
# create optimizer
optimizer = paddle.optimizer.Momentum(
momentum=0,
learning_rate=2e-2,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=10000), )
def event_handler(event): def event_handler(event):
if isinstance(event, paddle.event.EndIteration): if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0: if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % ( print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics) event.pass_id, event.batch_id, event.cost, event.metrics)
else:
pass
trainer = paddle.trainer.SGD(cost=crf_cost, trainer = paddle.trainer.SGD(cost=crf_cost,
parameters=parameters, parameters=parameters,
update_equation=optimizer) update_equation=optimizer)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
parameters.set('emb', load_parameter("data/emb", 44068, 32))
reader_dict = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
'ctx_0_data': 3,
'ctx_p1_data': 4,
'ctx_p2_data': 5,
'verb_data': 6,
'mark_data': 7,
'target': 8,
}
trn_reader = paddle.reader.batched( trn_reader = paddle.reader.batched(
paddle.reader.shuffle( paddle.reader.shuffle(
train_reader(), buf_size=8192), batch_size=10) conll05.test, buf_size=8192), batch_size=10)
trainer.train( trainer.train(
reader=trn_reader, reader=trn_reader, event_handler=event_handler, num_passes=10000)
event_handler=event_handler,
num_passes=10000,
reader_dict=reader_dict)
if __name__ == '__main__': if __name__ == '__main__':
......
import math
import paddle.v2 as paddle
def db_lstm(word_dict_len, label_dict_len, pred_len):
    """Build a stack of alternating-direction LSTM layers topped by a CRF.

    Eight integer-sequence inputs (the word, five context words, the
    predicate and a mark) are embedded, projected into one hidden layer and
    passed through ``depth`` LSTM layers.  A final mixed layer of width
    ``label_dict_len`` feeds both a CRF cost layer and a CRF decoding layer,
    which share the parameter named ``'crfw'``.

    Args:
        word_dict_len: size given to the integer-sequence type of the
            ``word_data`` and ``ctx_*_data`` inputs.
        label_dict_len: size given to the ``target`` input; also the width
            of the output layer and of both CRF layers.
        pred_len: size given to the ``verb_data`` input.

    Returns:
        A ``(crf_cost, crf_dec)`` tuple of paddle layers.
    """
    mark_dict_len = 2
    word_dim = 32
    mark_dim = 5
    hidden_dim = 512
    depth = 8

    def seq_input(name, size):
        # One data layer holding a sequence of integer ids below ``size``.
        return paddle.layer.data(
            name=name, type=paddle.data_type.integer_value_sequence(size))

    # 8 features.  The creation order of the data layers is kept as-is.
    word = seq_input('word_data', word_dict_len)
    predicate = seq_input('verb_data', pred_len)
    context_names = ('ctx_n2_data', 'ctx_n1_data', 'ctx_0_data',
                     'ctx_p1_data', 'ctx_p2_data')
    context_words = [seq_input(name, word_dict_len) for name in context_names]
    mark = seq_input('mark_data', mark_dict_len)
    target = seq_input('target', label_dict_len)

    init_std = 1 / math.sqrt(hidden_dim) / 3.0

    # The shared word-embedding table 'emb' is created with learning_rate=0.
    word_emb_attr = paddle.attr.Param(
        name='emb', initial_std=0., learning_rate=0.)
    zero_std_attr = paddle.attr.Param(initial_std=0.)
    default_std_attr = paddle.attr.Param(initial_std=init_std)

    # Predicate and mark embeddings are created before the word embeddings,
    # exactly as in the original statement order.
    predicate_emb_attr = paddle.attr.Param(name='vemb', initial_std=init_std)
    predicate_embedding = paddle.layer.embedding(
        size=word_dim, input=predicate, param_attr=predicate_emb_attr)
    mark_embedding = paddle.layer.embedding(
        size=mark_dim, input=mark, param_attr=zero_std_attr)

    emb_layers = []
    for source in [word] + context_words:
        emb_layers.append(
            paddle.layer.embedding(
                size=word_dim, input=source, param_attr=word_emb_attr))
    emb_layers.extend([predicate_embedding, mark_embedding])

    # Project every embedding into the first hidden layer.
    first_projections = []
    for emb in emb_layers:
        first_projections.append(
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=default_std_attr))
    hidden_0 = paddle.layer.mixed(
        size=hidden_dim, bias_attr=default_std_attr, input=first_projections)

    mix_hidden_lr = 1e-3
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=init_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=zero_std_attr,
        param_attr=lstm_para_attr)

    # Stack L-LSTM and R-LSTM with direct edges: each level mixes the
    # previous level's hidden layer with its LSTM output, and odd levels
    # run the LSTM in reverse.
    prev_hidden, prev_lstm = hidden_0, lstm_0
    for level in range(1, depth):
        hidden_proj = paddle.layer.full_matrix_projection(
            input=prev_hidden, param_attr=hidden_para_attr)
        lstm_proj = paddle.layer.full_matrix_projection(
            input=prev_lstm, param_attr=lstm_para_attr)
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=default_std_attr,
            input=[hidden_proj, lstm_proj])

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=((level % 2) == 1),
            bias_attr=zero_std_attr,
            param_attr=lstm_para_attr)

        prev_hidden, prev_lstm = mix_hidden, lstm

    # Output layer: one score per label for every sequence position.
    out_hidden_proj = paddle.layer.full_matrix_projection(
        input=prev_hidden, param_attr=hidden_para_attr)
    out_lstm_proj = paddle.layer.full_matrix_projection(
        input=prev_lstm, param_attr=lstm_para_attr)
    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=default_std_attr,
        input=[out_hidden_proj, out_lstm_proj])

    crf_train_attr = paddle.attr.Param(
        name='crfw', initial_std=init_std, learning_rate=mix_hidden_lr)
    crf_cost = paddle.layer.crf(
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=crf_train_attr)

    # The decoding layer refers to the same 'crfw' parameter by name.
    crf_dec = paddle.layer.crf_decoding(
        name='crf_dec_l',
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))

    return crf_cost, crf_dec
...@@ -14,4 +14,4 @@ ...@@ -14,4 +14,4 @@
import mnist import mnist
__all__ = ['mnist'] __all__ = ['mnist', 'cifar', 'imdb', 'conll05', 'imikolov', 'movielens']
...@@ -160,7 +160,6 @@ def reader_creator(corpus_reader, ...@@ -160,7 +160,6 @@ def reader_creator(corpus_reader,
ctx_p2 = 'eos' ctx_p2 = 'eos'
word_idx = [word_dict.get(w, UNK_IDX) for w in sentence] word_idx = [word_dict.get(w, UNK_IDX) for w in sentence]
pred_idx = [predicate_dict.get(predicate)] * sen_len
ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len
...@@ -168,10 +167,11 @@ def reader_creator(corpus_reader, ...@@ -168,10 +167,11 @@ def reader_creator(corpus_reader,
ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len
pred_idx = [predicate_dict.get(predicate)] * sen_len
label_idx = [label_dict.get(w) for w in labels] label_idx = [label_dict.get(w) for w in labels]
yield word_idx, pred_idx, ctx_n2_idx, ctx_n1_idx, \ yield word_idx, ctx_n2_idx, ctx_n1_idx, \
ctx_0_idx, ctx_p1_idx, ctx_p2_idx, mark, label_idx ctx_0_idx, ctx_p1_idx, ctx_p2_idx, pred_idx, mark, label_idx
return reader() return reader()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册