diff --git a/demo/semantic_role_labeling/api_train_v2.py b/demo/semantic_role_labeling/api_train_v2.py
index cfbd2a022426bbb71d167c14a7e053c097e5eab8..8ce6faaa1b75e030a181b29ff20a039c7630c390 100644
--- a/demo/semantic_role_labeling/api_train_v2.py
+++ b/demo/semantic_role_labeling/api_train_v2.py
@@ -1,69 +1,142 @@
+import sys
+import math
 import numpy as np
 import paddle.v2 as paddle
-from model_v2 import db_lstm
+import paddle.v2.dataset.conll05 as conll05
 
 UNK_IDX = 0
 
-word_dict_file = './data/wordDict.txt'
-label_dict_file = './data/targetDict.txt'
-predicate_file = './data/verbDict.txt'
-word_dict = dict()
-label_dict = dict()
-predicate_dict = dict()
-
-with open(word_dict_file, 'r') as f_word, \
-        open(label_dict_file, 'r') as f_label, \
-        open(predicate_file, 'r') as f_pre:
-    for i, line in enumerate(f_word):
-        w = line.strip()
-        word_dict[w] = i
-
-    for i, line in enumerate(f_label):
-        w = line.strip()
-        label_dict[w] = i
-
-    for i, line in enumerate(f_pre):
-        w = line.strip()
-        predicate_dict[w] = i
-
-word_dict_len = len(word_dict)
-label_dict_len = len(label_dict)
-pred_len = len(predicate_dict)
-
-
-def train_reader(file_name="data/feature"):
-    def reader():
-        with open(file_name, 'r') as fdata:
-            for line in fdata:
-                sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
-                    line.strip().split('\t')
-
-                words = sentence.split()
-                sen_len = len(words)
-                word_slot = [word_dict.get(w, UNK_IDX) for w in words]
-
-                predicate_slot = [predicate_dict.get(predicate)] * sen_len
-                ctx_n2_slot = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len
-                ctx_n1_slot = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len
-                ctx_0_slot = [word_dict.get(ctx_0, UNK_IDX)] * sen_len
-                ctx_p1_slot = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len
-                ctx_p2_slot = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len
-
-                marks = mark.split()
-                mark_slot = [int(w) for w in marks]
-
-                label_list = label.split()
-                label_slot = [label_dict.get(w) for w in label_list]
-                yield word_slot, ctx_n2_slot, ctx_n1_slot, \
-                    ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot, label_slot
-
-    return reader
+
+def db_lstm():
+    word_dict, verb_dict, label_dict = conll05.get_dict()
+    word_dict_len = len(word_dict)
+    label_dict_len = len(label_dict)
+    pred_len = len(verb_dict)
+    print 'word_dict_len,', word_dict_len
+    print 'label_dict_len,', label_dict_len
+    print 'pred_len,', pred_len
+
+    mark_dict_len = 2
+    word_dim = 32
+    mark_dim = 5
+    hidden_dim = 512
+    depth = 8
+
+    #8 features
+    def d_type(size):
+        return paddle.data_type.integer_value_sequence(size)
+
+    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
+    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
+
+    ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
+    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
+    ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
+    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
+    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
+    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
+
+    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
+
+    default_std = 1 / math.sqrt(hidden_dim) / 3.0
+
+    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
+    std_0 = paddle.attr.Param(initial_std=0.)
+    std_default = paddle.attr.Param(initial_std=default_std)
+
+    predicate_embedding = paddle.layer.embedding(
+        size=word_dim,
+        input=predicate,
+        param_attr=paddle.attr.Param(
+            name='vemb', initial_std=default_std))
+    mark_embedding = paddle.layer.embedding(
+        size=mark_dim, input=mark, param_attr=std_0)
+
+    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
+    emb_layers = [
+        paddle.layer.embedding(
+            size=word_dim, input=x, param_attr=emb_para) for x in word_input
+    ]
+    emb_layers.append(predicate_embedding)
+    emb_layers.append(mark_embedding)
+
+    hidden_0 = paddle.layer.mixed(
+        size=hidden_dim,
+        bias_attr=std_default,
+        input=[
+            paddle.layer.full_matrix_projection(
+                input=emb, param_attr=std_default) for emb in emb_layers
+        ])
+
+    mix_hidden_lr = 1e-3
+    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
+    hidden_para_attr = paddle.attr.Param(
+        initial_std=default_std, learning_rate=mix_hidden_lr)
+
+    lstm_0 = paddle.layer.lstmemory(
+        input=hidden_0,
+        act=paddle.activation.Relu(),
+        gate_act=paddle.activation.Sigmoid(),
+        state_act=paddle.activation.Sigmoid(),
+        bias_attr=std_0,
+        param_attr=lstm_para_attr)
+
+    #stack L-LSTM and R-LSTM with direct edges
+    input_tmp = [hidden_0, lstm_0]
+
+    for i in range(1, depth):
+        mix_hidden = paddle.layer.mixed(
+            size=hidden_dim,
+            bias_attr=std_default,
+            input=[
+                paddle.layer.full_matrix_projection(
+                    input=input_tmp[0], param_attr=hidden_para_attr),
+                paddle.layer.full_matrix_projection(
+                    input=input_tmp[1], param_attr=lstm_para_attr)
+            ])
+
+        lstm = paddle.layer.lstmemory(
+            input=mix_hidden,
+            act=paddle.activation.Relu(),
+            gate_act=paddle.activation.Sigmoid(),
+            state_act=paddle.activation.Sigmoid(),
+            reverse=((i % 2) == 1),
+            bias_attr=std_0,
+            param_attr=lstm_para_attr)
+
+        input_tmp = [mix_hidden, lstm]
+
+    feature_out = paddle.layer.mixed(
+        size=label_dict_len,
+        bias_attr=std_default,
+        input=[
+            paddle.layer.full_matrix_projection(
+                input=input_tmp[0], param_attr=hidden_para_attr),
+            paddle.layer.full_matrix_projection(
+                input=input_tmp[1], param_attr=lstm_para_attr)
+        ], )
+
+    crf_cost = paddle.layer.crf(size=label_dict_len,
+                                input=feature_out,
+                                label=target,
+                                param_attr=paddle.attr.Param(
+                                    name='crfw',
+                                    initial_std=default_std,
+                                    learning_rate=mix_hidden_lr))
+
+    crf_dec = paddle.layer.crf_decoding(
+        name='crf_dec_l',
+        size=label_dict_len,
+        input=feature_out,
+        label=target,
+        param_attr=paddle.attr.Param(name='crfw'))
+
+    return crf_cost, crf_dec
 
 
 def load_parameter(file_name, h, w):
     with open(file_name, 'rb') as f:
-        f.read(16)  # skip header for float type.
+        f.read(16)  # skip header.
         return np.fromfile(f, dtype=np.float32).reshape(h, w)
 
 
@@ -71,44 +144,36 @@ def main():
     paddle.init(use_gpu=False, trainer_count=1)
 
     # define network topology
-    crf_cost, crf_dec = db_lstm(word_dict_len, label_dict_len, pred_len)
+    crf_cost, crf_dec = db_lstm()
 
+    # create parameters
     parameters = paddle.parameters.create([crf_cost, crf_dec])
-    optimizer = paddle.optimizer.Momentum(momentum=0.01, learning_rate=2e-2)
+
+    # create optimizer
+    optimizer = paddle.optimizer.Momentum(
+        momentum=0,
+        learning_rate=2e-2,
+        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
+        model_average=paddle.optimizer.ModelAverage(
+            average_window=0.5, max_average_window=10000), )
 
     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
             if event.batch_id % 100 == 0:
                 print "Pass %d, Batch %d, Cost %f, %s" % (
                     event.pass_id, event.batch_id, event.cost, event.metrics)
-        else:
-            pass
 
     trainer = paddle.trainer.SGD(cost=crf_cost,
                                  parameters=parameters,
                                  update_equation=optimizer)
+    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
 
-    parameters.set('emb', load_parameter("data/emb", 44068, 32))
-
-    reader_dict = {
-        'word_data': 0,
-        'ctx_n2_data': 1,
-        'ctx_n1_data': 2,
-        'ctx_0_data': 3,
-        'ctx_p1_data': 4,
-        'ctx_p2_data': 5,
-        'verb_data': 6,
-        'mark_data': 7,
-        'target': 8,
-    }
     trn_reader = paddle.reader.batched(
         paddle.reader.shuffle(
-            train_reader(), buf_size=8192), batch_size=10)
+            conll05.test, buf_size=8192), batch_size=10)
+
     trainer.train(
-        reader=trn_reader,
-        event_handler=event_handler,
-        num_passes=10000,
-        reader_dict=reader_dict)
+        reader=trn_reader, event_handler=event_handler, num_passes=10000)
 
 
 if __name__ == '__main__':
diff --git a/demo/semantic_role_labeling/model_v2.py b/demo/semantic_role_labeling/model_v2.py
deleted file mode 100644
index cec58e52c79408f831d599eae6195e1ddf3d9b9e..0000000000000000000000000000000000000000
--- a/demo/semantic_role_labeling/model_v2.py
+++ /dev/null
@@ -1,121 +0,0 @@
-import math
-import paddle.v2 as paddle
-
-
-def db_lstm(word_dict_len, label_dict_len, pred_len):
-    mark_dict_len = 2
-    word_dim = 32
-    mark_dim = 5
-    hidden_dim = 512
-    depth = 8
-
-    #8 features
-    def d_type(size):
-        return paddle.data_type.integer_value_sequence(size)
-
-    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
-    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
-
-    ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
-    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
-    ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
-    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
-    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
-    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
-
-    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
-
-    default_std = 1 / math.sqrt(hidden_dim) / 3.0
-
-    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
-    std_0 = paddle.attr.Param(initial_std=0.)
-    std_default = paddle.attr.Param(initial_std=default_std)
-
-    predicate_embedding = paddle.layer.embedding(
-        size=word_dim,
-        input=predicate,
-        param_attr=paddle.attr.Param(
-            name='vemb', initial_std=default_std))
-    mark_embedding = paddle.layer.embedding(
-        size=mark_dim, input=mark, param_attr=std_0)
-
-    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
-    emb_layers = [
-        paddle.layer.embedding(
-            size=word_dim, input=x, param_attr=emb_para) for x in word_input
-    ]
-    emb_layers.append(predicate_embedding)
-    emb_layers.append(mark_embedding)
-
-    hidden_0 = paddle.layer.mixed(
-        size=hidden_dim,
-        bias_attr=std_default,
-        input=[
-            paddle.layer.full_matrix_projection(
-                input=emb, param_attr=std_default) for emb in emb_layers
-        ])
-
-    mix_hidden_lr = 1e-3
-    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
-    hidden_para_attr = paddle.attr.Param(
-        initial_std=default_std, learning_rate=mix_hidden_lr)
-
-    lstm_0 = paddle.layer.lstmemory(
-        input=hidden_0,
-        act=paddle.activation.Relu(),
-        gate_act=paddle.activation.Sigmoid(),
-        state_act=paddle.activation.Sigmoid(),
-        bias_attr=std_0,
-        param_attr=lstm_para_attr)
-
-    #stack L-LSTM and R-LSTM with direct edges
-    input_tmp = [hidden_0, lstm_0]
-
-    for i in range(1, depth):
-        mix_hidden = paddle.layer.mixed(
-            size=hidden_dim,
-            bias_attr=std_default,
-            input=[
-                paddle.layer.full_matrix_projection(
-                    input=input_tmp[0], param_attr=hidden_para_attr),
-                paddle.layer.full_matrix_projection(
-                    input=input_tmp[1], param_attr=lstm_para_attr)
-            ])
-
-        lstm = paddle.layer.lstmemory(
-            input=mix_hidden,
-            act=paddle.activation.Relu(),
-            gate_act=paddle.activation.Sigmoid(),
-            state_act=paddle.activation.Sigmoid(),
-            reverse=((i % 2) == 1),
-            bias_attr=std_0,
-            param_attr=lstm_para_attr)
-
-        input_tmp = [mix_hidden, lstm]
-
-    feature_out = paddle.layer.mixed(
-        size=label_dict_len,
-        bias_attr=std_default,
-        input=[
-            paddle.layer.full_matrix_projection(
-                input=input_tmp[0], param_attr=hidden_para_attr),
-            paddle.layer.full_matrix_projection(
-                input=input_tmp[1], param_attr=lstm_para_attr)
-        ], )
-
-    crf_cost = paddle.layer.crf(size=label_dict_len,
-                                input=feature_out,
-                                label=target,
-                                param_attr=paddle.attr.Param(
-                                    name='crfw',
-                                    initial_std=default_std,
-                                    learning_rate=mix_hidden_lr))
-
-    crf_dec = paddle.layer.crf_decoding(
-        name='crf_dec_l',
-        size=label_dict_len,
-        input=feature_out,
-        label=target,
-        param_attr=paddle.attr.Param(name='crfw'))
-
-    return crf_cost, crf_dec
diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py
index 15460b820d86d27e0f538e575fff4d4ca6b46e32..90803628e3e0c52fbe5846c76ca065b8ab18c147 100644
--- a/python/paddle/v2/dataset/__init__.py
+++ b/python/paddle/v2/dataset/__init__.py
@@ -14,4 +14,4 @@
 import mnist
 
-__all__ = ['mnist']
+__all__ = ['mnist', 'cifar', 'imdb', 'conll05', 'imikolov', 'movielens']
diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py
index 7874161a05968921a8e6789a7ff6a229d6cc6c01..52f19d2115f545213962199914f7e40d13dbe92b 100644
--- a/python/paddle/v2/dataset/conll05.py
+++ b/python/paddle/v2/dataset/conll05.py
@@ -160,7 +160,6 @@ def reader_creator(corpus_reader,
                 ctx_p2 = 'eos'
 
             word_idx = [word_dict.get(w, UNK_IDX) for w in sentence]
-            pred_idx = [predicate_dict.get(predicate)] * sen_len
 
             ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len
             ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len
@@ -168,10 +167,11 @@
             ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len
             ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len
 
+            pred_idx = [predicate_dict.get(predicate)] * sen_len
             label_idx = [label_dict.get(w) for w in labels]
 
-            yield word_idx, pred_idx, ctx_n2_idx, ctx_n1_idx, \
-                ctx_0_idx, ctx_p1_idx, ctx_p2_idx, mark, label_idx
+            yield word_idx, ctx_n2_idx, ctx_n1_idx, \
+                ctx_0_idx, ctx_p1_idx, ctx_p2_idx, pred_idx, mark, label_idx
 
     return reader()
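
A minimal usage sketch, not part of the patch: api_train_v2.py no longer passes
a reader_dict, so trainer.train() falls back to matching each field of a
yielded tuple to a data layer by position, following the order in which the
data layers are wired into the topology (the six word features feeding
hidden_0, then verb_data, mark_data, and the CRF target). That is why
reader_creator() now yields pred_idx in the seventh slot. The Python 2 sketch
below (it assumes conll05.test() can download and cache the CoNLL-2005 test
split on first use) checks that correspondence:

    import paddle.v2.dataset.conll05 as conll05

    # Data layers in the order they enter the db_lstm() topology; without a
    # reader_dict, tuple fields are matched to these layers by position.
    layer_order = [
        'word_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data',
        'ctx_p1_data', 'ctx_p2_data', 'verb_data', 'mark_data', 'target'
    ]

    sample = next(conll05.test())  # reader_creator() returns a generator
    assert len(sample) == len(layer_order)
    for name, field in zip(layer_order, sample):
        print name, field[:5]  # first few integer ids of each sequence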