# ---------------------------------------------------------------------------
# File: demo/semantic_role_labeling/api_train_v2.py  (new file)
#
# Trains the semantic-role-labeling demo network with the paddle.v2 API.
# Importing this module loads the word, label and predicate dictionaries
# from ./data; main() builds the CRF cost on top of model_v2.db_lstm and
# runs the trainer.
# ---------------------------------------------------------------------------
import numpy
import paddle.v2 as paddle
from paddle.trainer_config_helpers.attrs import ParamAttr

from model_v2 import DEFAULT_STD, MIX_HIDDEN_LR, db_lstm

# Id substituted for words that are missing from word_dict.
# NOTE(review): 0 is the id of the first line of wordDict.txt -- confirm that
# this entry is the unknown-word token.
UNK_IDX = 0

word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
predicate_file = './data/verbDict.txt'


def _load_dict(path):
    """Map every stripped line of ``path`` to its zero-based line number."""
    with open(path, 'r') as f:
        return {line.strip(): i for i, line in enumerate(f)}


word_dict = _load_dict(word_dict_file)
label_dict = _load_dict(label_dict_file)
predicate_dict = _load_dict(predicate_file)

word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(predicate_dict)


def train_reader(file_name="data/feature"):
    """Create a reader over the tab-separated feature file ``file_name``.

    Every line of the file must contain exactly nine tab-separated fields:
    sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label.
    ``sentence``, ``mark`` and ``label`` are whitespace-separated sequences;
    the remaining fields are single tokens.

    Returns:
        A zero-argument callable. Each call re-opens the file and yields one
        tuple per line, in this order::

            (word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
             predicate, mark, label)

        ``word`` holds the word ids of the sentence. The five context slots
        and ``predicate`` repeat a single id once per sentence word.
        ``mark`` holds the integer marks and ``label`` the label ids.
        Words missing from ``word_dict`` map to ``UNK_IDX``; a predicate or
        label missing from its dictionary maps to ``None``.

    Raises:
        ValueError: (from the returned reader) if a line does not split into
            nine fields or a mark is not an integer.
    """

    def reader():
        with open(file_name, 'r') as fdata:
            for line in fdata:
                (sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
                 mark, label) = line.strip().split('\t')

                words = sentence.split()
                sen_len = len(words)

                def repeated_word_id(token):
                    # One copy of the token's id for every sentence position.
                    return [word_dict.get(token, UNK_IDX)] * sen_len

                word_slot = [word_dict.get(w, UNK_IDX) for w in words]
                predicate_slot = [predicate_dict.get(predicate)] * sen_len
                mark_slot = [int(w) for w in mark.split()]
                label_slot = [label_dict.get(w) for w in label.split()]

                yield (word_slot, repeated_word_id(ctx_n2),
                       repeated_word_id(ctx_n1), repeated_word_id(ctx_0),
                       repeated_word_id(ctx_p1), repeated_word_id(ctx_p2),
                       predicate_slot, mark_slot, label_slot)

    return reader


def main():
    """Build the db_lstm + CRF topology and train it on the feature file."""
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    # The feature layer, the target layer and both CRF layers must agree on
    # the number of labels, so all of them use the dictionary-derived size.
    output = db_lstm(word_dict_len, label_dict_len, pred_len)
    target = paddle.layer.data(name='target', size=label_dict_len)

    # NOTE(review): the other layers in this demo use suffix-less v2 names
    # (mixed, lstmemory); confirm that crf_layer / crf_decoding_layer are the
    # names paddle.layer actually exposes.
    crf_cost = paddle.layer.crf_layer(
        size=label_dict_len,
        input=output,
        label=target,
        param_attr=paddle.attr.Param(
            name='crfw', initial_std=DEFAULT_STD, learning_rate=MIX_HIDDEN_LR))

    # The decoding layer names the same parameter ('crfw') as the cost layer.
    crf_dec = paddle.layer.crf_decoding_layer(
        name='crf_dec_l',
        size=label_dict_len,
        input=output,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))

    topo = [crf_cost, crf_dec]
    parameters = paddle.parameters.create(topo)
    optimizer = paddle.optimizer.Momentum(momentum=0.01, learning_rate=2e-2)

    def event_handler(event):
        # Report progress after every iteration; other events are ignored.
        if isinstance(event, paddle.event.EndIteration):
            print("Pass %d, Batch %d, Cost %f" % (event.pass_id,
                                                  event.batch_id, event.cost))

    trainer = paddle.trainer.SGD(update_equation=optimizer)

    # train_reader is a reader *creator*; the trainer is handed the reader it
    # returns.
    # NOTE(review): data_types and reader_dict are passed empty. They
    # presumably have to describe the nine reader outputs, which are yielded
    # in the order word_data, ctx_n2_data, ctx_n1_data, ctx_0_data,
    # ctx_p1_data, ctx_p2_data, verb_data, mark_data, target -- TODO confirm
    # against paddle.trainer.SGD.train.
    trainer.train(
        train_data_reader=train_reader(),
        batch_size=32,
        topology=topo,
        parameters=parameters,
        event_handler=event_handler,
        num_passes=10000,
        data_types=[],
        reader_dict={})


if __name__ == '__main__':
    main()


# ---------------------------------------------------------------------------
# File: demo/semantic_role_labeling/model_v2.py  (new file)
#
# Network definition for the semantic-role-labeling demo: a stack of
# alternating-direction LSTMs over word, context, predicate and mark
# embeddings.
# ---------------------------------------------------------------------------
import math

import paddle.v2 as paddle

# Hyper-parameters that the training script also needs for its CRF layers.
HIDDEN_DIM = 512
MIX_HIDDEN_LR = 1e-3
DEFAULT_STD = 1 / math.sqrt(HIDDEN_DIM) / 3.0


def db_lstm(word_dict_len, label_dict_len, pred_len):
    """Build the stacked-LSTM feature network.

    Args:
        word_dict_len: size of the word dictionary; used as the size of the
            word input and of the five context-word inputs.
        label_dict_len: number of labels; the size of the returned layer.
        pred_len: size of the predicate dictionary; used as the size of the
            predicate input.

    Returns:
        A mixed layer of size ``label_dict_len`` that projects the last
        hidden mixed layer and the last LSTM layer.
    """
    mark_dict_len = 2
    word_dim = 32
    mark_dim = 5
    hidden_dim = HIDDEN_DIM
    depth = 8

    #8 features
    word = paddle.layer.data(name='word_data', size=word_dict_len)
    predicate = paddle.layer.data(name='verb_data', size=pred_len)

    ctx_n2 = paddle.layer.data(name='ctx_n2_data', size=word_dict_len)
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', size=word_dict_len)
    ctx_0 = paddle.layer.data(name='ctx_0_data', size=word_dict_len)
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', size=word_dict_len)
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', size=word_dict_len)
    mark = paddle.layer.data(name='mark_data', size=mark_dict_len)

    default_std = DEFAULT_STD

    # All six word inputs share the parameter named 'emb'. It is created
    # with initial_std=0 and learning_rate=0.
    # NOTE(review): presumably pre-trained embeddings are loaded into 'emb'
    # elsewhere -- TODO confirm.
    emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(
            name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        name='word_ctx-in_embedding',
        size=mark_dim,
        input=mark,
        param_attr=std_0)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        paddle.layer.embedding(
            size=word_dim, input=x, param_attr=emb_para) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    # Project all eight embeddings into one hidden layer.
    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    mix_hidden_lr = MIX_HIDDEN_LR
    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    #stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    # lstm_0 plus the depth - 1 layers built here gives `depth` LSTMs in
    # total; odd-numbered layers run in reverse.
    for i in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        input_tmp = [mix_hidden, lstm]

    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ], )

    return feature_out