diff --git a/python/paddle/v2/fluid/tests/book_distribute/test_dist_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book_distribute/test_dist_label_semantic_roles.py new file mode 100644 index 0000000000000000000000000000000000000000..5fa5e0e5f34e6904e0e66d3ab4149cdfcffeb244 --- /dev/null +++ b/python/paddle/v2/fluid/tests/book_distribute/test_dist_label_semantic_roles.py @@ -0,0 +1,225 @@ +import math + +import numpy as np +import paddle.v2 as paddle +import paddle.v2.dataset.conll05 as conll05 +import paddle.v2.fluid as fluid +import time +import os + +word_dict, verb_dict, label_dict = conll05.get_dict() +word_dict_len = len(word_dict) +label_dict_len = len(label_dict) +pred_len = len(verb_dict) + +mark_dict_len = 2 +word_dim = 32 +mark_dim = 5 +hidden_dim = 512 +depth = 8 +mix_hidden_lr = 1e-3 + +IS_SPARSE = True +PASS_NUM = 10 +BATCH_SIZE = 20 + +embedding_name = 'emb' + + +def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. + return np.fromfile(f, dtype=np.float32).reshape(h, w) + + +def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, + **ignored): + # 8 features + predicate_embedding = fluid.layers.embedding( + input=predicate, + size=[pred_len, word_dim], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='vemb') + + mark_embedding = fluid.layers.embedding( + input=mark, + size=[mark_dict_len, mark_dim], + dtype='float32', + is_sparse=IS_SPARSE) + + word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] + emb_layers = [ + fluid.layers.embedding( + size=[word_dict_len, word_dim], + input=x, + param_attr=fluid.ParamAttr( + name=embedding_name, trainable=False)) for x in word_input + ] + emb_layers.append(predicate_embedding) + emb_layers.append(mark_embedding) + + hidden_0_layers = [ + fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers + ] + + hidden_0 = fluid.layers.sums(input=hidden_0_layers) + + lstm_0 = fluid.layers.dynamic_lstm( + input=hidden_0, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid') + + # stack L-LSTM and R-LSTM with direct edges + input_tmp = [hidden_0, lstm_0] + + for i in range(1, depth): + mix_hidden = fluid.layers.sums(input=[ + fluid.layers.fc(input=input_tmp[0], size=hidden_dim), + fluid.layers.fc(input=input_tmp[1], size=hidden_dim) + ]) + + lstm = fluid.layers.dynamic_lstm( + input=mix_hidden, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid', + is_reverse=((i % 2) == 1)) + + input_tmp = [mix_hidden, lstm] + + feature_out = fluid.layers.sums(input=[ + fluid.layers.fc(input=input_tmp[0], size=label_dict_len), + fluid.layers.fc(input=input_tmp[1], size=label_dict_len) + ]) + + return feature_out + + +def to_lodtensor(data, place): + seq_lens = [len(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + res = fluid.LoDTensor() + res.set(flattened_data, place) + res.set_lod([lod]) + return res + + +def main(): + # define network topology + word = fluid.layers.data( + name='word_data', shape=[1], dtype='int64', lod_level=1) + predicate = fluid.layers.data( + name='verb_data', shape=[1], dtype='int64', lod_level=1) + ctx_n2 = fluid.layers.data( + name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) + ctx_n1 = fluid.layers.data( + name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) + ctx_0 = fluid.layers.data( + name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) + ctx_p1 = fluid.layers.data( + name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) + ctx_p2 = fluid.layers.data( + name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) + mark = fluid.layers.data( + name='mark_data', shape=[1], dtype='int64', lod_level=1) + feature_out = db_lstm(**locals()) + target = fluid.layers.data( + name='target', shape=[1], dtype='int64', lod_level=1) + crf_cost = fluid.layers.linear_chain_crf( + input=feature_out, + label=target, + param_attr=fluid.ParamAttr( + name='crfw', learning_rate=mix_hidden_lr)) + avg_cost = fluid.layers.mean(x=crf_cost) + + # TODO(qiao) + # check other optimizers and check why out will be NAN + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001) + optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) + + # TODO(qiao) + # add dependency track and move this config before optimizer + crf_decode = fluid.layers.crf_decoding( + input=feature_out, param_attr=fluid.ParamAttr(name='crfw')) + + chunk_evaluator = fluid.evaluator.ChunkEvaluator( + input=crf_decode, + label=target, + chunk_scheme="IOB", + num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0))) + + train_data = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.conll05.test(), buf_size=8192), + batch_size=BATCH_SIZE) + place = fluid.CPUPlace() + feeder = fluid.DataFeeder( + feed_list=[ + word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target + ], + place=place) + exe = fluid.Executor(place) + + t = fluid.DistributeTranspiler() + pserver_endpoints = os.getenv("PSERVERS") + # server endpoint for current node + current_endpoint = os.getenv("SERVER_ENDPOINT") + # run as trainer or parameter server + training_role = os.getenv( + "TRAINING_ROLE", "TRAINER") # get the training role: trainer/pserver + t.transpile( + optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2) + + if training_role == "PSERVER": + if not current_endpoint: + print("need env SERVER_ENDPOINT") + exit(1) + pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops) + exe.run(fluid.default_startup_program()) + exe.run(pserver_prog) + elif training_role == "TRAINER": + trainer_prog = t.get_trainer_program() + start_time = time.time() + batch_id = 0 + exe.run(fluid.default_startup_program()) + embedding_param = fluid.global_scope().find_var( + embedding_name).get_tensor() + embedding_param.set( + load_parameter(conll05.get_embedding(), word_dict_len, word_dim), + place) + for pass_id in xrange(PASS_NUM): + chunk_evaluator.reset(exe) + for data in train_data(): + cost, precision, recall, f1_score = exe.run( + trainer_prog, + feed=feeder.feed(data), + fetch_list=[avg_cost] + chunk_evaluator.metrics) + pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval( + exe) + + if batch_id % 10 == 0: + print("avg_cost:" + str(cost) + " precision:" + str( + precision) + " recall:" + str(recall) + " f1_score:" + + str(f1_score) + " pass_precision:" + str( + pass_precision) + " pass_recall:" + str( + pass_recall) + " pass_f1_score:" + str( + pass_f1_score)) + if batch_id != 0: + print("second per batch: " + str((time.time( + ) - start_time) / batch_id)) + + batch_id = batch_id + 1 + + +if __name__ == '__main__': + main()