diff --git a/fluid/sequence_tagging_for_ner/infer.py b/fluid/sequence_tagging_for_ner/infer.py
index 2d0bd9496ed2ec1db019a0124905093e0b12531a..acf98d0f15f7f493654822751fb2619de20e5505 100644
--- a/fluid/sequence_tagging_for_ner/infer.py
+++ b/fluid/sequence_tagging_for_ner/infer.py
@@ -1,7 +1,10 @@
+from __future__ import print_function
+
 import numpy as np
+import six
 
+import paddle
 import paddle.fluid as fluid
-import paddle.v2 as paddle
 
 from network_conf import ner_net
 import reader
@@ -33,9 +36,9 @@ def infer(model_path, batch_size, test_data_file, vocab_file, target_file,
     [inference_program, feed_target_names,
      fetch_targets] = fluid.io.load_inference_model(model_path, exe)
     for data in test_data():
-        word = to_lodtensor(map(lambda x: x[0], data), place)
-        mark = to_lodtensor(map(lambda x: x[1], data), place)
-        target = to_lodtensor(map(lambda x: x[2], data), place)
+        word = to_lodtensor([x[0] for x in data], place)
+        mark = to_lodtensor([x[1] for x in data], place)
+        target = to_lodtensor([x[2] for x in data], place)
         crf_decode = exe.run(
             inference_program,
             feed={"word": word,
@@ -46,19 +49,19 @@ def infer(model_path, batch_size, test_data_file, vocab_file, target_file,
         lod_info = (crf_decode[0].lod())[0]
         np_data = np.array(crf_decode[0])
         assert len(data) == len(lod_info) - 1
-        for sen_index in xrange(len(data)):
+        for sen_index in six.moves.xrange(len(data)):
             assert len(data[sen_index][0]) == lod_info[
                 sen_index + 1] - lod_info[sen_index]
             word_index = 0
-            for tag_index in xrange(lod_info[sen_index],
+            for tag_index in six.moves.xrange(lod_info[sen_index],
                                     lod_info[sen_index + 1]):
                 word = word_reverse_dict[data[sen_index][0][word_index]]
                 gold_tag = label_reverse_dict[data[sen_index][2][
                     word_index]]
                 tag = label_reverse_dict[np_data[tag_index][0]]
-                print word + "\t" + gold_tag + "\t" + tag
+                print(word + "\t" + gold_tag + "\t" + tag)
                 word_index += 1
-        print ""
+        print("")
 
 
 if __name__ == "__main__":
diff --git a/fluid/sequence_tagging_for_ner/train.py b/fluid/sequence_tagging_for_ner/train.py
index 7a6e992df459157d0a74839d1e3fbef7213a4b14..7ad5f28546c1971aa247f14079af8fa44d5a02ce 100644
--- a/fluid/sequence_tagging_for_ner/train.py
+++ b/fluid/sequence_tagging_for_ner/train.py
@@ -1,7 +1,10 @@
+from __future__ import print_function
+
 import os
 import math
 import time
 import numpy as np
+import six
 
 import paddle
 import paddle.fluid as fluid
@@ -15,9 +18,9 @@ from utils_extend import to_lodtensor, get_embedding
 def test(exe, chunk_evaluator, inference_program, test_data, place):
     chunk_evaluator.reset(exe)
     for data in test_data():
-        word = to_lodtensor(map(lambda x: x[0], data), place)
-        mark = to_lodtensor(map(lambda x: x[1], data), place)
-        target = to_lodtensor(map(lambda x: x[2], data), place)
+        word = to_lodtensor([x[0] for x in data], place)
+        mark = to_lodtensor([x[1] for x in data], place)
+        target = to_lodtensor([x[2] for x in data], place)
         acc = exe.run(inference_program,
                       feed={"word": word,
                             "mark": mark,
@@ -97,7 +100,7 @@ def main(train_data_file,
     embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor()
     embedding_param.set(word_vector_values, place)
 
-    for pass_id in xrange(num_passes):
+    for pass_id in six.moves.xrange(num_passes):
         chunk_evaluator.reset(exe)
         for batch_id, data in enumerate(train_reader()):
             cost, batch_precision, batch_recall, batch_f1_score = exe.run(
@@ -142,6 +145,5 @@ if __name__ == "__main__":
         emb_file="data/wordVectors.txt",
         model_save_dir="models",
         num_passes=1000,
-        batch_size=1,
         use_gpu=False,
         parallel=False)
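
Note: both files apply the same Python 2/3 compatibility pattern. The __future__ import turns print statements into print() calls, map(lambda ...) becomes a list comprehension (on Python 3, map() returns a lazy one-shot iterator rather than a list, which code that needs a list or iterates more than once cannot use), and the Python-2-only xrange is replaced by six.moves.xrange, which resolves to xrange on Python 2 and range on Python 3. A minimal standalone sketch of these idioms follows; the toy batch is made up for illustration and only mirrors the (word, mark, target) triple shape iterated over in the patch:

from __future__ import print_function

import six

# Toy batch, made up for illustration: each sample is a
# (word_ids, mark_ids, target_ids) triple, as indexed via x[0]/x[1]/x[2]
# in the patched code.
data = [([1, 2, 3], [0, 1, 0], [4, 5, 6]),
        ([7, 8], [1, 0], [9, 10])]

# List comprehension instead of map(): on Python 3, map() yields a lazy
# iterator, so downstream code expecting a real list would break.
words = [x[0] for x in data]

# six.moves.xrange is xrange on Python 2 and range on Python 3.
for i in six.moves.xrange(len(data)):
    # print() behaves identically under both interpreters thanks to the
    # __future__ import.
    print(words[i])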