提交 343a63a8 作者: minqiyang

Port sequence_tagging_for_ner to Python 3

上级 84e4cbba
from __future__ import print_function
import numpy as np import numpy as np
import six
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle
from network_conf import ner_net from network_conf import ner_net
import reader import reader
...@@ -33,9 +36,9 @@ def infer(model_path, batch_size, test_data_file, vocab_file, target_file, ...@@ -33,9 +36,9 @@ def infer(model_path, batch_size, test_data_file, vocab_file, target_file,
[inference_program, feed_target_names, [inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe) fetch_targets] = fluid.io.load_inference_model(model_path, exe)
for data in test_data(): for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place) word = to_lodtensor([x[0] for x in data], place)
mark = to_lodtensor(map(lambda x: x[1], data), place) mark = to_lodtensor([x[1] for x in data], place)
target = to_lodtensor(map(lambda x: x[2], data), place) target = to_lodtensor([x[2] for x in data], place)
crf_decode = exe.run( crf_decode = exe.run(
inference_program, inference_program,
feed={"word": word, feed={"word": word,
...@@ -46,19 +49,19 @@ def infer(model_path, batch_size, test_data_file, vocab_file, target_file, ...@@ -46,19 +49,19 @@ def infer(model_path, batch_size, test_data_file, vocab_file, target_file,
lod_info = (crf_decode[0].lod())[0] lod_info = (crf_decode[0].lod())[0]
np_data = np.array(crf_decode[0]) np_data = np.array(crf_decode[0])
assert len(data) == len(lod_info) - 1 assert len(data) == len(lod_info) - 1
for sen_index in xrange(len(data)): for sen_index in six.moves.xrange(len(data)):
assert len(data[sen_index][0]) == lod_info[ assert len(data[sen_index][0]) == lod_info[
sen_index + 1] - lod_info[sen_index] sen_index + 1] - lod_info[sen_index]
word_index = 0 word_index = 0
for tag_index in xrange(lod_info[sen_index], for tag_index in six.moves.xrange(lod_info[sen_index],
lod_info[sen_index + 1]): lod_info[sen_index + 1]):
word = word_reverse_dict[data[sen_index][0][word_index]] word = word_reverse_dict[data[sen_index][0][word_index]]
gold_tag = label_reverse_dict[data[sen_index][2][ gold_tag = label_reverse_dict[data[sen_index][2][
word_index]] word_index]]
tag = label_reverse_dict[np_data[tag_index][0]] tag = label_reverse_dict[np_data[tag_index][0]]
print word + "\t" + gold_tag + "\t" + tag print(word + "\t" + gold_tag + "\t" + tag)
word_index += 1 word_index += 1
print "" print("")
if __name__ == "__main__": if __name__ == "__main__":
......
from __future__ import print_function
import os import os
import math import math
import time import time
import numpy as np import numpy as np
import six
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -15,9 +18,9 @@ from utils_extend import to_lodtensor, get_embedding ...@@ -15,9 +18,9 @@ from utils_extend import to_lodtensor, get_embedding
def test(exe, chunk_evaluator, inference_program, test_data, place): def test(exe, chunk_evaluator, inference_program, test_data, place):
chunk_evaluator.reset(exe) chunk_evaluator.reset(exe)
for data in test_data(): for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place) word = to_lodtensor([x[0] for x in data], place)
mark = to_lodtensor(map(lambda x: x[1], data), place) mark = to_lodtensor([x[1] for x in data], place)
target = to_lodtensor(map(lambda x: x[2], data), place) target = to_lodtensor([x[2] for x in data], place)
acc = exe.run(inference_program, acc = exe.run(inference_program,
feed={"word": word, feed={"word": word,
"mark": mark, "mark": mark,
...@@ -97,7 +100,7 @@ def main(train_data_file, ...@@ -97,7 +100,7 @@ def main(train_data_file,
embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor() embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor()
embedding_param.set(word_vector_values, place) embedding_param.set(word_vector_values, place)
for pass_id in xrange(num_passes): for pass_id in six.moves.xrange(num_passes):
chunk_evaluator.reset(exe) chunk_evaluator.reset(exe)
for batch_id, data in enumerate(train_reader()): for batch_id, data in enumerate(train_reader()):
cost, batch_precision, batch_recall, batch_f1_score = exe.run( cost, batch_precision, batch_recall, batch_f1_score = exe.run(
...@@ -142,6 +145,5 @@ if __name__ == "__main__": ...@@ -142,6 +145,5 @@ if __name__ == "__main__":
emb_file="data/wordVectors.txt", emb_file="data/wordVectors.txt",
model_save_dir="models", model_save_dir="models",
num_passes=1000, num_passes=1000,
batch_size=1,
use_gpu=False, use_gpu=False,
parallel=False) parallel=False)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册