提交 06567940 编写于 作者: L lujun

Fix bug and fix Python 3 compatibility for all scripts, test=develop

上级 1a5d3925
...@@ -52,7 +52,7 @@ def parse_args(): ...@@ -52,7 +52,7 @@ def parse_args():
def print_arguments(args): def print_arguments(args):
print('----------- Configuration Arguments -----------') print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value)) print('%s: %s' % (arg, value))
print('------------------------------------------------') print('------------------------------------------------')
...@@ -61,6 +61,7 @@ def load_reverse_dict(dict_path): ...@@ -61,6 +61,7 @@ def load_reverse_dict(dict_path):
return dict((idx, line.strip().split("\t")[0]) return dict((idx, line.strip().split("\t")[0])
for idx, line in enumerate(open(dict_path, "r").readlines())) for idx, line in enumerate(open(dict_path, "r").readlines()))
def to_lodtensor(data, place): def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data] seq_lens = [len(seq) for seq in data]
cur_len = 0 cur_len = 0
...@@ -76,7 +77,6 @@ def to_lodtensor(data, place): ...@@ -76,7 +77,6 @@ def to_lodtensor(data, place):
return res return res
def infer(args): def infer(args):
word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1) word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
mention = fluid.layers.data( mention = fluid.layers.data(
...@@ -108,8 +108,8 @@ def infer(args): ...@@ -108,8 +108,8 @@ def infer(args):
profiler.reset_profiler() profiler.reset_profiler()
iters = 0 iters = 0
for data in test_data(): for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place) word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(map(lambda x: x[1], data), place) mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
start = time.time() start = time.time()
crf_decode = exe.run(inference_program, crf_decode = exe.run(inference_program,
...@@ -122,12 +122,12 @@ def infer(args): ...@@ -122,12 +122,12 @@ def infer(args):
np_data = np.array(crf_decode[0]) np_data = np.array(crf_decode[0])
word_count = 0 word_count = 0
assert len(data) == len(lod_info) - 1 assert len(data) == len(lod_info) - 1
for sen_index in xrange(len(data)): for sen_index in range(len(data)):
assert len(data[sen_index][0]) == lod_info[ assert len(data[sen_index][0]) == lod_info[
sen_index + 1] - lod_info[sen_index] sen_index + 1] - lod_info[sen_index]
word_index = 0 word_index = 0
for tag_index in xrange(lod_info[sen_index], for tag_index in range(lod_info[sen_index],
lod_info[sen_index + 1]): lod_info[sen_index + 1]):
word = str(data[sen_index][0][word_index]) word = str(data[sen_index][0][word_index])
gold_tag = label_reverse_dict[data[sen_index][2][ gold_tag = label_reverse_dict[data[sen_index][2][
word_index]] word_index]]
......
...@@ -65,7 +65,7 @@ def parse_args(): ...@@ -65,7 +65,7 @@ def parse_args():
def print_arguments(args): def print_arguments(args):
print('----------- Configuration Arguments -----------') print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value)) print('%s: %s' % (arg, value))
print('------------------------------------------------') print('------------------------------------------------')
...@@ -220,9 +220,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place, ...@@ -220,9 +220,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list): cur_fetch_list):
chunk_evaluator.reset() chunk_evaluator.reset()
for data in test_data(): for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place) word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(map(lambda x: x[1], data), place) mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
target = to_lodtensor(map(lambda x: x[2], data), place) target = to_lodtensor(list(map(lambda x: x[2], data)), place)
result_list = exe.run( result_list = exe.run(
inference_program, inference_program,
feed={"word": word, feed={"word": word,
...@@ -232,8 +232,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place, ...@@ -232,8 +232,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place,
number_infer = np.array(result_list[0]) number_infer = np.array(result_list[0])
number_label = np.array(result_list[1]) number_label = np.array(result_list[1])
number_correct = np.array(result_list[2]) number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer[0], number_label[0], chunk_evaluator.update(number_infer[0].astype('int64'),
number_correct[0]) number_label[0].astype('int64'),
number_correct[0].astype('int64'))
return chunk_evaluator.eval() return chunk_evaluator.eval()
...@@ -241,9 +242,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place, ...@@ -241,9 +242,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list): cur_fetch_list):
chunk_evaluator.reset() chunk_evaluator.reset()
for data in test_data(): for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place) word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(map(lambda x: x[1], data), place) mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
target = to_lodtensor(map(lambda x: x[2], data), place) target = to_lodtensor(list(map(lambda x: x[2], data)), place)
result_list = test_exe.run( result_list = test_exe.run(
fetch_list=cur_fetch_list, fetch_list=cur_fetch_list,
feed={"word": word, feed={"word": word,
...@@ -252,8 +253,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place, ...@@ -252,8 +253,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place,
number_infer = np.array(result_list[0]) number_infer = np.array(result_list[0])
number_label = np.array(result_list[1]) number_label = np.array(result_list[1])
number_correct = np.array(result_list[2]) number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer.sum(), chunk_evaluator.update(number_infer.sum().astype('int64'),
number_label.sum(), number_correct.sum()) number_label.sum().astype('int64'),
number_correct.sum().astype('int64'))
return chunk_evaluator.eval() return chunk_evaluator.eval()
...@@ -270,11 +272,6 @@ def main(args): ...@@ -270,11 +272,6 @@ def main(args):
crf_decode = fluid.layers.crf_decoding( crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(name='crfw')) input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
inference_program = fluid.default_main_program().clone(for_test=True)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
sgd_optimizer.minimize(avg_cost)
(precision, recall, f1_score, num_infer_chunks, num_label_chunks, (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
num_correct_chunks) = fluid.layers.chunk_eval( num_correct_chunks) = fluid.layers.chunk_eval(
input=crf_decode, input=crf_decode,
...@@ -282,6 +279,11 @@ def main(args): ...@@ -282,6 +279,11 @@ def main(args):
chunk_scheme="IOB", chunk_scheme="IOB",
num_chunk_types=int(math.ceil((args.label_dict_len - 1) / 2.0))) num_chunk_types=int(math.ceil((args.label_dict_len - 1) / 2.0)))
inference_program = fluid.default_main_program().clone(for_test=True)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
sgd_optimizer.minimize(avg_cost)
chunk_evaluator = fluid.metrics.ChunkEvaluator() chunk_evaluator = fluid.metrics.ChunkEvaluator()
train_reader = paddle.batch( train_reader = paddle.batch(
...@@ -312,7 +314,7 @@ def main(args): ...@@ -312,7 +314,7 @@ def main(args):
test_exe = exe test_exe = exe
batch_id = 0 batch_id = 0
for pass_id in xrange(args.num_passes): for pass_id in range(args.num_passes):
chunk_evaluator.reset() chunk_evaluator.reset()
train_reader_iter = train_reader() train_reader_iter = train_reader()
start_time = time.time() start_time = time.time()
...@@ -326,9 +328,9 @@ def main(args): ...@@ -326,9 +328,9 @@ def main(args):
], ],
feed=feeder.feed(cur_batch)) feed=feeder.feed(cur_batch))
chunk_evaluator.update( chunk_evaluator.update(
np.array(nums_infer).sum(), np.array(nums_infer).sum().astype("int64"),
np.array(nums_label).sum(), np.array(nums_label).sum().astype("int64"),
np.array(nums_correct).sum()) np.array(nums_correct).sum().astype("int64"))
cost_list = np.array(cost) cost_list = np.array(cost)
batch_id += 1 batch_id += 1
except StopIteration: except StopIteration:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册