未验证 提交 01fda934 编写于 作者: Q Qiyang Min 提交者: GitHub

Merge pull request #13523 from velconia/fix_rnn_search

Fix reader of rnn_search in python3
......@@ -89,7 +89,8 @@ def reader_creator(tar_file, file_name, dict_size):
]
for name in names:
for line in f.extractfile(name):
line_split = line.strip().split(six.b('\t'))
line = cpt.to_text(line)
line_split = line.strip().split('\t')
if len(line_split) != 2:
continue
src_seq = line_split[0] # one source sequence
......
......@@ -64,7 +64,8 @@ def __build_dict(tar_file, dict_size, save_path, lang):
word_dict = defaultdict(int)
with tarfile.open(tar_file, mode="r") as f:
for line in f.extractfile("wmt16/train"):
line_split = line.strip().split(six.b("\t"))
line = cpt.to_text(line)
line_split = line.strip().split("\t")
if len(line_split) != 2: continue
sen = line_split[0] if lang == "en" else line_split[1]
for w in sen.split():
......@@ -123,7 +124,8 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
with tarfile.open(tar_file, mode="r") as f:
for line in f.extractfile(file_name):
line_split = line.strip().split(six.b("\t"))
line = cpt.to_text(line)
line_split = line.strip().split("\t")
if len(line_split) != 2:
continue
src_words = line_split[src_col].split()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册