From 06289aa2f90608bb2126571d4a3b5f8bc097f120 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Fri, 21 Sep 2018 00:31:10 +0800 Subject: [PATCH] Fix reader of rnn_search in python3 --- python/paddle/dataset/wmt14.py | 3 ++- python/paddle/dataset/wmt16.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py index f8c1a3357..adc0c1aac 100644 --- a/python/paddle/dataset/wmt14.py +++ b/python/paddle/dataset/wmt14.py @@ -89,7 +89,8 @@ def reader_creator(tar_file, file_name, dict_size): ] for name in names: for line in f.extractfile(name): - line_split = line.strip().split(six.b('\t')) + line = cpt.to_text(line) + line_split = line.strip().split('\t') if len(line_split) != 2: continue src_seq = line_split[0] # one source sequence diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index f30dcd518..9c02e0f41 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -64,7 +64,8 @@ def __build_dict(tar_file, dict_size, save_path, lang): word_dict = defaultdict(int) with tarfile.open(tar_file, mode="r") as f: for line in f.extractfile("wmt16/train"): - line_split = line.strip().split(six.b("\t")) + line = cpt.to_text(line) + line_split = line.strip().split("\t") if len(line_split) != 2: continue sen = line_split[0] if lang == "en" else line_split[1] for w in sen.split(): @@ -123,7 +124,8 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): with tarfile.open(tar_file, mode="r") as f: for line in f.extractfile(file_name): - line_split = line.strip().split(six.b("\t")) + line = cpt.to_text(line) + line_split = line.strip().split("\t") if len(line_split) != 2: continue src_words = line_split[src_col].split() -- GitLab