From 06289aa2f90608bb2126571d4a3b5f8bc097f120 Mon Sep 17 00:00:00 2001
From: minqiyang <minqiyang@baidu.com>
Date: Fri, 21 Sep 2018 00:31:10 +0800
Subject: [PATCH] Fix reader of rnn_search in python3

---
 python/paddle/dataset/wmt14.py | 3 ++-
 python/paddle/dataset/wmt16.py | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py
index f8c1a33574e..adc0c1aac80 100644
--- a/python/paddle/dataset/wmt14.py
+++ b/python/paddle/dataset/wmt14.py
@@ -89,7 +89,8 @@ def reader_creator(tar_file, file_name, dict_size):
             ]
             for name in names:
                 for line in f.extractfile(name):
-                    line_split = line.strip().split(six.b('\t'))
+                    line = cpt.to_text(line)
+                    line_split = line.strip().split('\t')
                     if len(line_split) != 2:
                         continue
                     src_seq = line_split[0]  # one source sequence
diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py
index f30dcd518ea..9c02e0f41b0 100644
--- a/python/paddle/dataset/wmt16.py
+++ b/python/paddle/dataset/wmt16.py
@@ -64,7 +64,8 @@ def __build_dict(tar_file, dict_size, save_path, lang):
     word_dict = defaultdict(int)
     with tarfile.open(tar_file, mode="r") as f:
         for line in f.extractfile("wmt16/train"):
-            line_split = line.strip().split(six.b("\t"))
+            line = cpt.to_text(line)
+            line_split = line.strip().split("\t")
             if len(line_split) != 2: continue
             sen = line_split[0] if lang == "en" else line_split[1]
             for w in sen.split():
@@ -123,7 +124,8 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
 
         with tarfile.open(tar_file, mode="r") as f:
             for line in f.extractfile(file_name):
-                line_split = line.strip().split(six.b("\t"))
+                line = cpt.to_text(line)
+                line_split = line.strip().split("\t")
                 if len(line_split) != 2:
                     continue
                 src_words = line_split[src_col].split()
-- 
GitLab