提交 1106077f 作者: tianxin 提交者: pkpk

set encoding=utf8 for open (#3395)

上级 751b90b6
@@ -66,7 +66,7 @@ class BaseReader(object):
     def _read_tsv(self, input_file, quotechar=None):
         """Reads a tab separated value file."""
-        with open(input_file, "r") as f:
+        with open(input_file, "r", encoding="utf8") as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             headers = next(reader)
             Example = namedtuple('Example', headers)
......
@@ -69,7 +69,7 @@ def printable_text(text):
 def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
-    fin = open(vocab_file)
+    fin = open(vocab_file, encoding="utf8")
     for num, line in enumerate(fin):
         items = convert_to_unicode(line.strip()).split("\t")
         if len(items) > 2:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册