未验证 提交 5dc346bd 编写于 作者: K kinghuin 提交者: GitHub

Fix Windows GBK decoding bug by opening dataset text files with explicit UTF-8 encoding (#763)

上级 fab19b7e
......@@ -36,7 +36,9 @@ class Couplet(GenerationDataset):
def __init__(self, tokenizer=None, max_seq_len=None):
dataset_dir = os.path.join(DATA_HOME, "couplet")
base_path = self._download_dataset(dataset_dir, url=_DATA_URL)
with open(os.path.join(dataset_dir, "vocab.txt")) as vocab_file:
with open(
os.path.join(dataset_dir, "vocab.txt"),
encoding="utf8") as vocab_file:
label_list = [line.strip() for line in vocab_file.readlines()]
super(Couplet, self).__init__(
base_path=base_path,
......
......@@ -184,7 +184,9 @@ class BaseDataset(object):
raise NotImplementedError
def _load_label_data(self):
    """Read the label file and return its labels, one per line.

    The file is located at ``os.path.join(self.base_path, self.label_file)``.
    Leading and trailing whitespace of the whole file is stripped before
    the text is split on ``"\n"``.

    Returns:
        list[str]: The lines of the label file. An empty file yields
        ``[""]`` because splitting an empty string produces one element.
    """
    label_path = os.path.join(self.base_path, self.label_file)
    # Pass the encoding explicitly: without it, open() falls back to the
    # locale's preferred encoding (GBK on Chinese-locale Windows), which
    # fails to decode or mis-decodes UTF-8 label files.
    with open(label_path, "r", encoding="utf8") as label_fp:
        return label_fp.read().strip().split("\n")
def __str__(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册