From 5dc346bd729f6ec49579dc9b7a0ed5d22bf4fdc5 Mon Sep 17 00:00:00 2001 From: kinghuin Date: Tue, 21 Jul 2020 10:22:00 +0800 Subject: [PATCH] fix windows gbk bug (#763) --- paddlehub/dataset/couplet.py | 4 +++- paddlehub/dataset/dataset.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/paddlehub/dataset/couplet.py b/paddlehub/dataset/couplet.py index e7dc34aa..2fe8c368 100644 --- a/paddlehub/dataset/couplet.py +++ b/paddlehub/dataset/couplet.py @@ -36,7 +36,9 @@ class Couplet(GenerationDataset): def __init__(self, tokenizer=None, max_seq_len=None): dataset_dir = os.path.join(DATA_HOME, "couplet") base_path = self._download_dataset(dataset_dir, url=_DATA_URL) - with open(os.path.join(dataset_dir, "vocab.txt")) as vocab_file: + with open( + os.path.join(dataset_dir, "vocab.txt"), + encoding="utf8") as vocab_file: label_list = [line.strip() for line in vocab_file.readlines()] super(Couplet, self).__init__( base_path=base_path, diff --git a/paddlehub/dataset/dataset.py b/paddlehub/dataset/dataset.py index 18616737..c3d2b0c9 100644 --- a/paddlehub/dataset/dataset.py +++ b/paddlehub/dataset/dataset.py @@ -184,7 +184,9 @@ class BaseDataset(object): raise NotImplementedError def _load_label_data(self): - with open(os.path.join(self.base_path, self.label_file), "r") as file: + with open( + os.path.join(self.base_path, self.label_file), "r", + encoding="utf8") as file: return file.read().strip().split("\n") def __str__(self): -- GitLab