diff --git a/paddlehub/dataset/couplet.py b/paddlehub/dataset/couplet.py index e7dc34aa48f8560a4c4d2de85d32622f8aa213d8..2fe8c36859d2db6a3298a086476b6fbec347fe5d 100644 --- a/paddlehub/dataset/couplet.py +++ b/paddlehub/dataset/couplet.py @@ -36,7 +36,9 @@ class Couplet(GenerationDataset): def __init__(self, tokenizer=None, max_seq_len=None): dataset_dir = os.path.join(DATA_HOME, "couplet") base_path = self._download_dataset(dataset_dir, url=_DATA_URL) - with open(os.path.join(dataset_dir, "vocab.txt")) as vocab_file: + with open( + os.path.join(dataset_dir, "vocab.txt"), + encoding="utf8") as vocab_file: label_list = [line.strip() for line in vocab_file.readlines()] super(Couplet, self).__init__( base_path=base_path, diff --git a/paddlehub/dataset/dataset.py b/paddlehub/dataset/dataset.py index 186167374972d72a445ceb201616940cdc1c362b..c3d2b0c94d76295cc5118280b1c7348da3ebf6df 100644 --- a/paddlehub/dataset/dataset.py +++ b/paddlehub/dataset/dataset.py @@ -184,7 +184,9 @@ class BaseDataset(object): raise NotImplementedError def _load_label_data(self): - with open(os.path.join(self.base_path, self.label_file), "r") as file: + with open( + os.path.join(self.base_path, self.label_file), "r", + encoding="utf8") as file: return file.read().strip().split("\n") def __str__(self):