diff --git a/paddlehub/dataset/couplet.py b/paddlehub/dataset/couplet.py
index e7dc34aa48f8560a4c4d2de85d32622f8aa213d8..2fe8c36859d2db6a3298a086476b6fbec347fe5d 100644
--- a/paddlehub/dataset/couplet.py
+++ b/paddlehub/dataset/couplet.py
@@ -36,7 +36,9 @@ class Couplet(GenerationDataset):
     def __init__(self, tokenizer=None, max_seq_len=None):
         dataset_dir = os.path.join(DATA_HOME, "couplet")
         base_path = self._download_dataset(dataset_dir, url=_DATA_URL)
-        with open(os.path.join(dataset_dir, "vocab.txt")) as vocab_file:
+        with open(
+                os.path.join(dataset_dir, "vocab.txt"),
+                encoding="utf8") as vocab_file:
             label_list = [line.strip() for line in vocab_file.readlines()]
         super(Couplet, self).__init__(
             base_path=base_path,
diff --git a/paddlehub/dataset/dataset.py b/paddlehub/dataset/dataset.py
index 186167374972d72a445ceb201616940cdc1c362b..c3d2b0c94d76295cc5118280b1c7348da3ebf6df 100644
--- a/paddlehub/dataset/dataset.py
+++ b/paddlehub/dataset/dataset.py
@@ -184,7 +184,9 @@ class BaseDataset(object):
         raise NotImplementedError
 
     def _load_label_data(self):
-        with open(os.path.join(self.base_path, self.label_file), "r") as file:
+        with open(
+                os.path.join(self.base_path, self.label_file), "r",
+                encoding="utf8") as file:  # not the locale default
             return file.read().strip().split("\n")
 
     def __str__(self):