diff --git a/demo/text-classification/run_classifier.sh b/demo/text-classification/run_classifier.sh
index d2d0046b390c7d4bb9a91c7d3f66fbd5f93954da..0b53a268c31929fd810138bf0fa36ab41469fc60 100644
--- a/demo/text-classification/run_classifier.sh
+++ b/demo/text-classification/run_classifier.sh
@@ -1,4 +1,4 @@
-export CUDA_VISIBLE_DEVICES=3
+export CUDA_VISIBLE_DEVICES=0
 
 # User can select chnsenticorp, nlpcc_dbqa, lcqmc for different task
 DATASET="chnsenticorp"
diff --git a/paddlehub/__init__.py b/paddlehub/__init__.py
index dc4593ac57cf9c3e8d586fbd161a4b0befa99110..a6bce51d665a0cd0a851e6e1acf888a27bc587e3 100644
--- a/paddlehub/__init__.py
+++ b/paddlehub/__init__.py
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+#coding:utf-8
+import six
+
 from . import module
 from . import common
 from . import io
@@ -41,3 +45,13 @@ from .finetune.finetune import finetune_and_eval
 from .finetune.config import RunConfig
 from .finetune.strategy import AdamWeightDecayStrategy
 from .finetune.strategy import DefaultStrategy
+
+# Python 2 only: make UTF-8 the interpreter-wide default codec used for
+# implicit str <-> unicode conversions.
+# NOTE(review): this is a process-global side effect of importing paddlehub,
+# so it also affects the host application. site.py deletes
+# sys.setdefaultencoding at startup, hence the reload(sys) before the call.
+if six.PY2:
+    import sys
+    reload(sys)
+    sys.setdefaultencoding("UTF-8")
diff --git a/paddlehub/dataset/chnsenticorp.py b/paddlehub/dataset/chnsenticorp.py
index 58b0283e8e8c0cccad3dfb4f0516b94605aea2f4..b59a7a393372c1587c7cf555e56217a9941458a1 100644
--- a/paddlehub/dataset/chnsenticorp.py
+++ b/paddlehub/dataset/chnsenticorp.py
@@ -17,6 +17,7 @@ from __future__ import division
 from __future__ import print_function
 
 from collections import namedtuple
+import codecs
 import os
 import csv
 
@@ -79,7 +80,7 @@ class ChnSentiCorp(HubDataset):
 
     def _read_tsv(self, input_file, quotechar=None):
         """Reads a tab separated value file."""
-        with open(input_file, "r") as f:
+        with codecs.open(input_file, "r", encoding="UTF-8") as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             examples = []
             seq_id = 0
diff --git a/paddlehub/dataset/lcqmc.py b/paddlehub/dataset/lcqmc.py
index fb16bbff2351f761e559c9e452cb384487fbb510..550128755e3460f00769aa7aaf2c617e2ab6b48e 100644
--- a/paddlehub/dataset/lcqmc.py
+++ b/paddlehub/dataset/lcqmc.py
@@ -17,6 +17,7 @@ from __future__ import division
 from __future__ import print_function
 
 from collections import namedtuple
+import codecs
 import os
 import csv
 
@@ -75,7 +76,7 @@ class LCQMC(HubDataset):
 
     def _read_tsv(self, input_file, quotechar=None):
         """Reads a tab separated value file."""
-        with open(input_file, "r") as f:
+        with codecs.open(input_file, "r", encoding="UTF-8") as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             examples = []
             seq_id = 0
diff --git a/paddlehub/dataset/msra_ner.py b/paddlehub/dataset/msra_ner.py
index afdbebe91e09f99c4da3edf884ed889f69b22d53..81c31a960d0bf71c0dad5d1d4a2087c427a78931 100644
--- a/paddlehub/dataset/msra_ner.py
+++ b/paddlehub/dataset/msra_ner.py
@@ -17,8 +17,10 @@ from __future__ import division
 from __future__ import print_function
 
 import os
+import codecs
 import csv
 import json
+import six
 from collections import namedtuple
 
 from paddlehub.dataset import InputExample, HubDataset
@@ -85,7 +87,7 @@ class MSRA_NER(HubDataset):
 
     def _read_tsv(self, input_file, quotechar=None):
         """Reads a tab separated value file."""
-        with open(input_file, "r") as f:
+        with codecs.open(input_file, "r", encoding="UTF-8") as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             examples = []
             seq_id = 0
diff --git a/paddlehub/dataset/nlpcc_dbqa.py b/paddlehub/dataset/nlpcc_dbqa.py
index b6200c2e5245b56c9615f014cb883f9709947ee8..09a64439295aee6d8de93f7b6884bb246cbe40b7 100644
--- a/paddlehub/dataset/nlpcc_dbqa.py
+++ b/paddlehub/dataset/nlpcc_dbqa.py
@@ -17,6 +17,7 @@ from __future__ import division
 from __future__ import print_function
 
 from collections import namedtuple
+import codecs
 import os
 import csv
 
@@ -81,7 +82,7 @@ class NLPCC_DBQA(HubDataset):
 
     def _read_tsv(self, input_file, quotechar=None):
         """Reads a tab separated value file."""
-        with open(input_file, "r") as f:
+        with codecs.open(input_file, "r", encoding="UTF-8") as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             examples = []
             seq_id = 0