diff --git a/demo/text-classification/run_classifier.sh b/demo/text-classification/run_classifier.sh index d2d0046b390c7d4bb9a91c7d3f66fbd5f93954da..0b53a268c31929fd810138bf0fa36ab41469fc60 100644 --- a/demo/text-classification/run_classifier.sh +++ b/demo/text-classification/run_classifier.sh @@ -1,4 +1,4 @@ -export CUDA_VISIBLE_DEVICES=3 +export CUDA_VISIBLE_DEVICES=0 # User can select chnsenticorp, nlpcc_dbqa, lcqmc for different task DATASET="chnsenticorp" diff --git a/paddlehub/__init__.py b/paddlehub/__init__.py index dc4593ac57cf9c3e8d586fbd161a4b0befa99110..a6bce51d665a0cd0a851e6e1acf888a27bc587e3 100644 --- a/paddlehub/__init__.py +++ b/paddlehub/__init__.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +#coding:utf-8 +import six + from . import module from . import common from . import io @@ -41,3 +45,8 @@ from .finetune.finetune import finetune_and_eval from .finetune.config import RunConfig from .finetune.strategy import AdamWeightDecayStrategy from .finetune.strategy import DefaultStrategy + +if six.PY2: + import sys + reload(sys) + sys.setdefaultencoding("UTF-8") \ No newline at end of file diff --git a/paddlehub/dataset/chnsenticorp.py b/paddlehub/dataset/chnsenticorp.py index 58b0283e8e8c0cccad3dfb4f0516b94605aea2f4..b59a7a393372c1587c7cf555e56217a9941458a1 100644 --- a/paddlehub/dataset/chnsenticorp.py +++ b/paddlehub/dataset/chnsenticorp.py @@ -17,6 +17,7 @@ from __future__ import division from __future__ import print_function from collections import namedtuple +import codecs import os import csv @@ -79,7 +80,7 @@ class ChnSentiCorp(HubDataset): def _read_tsv(self, input_file, quotechar=None): """Reads a tab separated value file.""" - with open(input_file, "r") as f: + with codecs.open(input_file, "r", encoding="UTF-8") as f: reader = csv.reader(f, delimiter="\t", quotechar=quotechar) examples = [] seq_id = 0 diff --git a/paddlehub/dataset/lcqmc.py b/paddlehub/dataset/lcqmc.py index fb16bbff2351f761e559c9e452cb384487fbb510..550128755e3460f00769aa7aaf2c617e2ab6b48e 100644 --- a/paddlehub/dataset/lcqmc.py +++ b/paddlehub/dataset/lcqmc.py @@ -17,6 +17,7 @@ from __future__ import division from __future__ import print_function from collections import namedtuple +import codecs import os import csv @@ -75,7 +76,7 @@ class LCQMC(HubDataset): def _read_tsv(self, input_file, quotechar=None): """Reads a tab separated value file.""" - with open(input_file, "r") as f: + with codecs.open(input_file, "r", encoding="UTF-8") as f: reader = csv.reader(f, delimiter="\t", quotechar=quotechar) examples = [] seq_id = 0 diff --git a/paddlehub/dataset/msra_ner.py b/paddlehub/dataset/msra_ner.py index afdbebe91e09f99c4da3edf884ed889f69b22d53..81c31a960d0bf71c0dad5d1d4a2087c427a78931 100644 --- a/paddlehub/dataset/msra_ner.py +++ b/paddlehub/dataset/msra_ner.py @@ -17,8 +17,10 @@ from __future__ import division from __future__ import print_function import os +import codecs import csv import json +import six from collections import namedtuple from paddlehub.dataset import InputExample, HubDataset @@ -85,7 +87,7 @@ class MSRA_NER(HubDataset): def _read_tsv(self, input_file, quotechar=None): """Reads a tab separated value file.""" - with open(input_file, "r") as f: + with codecs.open(input_file, "r", encoding="UTF-8") as f: reader = csv.reader(f, delimiter="\t", quotechar=quotechar) examples = [] seq_id = 0 diff --git a/paddlehub/dataset/nlpcc_dbqa.py b/paddlehub/dataset/nlpcc_dbqa.py index b6200c2e5245b56c9615f014cb883f9709947ee8..09a64439295aee6d8de93f7b6884bb246cbe40b7 100644 --- a/paddlehub/dataset/nlpcc_dbqa.py +++ b/paddlehub/dataset/nlpcc_dbqa.py @@ -17,6 +17,7 @@ from __future__ import division from __future__ import print_function from collections import namedtuple +import codecs import os import csv @@ -81,7 +82,7 @@ class NLPCC_DBQA(HubDataset): def _read_tsv(self, input_file, quotechar=None): """Reads a tab separated value file.""" - with open(input_file, "r") as f: + with codecs.open(input_file, "r", encoding="UTF-8") as f: reader = csv.reader(f, delimiter="\t", quotechar=quotechar) examples = [] seq_id = 0