From b71e406d99bf8f45bd98e75838151823dba6e2c2 Mon Sep 17 00:00:00 2001 From: Steffy-zxf <48793257+Steffy-zxf@users.noreply.github.com> Date: Tue, 28 May 2019 11:41:06 +0800 Subject: [PATCH] Fix encoding format (#42) * Fix the encoding format --- demo/elmo/elmo_finetune.py | 1 + demo/image-classification/img_classifier.py | 1 + demo/image-classification/predict.py | 1 + demo/lac/lac_demo.py | 16 ++++++++++++++-- demo/senta/predict.py | 1 + demo/senta/senta_demo.py | 16 ++++++++++++++-- demo/senta/senta_finetune.py | 1 + demo/sequence-labeling/predict.py | 1 + demo/sequence-labeling/sequence_label.py | 1 + demo/ssd/ssd_demo.py | 1 + demo/text-classification/predict.py | 1 + demo/text-classification/simple_demo.py | 1 + demo/text-classification/text_classifier.py | 1 + paddlehub/__init__.py | 13 ++++++------- paddlehub/commands/__init__.py | 1 + paddlehub/commands/base_command.py | 1 + paddlehub/commands/clear.py | 1 + paddlehub/commands/cml_utils.py | 1 + paddlehub/commands/download.py | 1 + paddlehub/commands/help.py | 1 + paddlehub/commands/hub.py | 10 +++++++++- paddlehub/commands/install.py | 1 + paddlehub/commands/list.py | 1 + paddlehub/commands/run.py | 4 +++- paddlehub/commands/search.py | 1 + paddlehub/commands/show.py | 1 + paddlehub/commands/uninstall.py | 1 + paddlehub/commands/version.py | 1 + paddlehub/common/__init__.py | 1 + paddlehub/common/arg_helper.py | 1 + paddlehub/common/dir.py | 1 + paddlehub/common/downloader.py | 1 + paddlehub/common/hub_server.py | 1 + paddlehub/common/logger.py | 1 + paddlehub/common/paddle_helper.py | 1 + paddlehub/common/utils.py | 1 + paddlehub/dataset/__init__.py | 1 + paddlehub/dataset/base_cv_dataset.py | 1 + paddlehub/dataset/chnsenticorp.py | 1 + paddlehub/dataset/dataset.py | 1 + paddlehub/dataset/dogcat.py | 1 + paddlehub/dataset/flowers.py | 1 + paddlehub/dataset/food101.py | 1 + paddlehub/dataset/indoor67.py | 1 + paddlehub/dataset/lcqmc.py | 1 + paddlehub/dataset/msra_ner.py | 2 +- paddlehub/dataset/nlpcc_dbqa.py | 1 + paddlehub/dataset/stanford_dogs.py | 1 + paddlehub/finetune/__init__.py | 1 + paddlehub/finetune/checkpoint.py | 1 + paddlehub/finetune/checkpoint_pb2.py | 1 + paddlehub/finetune/config.py | 1 + paddlehub/finetune/evaluate.py | 1 + paddlehub/finetune/finetune.py | 1 + paddlehub/finetune/optimization.py | 1 + paddlehub/finetune/regularizer.py | 1 + paddlehub/finetune/strategy.py | 1 + paddlehub/finetune/task.py | 1 + paddlehub/io/__init__.py | 1 + paddlehub/io/augmentation.py | 1 + paddlehub/io/parser.py | 1 + paddlehub/io/type.py | 1 + paddlehub/module/__init__.py | 1 + paddlehub/module/base_processor.py | 1 + paddlehub/module/check_info_pb2.py | 1 + paddlehub/module/checker.py | 1 + paddlehub/module/manager.py | 1 + paddlehub/module/module.py | 1 + paddlehub/module/module_desc_pb2.py | 1 + paddlehub/module/signature.py | 1 + paddlehub/reader/__init__.py | 1 + paddlehub/reader/batching.py | 1 + paddlehub/reader/cv_reader.py | 1 + paddlehub/reader/nlp_reader.py | 14 ++++---------- paddlehub/version.py | 1 + requirements.txt | 1 - setup.py | 9 +++++++-- 77 files changed, 126 insertions(+), 27 deletions(-) diff --git a/demo/elmo/elmo_finetune.py b/demo/elmo/elmo_finetune.py index cd4143de..9ae3b939 100644 --- a/demo/elmo/elmo_finetune.py +++ b/demo/elmo/elmo_finetune.py @@ -1,3 +1,4 @@ +#coding:utf-8 import argparse import ast import io diff --git a/demo/image-classification/img_classifier.py b/demo/image-classification/img_classifier.py index acef9d22..37281ed0 100644 --- a/demo/image-classification/img_classifier.py +++ b/demo/image-classification/img_classifier.py @@ -1,3 +1,4 @@ +#coding:utf-8 import argparse import os diff --git a/demo/image-classification/predict.py b/demo/image-classification/predict.py index 32cdc1f1..89e28306 100644 --- a/demo/image-classification/predict.py +++ b/demo/image-classification/predict.py @@ -1,3 +1,4 @@ +#coding:utf-8 import argparse import os diff --git a/demo/lac/lac_demo.py b/demo/lac/lac_demo.py index 883d94ee..d923d5ec 100644 --- a/demo/lac/lac_demo.py +++ b/demo/lac/lac_demo.py @@ -1,4 +1,10 @@ +#coding:utf-8 +from __future__ import print_function + +import json import os +import six + import paddlehub as hub if __name__ == "__main__": @@ -12,5 +18,11 @@ if __name__ == "__main__": # execute predict and print the result results = lac.lexical_analysis(data=inputs) for result in results: - print(result['word']) - print(result['tag']) + if six.PY2: + print(json.dumps( + result['word'], encoding="utf8", ensure_ascii=False)) + print(json.dumps( + result['tag'], encoding="utf8", ensure_ascii=False)) + else: + print(result['word']) + print(result['tag']) diff --git a/demo/senta/predict.py b/demo/senta/predict.py index e11070e8..a30245c8 100644 --- a/demo/senta/predict.py +++ b/demo/senta/predict.py @@ -1,3 +1,4 @@ +#coding:utf-8 from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/demo/senta/senta_demo.py b/demo/senta/senta_demo.py index 2ebcc850..07446a86 100644 --- a/demo/senta/senta_demo.py +++ b/demo/senta/senta_demo.py @@ -1,5 +1,10 @@ -# coding: utf-8 +#coding:utf-8 +from __future__ import print_function + +import json import os +import six + import paddlehub as hub if __name__ == "__main__": @@ -11,5 +16,12 @@ if __name__ == "__main__": input_dict = {"text": test_text} results = senta.sentiment_classify(data=input_dict) + + for index, text in enumerate(test_text): + results[index]["text"] = text for index, result in enumerate(results): - print(test_text[index], result['sentiment_key']) + if six.PY2: + print(json.dumps( + results[index], encoding="utf8", ensure_ascii=False)) + else: + print(results[index]) diff --git a/demo/senta/senta_finetune.py b/demo/senta/senta_finetune.py index d87fc842..b1d5e0c8 100644 --- a/demo/senta/senta_finetune.py +++ b/demo/senta/senta_finetune.py @@ -1,3 +1,4 @@ +#coding:utf-8 import argparse import ast diff --git a/demo/sequence-labeling/predict.py b/demo/sequence-labeling/predict.py index 6465eb0c..0c3cf513 100644 --- a/demo/sequence-labeling/predict.py +++ b/demo/sequence-labeling/predict.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/sequence-labeling/sequence_label.py b/demo/sequence-labeling/sequence_label.py index ebbf281c..31a3444e 100644 --- a/demo/sequence-labeling/sequence_label.py +++ b/demo/sequence-labeling/sequence_label.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/ssd/ssd_demo.py b/demo/ssd/ssd_demo.py index 4a24751f..3d4b3769 100644 --- a/demo/ssd/ssd_demo.py +++ b/demo/ssd/ssd_demo.py @@ -1,3 +1,4 @@ +#coding:utf-8 import os import paddlehub as hub diff --git a/demo/text-classification/predict.py b/demo/text-classification/predict.py index 5159f9c5..ff21c513 100644 --- a/demo/text-classification/predict.py +++ b/demo/text-classification/predict.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/demo/text-classification/simple_demo.py b/demo/text-classification/simple_demo.py index 8527adfe..645478aa 100644 --- a/demo/text-classification/simple_demo.py +++ b/demo/text-classification/simple_demo.py @@ -1,3 +1,4 @@ +#coding:utf-8 import paddle.fluid as fluid import paddlehub as hub diff --git a/demo/text-classification/text_classifier.py b/demo/text-classification/text_classifier.py index 0e379d66..1411e86a 100644 --- a/demo/text-classification/text_classifier.py +++ b/demo/text-classification/text_classifier.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/paddlehub/__init__.py b/paddlehub/__init__.py index cc52cfa4..09cf2bfb 100644 --- a/paddlehub/__init__.py +++ b/paddlehub/__init__.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" @@ -12,10 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -#coding:utf-8 - import six +if six.PY2: + import sys + reload(sys) + sys.setdefaultencoding("UTF-8") + from . import module from . import common from . import io @@ -47,8 +51,3 @@ from .finetune.config import RunConfig from .finetune.strategy import AdamWeightDecayStrategy from .finetune.strategy import DefaultStrategy from .finetune.strategy import DefaultFinetuneStrategy - -if six.PY2: - import sys - reload(sys) - sys.setdefaultencoding("UTF-8") diff --git a/paddlehub/commands/__init__.py b/paddlehub/commands/__init__.py index 09886b81..1690a06f 100644 --- a/paddlehub/commands/__init__.py +++ b/paddlehub/commands/__init__.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/base_command.py b/paddlehub/commands/base_command.py index 96707cdb..627ef470 100644 --- a/paddlehub/commands/base_command.py +++ b/paddlehub/commands/base_command.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/clear.py b/paddlehub/commands/clear.py index a2103d85..0b5624c3 100644 --- a/paddlehub/commands/clear.py +++ b/paddlehub/commands/clear.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/cml_utils.py b/paddlehub/commands/cml_utils.py index 74dcc382..6936bdbf 100644 --- a/paddlehub/commands/cml_utils.py +++ b/paddlehub/commands/cml_utils.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/download.py b/paddlehub/commands/download.py index d2291fc8..c70123ae 100644 --- a/paddlehub/commands/download.py +++ b/paddlehub/commands/download.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/help.py b/paddlehub/commands/help.py index 8b7e5f1d..11c2666e 100644 --- a/paddlehub/commands/help.py +++ b/paddlehub/commands/help.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/hub.py b/paddlehub/commands/hub.py index d2eef0bd..c5f901cc 100644 --- a/paddlehub/commands/hub.py +++ b/paddlehub/commands/hub.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" @@ -16,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import six import sys from paddlehub.common.logger import logger @@ -60,4 +62,10 @@ def main(): if __name__ == "__main__": - command.execute(sys.argv[1:]) + argv = [] + for item in sys.argv: + if six.PY2: + argv.append(item.decode(sys.stdin.encoding).decode("utf8")) + else: + argv.append(item) + command.execute(argv[1:]) diff --git a/paddlehub/commands/install.py b/paddlehub/commands/install.py index c181f398..fc444b19 100644 --- a/paddlehub/commands/install.py +++ b/paddlehub/commands/install.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/list.py b/paddlehub/commands/list.py index 44421b51..8830eb6a 100644 --- a/paddlehub/commands/list.py +++ b/paddlehub/commands/list.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/run.py b/paddlehub/commands/run.py index 4a18210b..d1b837aa 100644 --- a/paddlehub/commands/run.py +++ b/paddlehub/commands/run.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" @@ -17,6 +18,7 @@ from __future__ import division from __future__ import print_function import argparse +import json import os import sys @@ -172,7 +174,7 @@ class RunCommand(BaseCommand): results = module( sign_name=self.args.signature, data=input_data, **config) if six.PY2: - print(repr(results).decode('string_escape')) + print(json.dumps(results, encoding="utf8", ensure_ascii=False)) else: print(results) diff --git a/paddlehub/commands/search.py b/paddlehub/commands/search.py index 73f59886..99710ea6 100644 --- a/paddlehub/commands/search.py +++ b/paddlehub/commands/search.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/show.py b/paddlehub/commands/show.py index 990ce2e0..ebd3b8cb 100644 --- a/paddlehub/commands/show.py +++ b/paddlehub/commands/show.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/uninstall.py b/paddlehub/commands/uninstall.py index c00b9acd..ac50029f 100644 --- a/paddlehub/commands/uninstall.py +++ b/paddlehub/commands/uninstall.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/commands/version.py b/paddlehub/commands/version.py index 47e95950..c763044c 100644 --- a/paddlehub/commands/version.py +++ b/paddlehub/commands/version.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/common/__init__.py b/paddlehub/common/__init__.py index 93814bf4..17a2fc95 100644 --- a/paddlehub/common/__init__.py +++ b/paddlehub/common/__init__.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/common/arg_helper.py b/paddlehub/common/arg_helper.py index 761709a6..9d8008b5 100644 --- a/paddlehub/common/arg_helper.py +++ b/paddlehub/common/arg_helper.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/common/dir.py b/paddlehub/common/dir.py index 550ce1fd..9baa21f0 100644 --- a/paddlehub/common/dir.py +++ b/paddlehub/common/dir.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/common/downloader.py b/paddlehub/common/downloader.py index 10091e1b..984bedc0 100644 --- a/paddlehub/common/downloader.py +++ b/paddlehub/common/downloader.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/common/hub_server.py b/paddlehub/common/hub_server.py index 0029a649..1b7f08ce 100644 --- a/paddlehub/common/hub_server.py +++ b/paddlehub/common/hub_server.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/common/logger.py b/paddlehub/common/logger.py index d04913a8..fb61fb39 100644 --- a/paddlehub/common/logger.py +++ b/paddlehub/common/logger.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/common/paddle_helper.py b/paddlehub/common/paddle_helper.py index 955ae809..5bb0fae1 100644 --- a/paddlehub/common/paddle_helper.py +++ b/paddlehub/common/paddle_helper.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/common/utils.py b/paddlehub/common/utils.py index db57a4aa..0406de26 100644 --- a/paddlehub/common/utils.py +++ b/paddlehub/common/utils.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/__init__.py b/paddlehub/dataset/__init__.py index 2239623b..f73375e2 100644 --- a/paddlehub/dataset/__init__.py +++ b/paddlehub/dataset/__init__.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/paddlehub/dataset/base_cv_dataset.py b/paddlehub/dataset/base_cv_dataset.py index 9eb41c10..1606b874 100644 --- a/paddlehub/dataset/base_cv_dataset.py +++ b/paddlehub/dataset/base_cv_dataset.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/chnsenticorp.py b/paddlehub/dataset/chnsenticorp.py index b59a7a39..76b25485 100644 --- a/paddlehub/dataset/chnsenticorp.py +++ b/paddlehub/dataset/chnsenticorp.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/dataset.py b/paddlehub/dataset/dataset.py index aca77747..57f02e5d 100644 --- a/paddlehub/dataset/dataset.py +++ b/paddlehub/dataset/dataset.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/dogcat.py b/paddlehub/dataset/dogcat.py index 70600b34..6ebbec88 100644 --- a/paddlehub/dataset/dogcat.py +++ b/paddlehub/dataset/dogcat.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/flowers.py b/paddlehub/dataset/flowers.py index adef50ae..3b46790d 100644 --- a/paddlehub/dataset/flowers.py +++ b/paddlehub/dataset/flowers.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/food101.py b/paddlehub/dataset/food101.py index de9c8679..03622ebb 100644 --- a/paddlehub/dataset/food101.py +++ b/paddlehub/dataset/food101.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/indoor67.py b/paddlehub/dataset/indoor67.py index 59ebc3e7..37d014a0 100644 --- a/paddlehub/dataset/indoor67.py +++ b/paddlehub/dataset/indoor67.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/lcqmc.py b/paddlehub/dataset/lcqmc.py index 55012875..99a94de6 100644 --- a/paddlehub/dataset/lcqmc.py +++ b/paddlehub/dataset/lcqmc.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/msra_ner.py b/paddlehub/dataset/msra_ner.py index 81c31a96..a2ac96a2 100644 --- a/paddlehub/dataset/msra_ner.py +++ b/paddlehub/dataset/msra_ner.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" @@ -20,7 +21,6 @@ import os import codecs import csv import json -import six from collections import namedtuple from paddlehub.dataset import InputExample, HubDataset diff --git a/paddlehub/dataset/nlpcc_dbqa.py b/paddlehub/dataset/nlpcc_dbqa.py index 09a64439..1440e7ad 100644 --- a/paddlehub/dataset/nlpcc_dbqa.py +++ b/paddlehub/dataset/nlpcc_dbqa.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/dataset/stanford_dogs.py b/paddlehub/dataset/stanford_dogs.py index 9081027e..c399ad35 100644 --- a/paddlehub/dataset/stanford_dogs.py +++ b/paddlehub/dataset/stanford_dogs.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/finetune/__init__.py b/paddlehub/finetune/__init__.py index a028662d..309e3855 100644 --- a/paddlehub/finetune/__init__.py +++ b/paddlehub/finetune/__init__.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/finetune/checkpoint.py b/paddlehub/finetune/checkpoint.py index 4fa987df..43272685 100644 --- a/paddlehub/finetune/checkpoint.py +++ b/paddlehub/finetune/checkpoint.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/finetune/checkpoint_pb2.py b/paddlehub/finetune/checkpoint_pb2.py index 39ca5238..e0383224 100644 --- a/paddlehub/finetune/checkpoint_pb2.py +++ b/paddlehub/finetune/checkpoint_pb2.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Generated by the protocol buffer compiler. DO NOT EDIT! # source: checkpoint.proto diff --git a/paddlehub/finetune/config.py b/paddlehub/finetune/config.py index 4b3d8f71..89a0ebb1 100644 --- a/paddlehub/finetune/config.py +++ b/paddlehub/finetune/config.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/finetune/evaluate.py b/paddlehub/finetune/evaluate.py index 4563e394..38a05c56 100644 --- a/paddlehub/finetune/evaluate.py +++ b/paddlehub/finetune/evaluate.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/finetune/finetune.py b/paddlehub/finetune/finetune.py index 0d63cfd5..7965b28c 100644 --- a/paddlehub/finetune/finetune.py +++ b/paddlehub/finetune/finetune.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/finetune/optimization.py b/paddlehub/finetune/optimization.py index 5b7363c0..00658b9d 100644 --- a/paddlehub/finetune/optimization.py +++ b/paddlehub/finetune/optimization.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/paddlehub/finetune/regularizer.py b/paddlehub/finetune/regularizer.py index 72fc399f..441b05a3 100644 --- a/paddlehub/finetune/regularizer.py +++ b/paddlehub/finetune/regularizer.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/finetune/strategy.py b/paddlehub/finetune/strategy.py index 31c0baa3..f7fa6f59 100644 --- a/paddlehub/finetune/strategy.py +++ b/paddlehub/finetune/strategy.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/finetune/task.py b/paddlehub/finetune/task.py index bc2b8ac8..cac00ca1 100644 --- a/paddlehub/finetune/task.py +++ b/paddlehub/finetune/task.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/io/__init__.py b/paddlehub/io/__init__.py index e69de29b..1ecc0e80 100644 --- a/paddlehub/io/__init__.py +++ b/paddlehub/io/__init__.py @@ -0,0 +1 @@ +#coding:utf-8 diff --git a/paddlehub/io/augmentation.py b/paddlehub/io/augmentation.py index ec5e8a6c..4710fc1e 100644 --- a/paddlehub/io/augmentation.py +++ b/paddlehub/io/augmentation.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/io/parser.py b/paddlehub/io/parser.py index d332d7f1..4a60105a 100644 --- a/paddlehub/io/parser.py +++ b/paddlehub/io/parser.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/io/type.py b/paddlehub/io/type.py index 074b5e05..4a68a918 100644 --- a/paddlehub/io/type.py +++ b/paddlehub/io/type.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/module/__init__.py b/paddlehub/module/__init__.py index b03a52d9..0f2925da 100644 --- a/paddlehub/module/__init__.py +++ b/paddlehub/module/__init__.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/module/base_processor.py b/paddlehub/module/base_processor.py index 1d6b7fb3..d5a57d9f 100644 --- a/paddlehub/module/base_processor.py +++ b/paddlehub/module/base_processor.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/module/check_info_pb2.py b/paddlehub/module/check_info_pb2.py index 95d3e411..78f5546c 100644 --- a/paddlehub/module/check_info_pb2.py +++ b/paddlehub/module/check_info_pb2.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Generated by the protocol buffer compiler. DO NOT EDIT! # source: check_info.proto diff --git a/paddlehub/module/checker.py b/paddlehub/module/checker.py index 13398c2b..d76ca6bd 100644 --- a/paddlehub/module/checker.py +++ b/paddlehub/module/checker.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/module/manager.py b/paddlehub/module/manager.py index 46027574..645c4348 100644 --- a/paddlehub/module/manager.py +++ b/paddlehub/module/manager.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/module/module.py b/paddlehub/module/module.py index 1c0ddc15..f8a95604 100644 --- a/paddlehub/module/module.py +++ b/paddlehub/module/module.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/module/module_desc_pb2.py b/paddlehub/module/module_desc_pb2.py index 001052e4..4dd6c812 100644 --- a/paddlehub/module/module_desc_pb2.py +++ b/paddlehub/module/module_desc_pb2.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Generated by the protocol buffer compiler. DO NOT EDIT! # source: module_desc.proto diff --git a/paddlehub/module/signature.py b/paddlehub/module/signature.py index 93150c0a..1baba84c 100644 --- a/paddlehub/module/signature.py +++ b/paddlehub/module/signature.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/reader/__init__.py b/paddlehub/reader/__init__.py index a0e119df..bc0fa025 100644 --- a/paddlehub/reader/__init__.py +++ b/paddlehub/reader/__init__.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/reader/batching.py b/paddlehub/reader/batching.py index e33b0082..5ec5f320 100644 --- a/paddlehub/reader/batching.py +++ b/paddlehub/reader/batching.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/paddlehub/reader/cv_reader.py b/paddlehub/reader/cv_reader.py index 9e06a5a5..6aba2f8d 100644 --- a/paddlehub/reader/cv_reader.py +++ b/paddlehub/reader/cv_reader.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/paddlehub/reader/nlp_reader.py b/paddlehub/reader/nlp_reader.py index 0ecd43b2..7d2b8094 100644 --- a/paddlehub/reader/nlp_reader.py +++ b/paddlehub/reader/nlp_reader.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,6 +21,7 @@ import csv import json import platform import six +import sys from collections import namedtuple import paddle @@ -31,12 +33,6 @@ from .batching import pad_batch_data import paddlehub as hub -def get_encoding(): - if platform.platform().lower().startswith("windows"): - return "gbk" - return "utf8" - - class BaseReader(object): def __init__(self, dataset, @@ -426,15 +422,13 @@ class LACClassifyReader(object): def preprocess(text): data_dict = {self.feed_key: [text]} processed = self.lac.lexical_analysis(data=data_dict) - for data in processed: - for index, word in enumerate(data['word']): - if six.PY2 and type(word) == str: - data['word'][index] = word.decode(get_encoding()) processed = [ self.vocab[word] for word in processed[0]['word'] if word in self.vocab ] if len(processed) == 0: + if six.PY2: + text = text.encode(sys.stdout.encoding) logger.warning( "The words in text %s can't be found in the vocabulary." % (text)) diff --git a/paddlehub/version.py b/paddlehub/version.py index 71a25047..669680c5 100644 --- a/paddlehub/version.py +++ b/paddlehub/version.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" diff --git a/requirements.txt b/requirements.txt index a9e5ee21..10dc2485 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,3 @@ pyyaml numpy >= 1.12.0 Pillow six >= 1.10.0 -chardet == 3.0.4 diff --git a/setup.py b/setup.py index c6baa6c3..17c469eb 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +#coding:utf-8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" @@ -30,8 +31,12 @@ def python_version(): max_version, mid_version, min_version = python_version() REQUIRED_PACKAGES = [ - 'numpy >= 1.12.0', 'six >= 1.10.0', 'protobuf >= 3.1.0', 'pyyaml', 'Pillow', - "visualdl >= 1.3.0", "chardet == 3.0.4" + 'numpy >= 1.12.0', + 'six >= 1.10.0', + 'protobuf >= 3.1.0', + 'pyyaml', + 'Pillow', + "visualdl >= 1.3.0", ] if max_version < 3: -- GitLab