diff --git a/demo/sequence_labeling/predict.py b/demo/sequence_labeling/predict.py
index 862e9bfba7675741898e9e42ae32931fde94dd7f..fb189b42b83319bcee2823d71ca25bb94e52ec18 100644
--- a/demo/sequence_labeling/predict.py
+++ b/demo/sequence_labeling/predict.py
@@ -87,12 +87,13 @@ if __name__ == '__main__':
         add_crf=True)
 
     # Data to be predicted
+    # If using python 2, prefix "u" is necessary
     data = [
-        ["我们变而以书会友,以书结缘,把欧美、港台流行的食品类图谱、画册、工具书汇集一堂。"],
-        ["为了跟踪国际最新食品工艺、流行趋势,大量搜集海外专业书刊资料是提高技艺的捷径。"],
-        ["其中线装古籍逾千册;民国出版物几百种;珍本四册、稀见本四百余册,出版时间跨越三百余年。"],
-        ["有的古木交柯,春机荣欣,从诗人句中得之,而入画中,观之令人心驰。"],
-        ["不过重在晋趣,略增明人气息,妙在集古有道、不露痕迹罢了。"],
+        [u"我们变而以书会友,以书结缘,把欧美、港台流行的食品类图谱、画册、工具书汇集一堂。"],
+        [u"为了跟踪国际最新食品工艺、流行趋势,大量搜集海外专业书刊资料是提高技艺的捷径。"],
+        [u"其中线装古籍逾千册;民国出版物几百种;珍本四册、稀见本四百余册,出版时间跨越三百余年。"],
+        [u"有的古木交柯,春机荣欣,从诗人句中得之,而入画中,观之令人心驰。"],
+        [u"不过重在晋趣,略增明人气息,妙在集古有道、不露痕迹罢了。"],
     ]
 
     # Add 0x02 between characters to match the format of training data,
diff --git a/paddlehub/finetune/task/base_task.py b/paddlehub/finetune/task/base_task.py
index b8c045b5a909c89903a8495ca1940ed848389dce..51351555635c47bc68b808af85669ffdc0decfed 100644
--- a/paddlehub/finetune/task/base_task.py
+++ b/paddlehub/finetune/task/base_task.py
@@ -24,7 +24,11 @@ import copy
 import logging
 import inspect
 from functools import partial
-
+import six
+if six.PY2:
+    from inspect import getargspec as get_args
+else:
+    from inspect import getfullargspec as get_args
 import numpy as np
 import paddle.fluid as fluid
 from tb_paddle import SummaryWriter
@@ -129,7 +133,7 @@ class TaskHooks():
                     "name: %s has existed in hook_type:%s, use modify method to modify it"
                     % (name, hook_type))
         else:
-            args_num = len(inspect.getfullargspec(func).args)
+            args_num = len(get_args(func).args)
             if args_num != self._hook_params_num[hook_type]:
                 raise ValueError(
                     "The number of parameters to the hook hook_type:%s should be %i"
diff --git a/paddlehub/finetune/task/reading_comprehension_task.py b/paddlehub/finetune/task/reading_comprehension_task.py
index ccc590ee36906611d33b52c5a4b03cfcfba2c3b3..fe73fceb795064836f1a7a41f2493b3e5a06effd 100644
--- a/paddlehub/finetune/task/reading_comprehension_task.py
+++ b/paddlehub/finetune/task/reading_comprehension_task.py
@@ -26,6 +26,7 @@ import json
 
 from collections import OrderedDict
 
+import io
 import numpy as np
 import paddle.fluid as fluid
 from .base_task import BaseTask
@@ -517,13 +518,13 @@ class ReadingComprehensionTask(BaseTask):
             null_score_diff_threshold=self.null_score_diff_threshold,
             is_english=self.is_english)
         if self.phase == 'val' or self.phase == 'dev':
-            with open(
+            with io.open(
                     self.data_reader.dataset.dev_path, 'r',
                     encoding="utf8") as dataset_file:
                 dataset_json = json.load(dataset_file)
                 dataset = dataset_json['data']
         elif self.phase == 'test':
-            with open(
+            with io.open(
                     self.data_reader.dataset.test_path, 'r',
                     encoding="utf8") as dataset_file:
                 dataset_json = json.load(dataset_file)
diff --git a/paddlehub/reader/tokenization.py b/paddlehub/reader/tokenization.py
index ef49ed76fd82d0a0b58cfe2b3bc7122eb9e8acac..bde0ed43cd5140d2d926b5e43d53e0f55ed91205 100644
--- a/paddlehub/reader/tokenization.py
+++ b/paddlehub/reader/tokenization.py
@@ -170,7 +170,7 @@ class WSSPTokenizer(object):
         self.inv_vocab = {v: k for k, v in self.vocab.items()}
         self.ws = ws
         self.lower = lower
-        self.dict = pickle.load(open(word_dict, 'rb'), encoding='utf8')
+        self.dict = pickle.load(open(word_dict, 'rb'))
         self.sp_model = spm.SentencePieceProcessor()
         self.window_size = 5
         self.sp_model.Load(sp_model_dir)
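
The hunks above all apply the same Python 2/3 compatibility pattern: a version-gated argspec import and io.open with an explicit encoding in place of the builtin open. A minimal standalone sketch of that pattern follows, assuming only that six is installed; the helper names count_args and load_json are illustrative and are not part of the patch.

# Sketch of the compatibility pattern applied in this patch (helper names are
# illustrative, not taken from PaddleHub).
from __future__ import print_function

import io
import json
import six

if six.PY2:
    # inspect.getfullargspec does not exist on Python 2
    from inspect import getargspec as get_args
else:
    from inspect import getfullargspec as get_args


def count_args(func):
    # Works on both interpreters thanks to the aliased import above
    return len(get_args(func).args)


def load_json(path):
    # io.open accepts `encoding` on Python 2, unlike the builtin open
    with io.open(path, 'r', encoding='utf8') as f:
        return json.load(f)


if __name__ == '__main__':
    print(count_args(lambda run_states, a, b: None))  # prints 3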