提交 b0d11b3b 编写于 作者: X xixiaoyao

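fix bugs: open text files via io.open(..., encoding="utf8") and use the standard "# -*- coding: utf-8 -*-" source header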

上级 5649e623
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# encoding=utf8 # -*- coding: utf-8 -*-
import os import os
import sys import sys
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# encoding=utf8 # -*- coding: utf-8 -*-
import paddle.fluid as fluid import paddle.fluid as fluid
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# -*- coding: utf-8 -*-
import paddle.fluid as fluid import paddle.fluid as fluid
from backbone.utils.transformer import pre_process_layer from backbone.utils.transformer import pre_process_layer
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# encoding=utf8 # -*- coding: utf-8 -*-
import paddle.fluid as fluid import paddle.fluid as fluid
import collections import collections
......
...@@ -17,6 +17,7 @@ import types ...@@ -17,6 +17,7 @@ import types
import csv import csv
import numpy as np import numpy as np
from utils import tokenization from utils import tokenization
import io
from utils.batching import prepare_batch_data from utils.batching import prepare_batch_data
...@@ -115,7 +116,7 @@ class BaseProcessor(object): ...@@ -115,7 +116,7 @@ class BaseProcessor(object):
@classmethod @classmethod
def _read_tsv(cls, input_file, quotechar=None): def _read_tsv(cls, input_file, quotechar=None):
"""Reads a tab separated value file.""" """Reads a tab separated value file."""
with open(input_file, "r") as f: with io.open(input_file, "r", encoding="utf8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar) reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
lines = [] lines = []
for line in reader: for line in reader:
......
#encoding=utf8 # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
import os import os
import sys import sys
import random import random
......
...@@ -22,6 +22,7 @@ import gzip ...@@ -22,6 +22,7 @@ import gzip
import logging import logging
import re import re
import six import six
import io
import collections import collections
from utils import tokenization from utils import tokenization
from utils.batching import prepare_batch_data from utils.batching import prepare_batch_data
...@@ -126,7 +127,7 @@ class DataProcessor(object): ...@@ -126,7 +127,7 @@ class DataProcessor(object):
def load_vocab(self, vocab_file): def load_vocab(self, vocab_file):
"""Loads a vocabulary file into a dictionary.""" """Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict() vocab = collections.OrderedDict()
fin = open(vocab_file) fin = io.open(vocab_file, encoding='utf8')
for num, line in enumerate(fin): for num, line in enumerate(fin):
items = self.convert_to_unicode(line.strip()).split("\t") items = self.convert_to_unicode(line.strip()).split("\t")
if len(items) > 2: if len(items) > 2:
......
...@@ -14,9 +14,11 @@ ...@@ -14,9 +14,11 @@
"""Run MRQA""" """Run MRQA"""
import six import six
import io
import math import math
import json import json
import random import random
import io
import collections import collections
import numpy as np import numpy as np
from utils import tokenization from utils import tokenization
...@@ -401,14 +403,14 @@ class DataProcessor(object): ...@@ -401,14 +403,14 @@ class DataProcessor(object):
all_nbest_json[example.qas_id] = nbest_json all_nbest_json[example.qas_id] = nbest_json
with open(output_prediction_file, "w") as writer: with io.open(output_prediction_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(all_predictions, indent=4) + "\n") writer.write(json.dumps(all_predictions, indent=4) + "\n")
with open(output_nbest_file, "w") as writer: with io.open(output_nbest_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(all_nbest_json, indent=4) + "\n") writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
if with_negative: if with_negative:
with open(output_null_log_odds_file, "w") as writer: with io.open(output_null_log_odds_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(scores_diff_json, indent=4) + "\n") writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
...@@ -486,7 +488,7 @@ def read_mrqa_examples(input_file, is_training, with_negative=False): ...@@ -486,7 +488,7 @@ def read_mrqa_examples(input_file, is_training, with_negative=False):
"""Read a MRQA json file into a list of MRQAExample.""" """Read a MRQA json file into a list of MRQAExample."""
phase = 'training' if is_training else 'testing' phase = 'training' if is_training else 'testing'
print("loading mrqa {} data...".format(phase)) print("loading mrqa {} data...".format(phase))
with open(input_file, "r") as reader: with io.open(input_file, "r", encoding="utf8") as reader:
input_data = json.load(reader)["data"] input_data = json.load(reader)["data"]
def is_whitespace(c): def is_whitespace(c):
...@@ -736,7 +738,7 @@ def estimate_runtime_examples(data_path, sample_rate, tokenizer, \ ...@@ -736,7 +738,7 @@ def estimate_runtime_examples(data_path, sample_rate, tokenizer, \
assert sample_rate > 0.0 and sample_rate <= 1.0, "sample_rate must be set between 0.0~1.0" assert sample_rate > 0.0 and sample_rate <= 1.0, "sample_rate must be set between 0.0~1.0"
print("loading data with json parser...") print("loading data with json parser...")
with open(data_path, "r") as reader: with io.open(data_path, "r", encoding="utf8") as reader:
data = json.load(reader)["data"] data = json.load(reader)["data"]
num_raw_examples = 0 num_raw_examples = 0
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# -*- coding: utf-8 -*-
"""Mask, padding and batching.""" """Mask, padding and batching."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# -*- coding: utf-8 -*-
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -18,6 +19,7 @@ from __future__ import print_function ...@@ -18,6 +19,7 @@ from __future__ import print_function
import os import os
import sys import sys
import io
import argparse import argparse
import json import json
import yaml import yaml
...@@ -38,7 +40,7 @@ class JsonConfig(object): ...@@ -38,7 +40,7 @@ class JsonConfig(object):
def _parse(self, config_path): def _parse(self, config_path):
try: try:
with open(config_path) as json_file: with io.open(config_path, encoding="utf8") as json_file:
config_dict = json.load(json_file) config_dict = json.load(json_file)
assert isinstance(config_dict, dict), "Object in {} is NOT a dict.".format(config_path) assert isinstance(config_dict, dict), "Object in {} is NOT a dict.".format(config_path)
except: except:
...@@ -216,7 +218,7 @@ class PDConfig(object): ...@@ -216,7 +218,7 @@ class PDConfig(object):
raise Warning("the json file %s does not exist." % file_path) raise Warning("the json file %s does not exist." % file_path)
return return
with open(file_path, "r") as fin: with io.open(file_path, "r", encoding="utf8") as fin:
self.json_config = json.loads(fin.read()) self.json_config = json.loads(fin.read())
fin.close() fin.close()
...@@ -241,7 +243,7 @@ class PDConfig(object): ...@@ -241,7 +243,7 @@ class PDConfig(object):
raise Warning("the yaml file %s does not exist." % file_path) raise Warning("the yaml file %s does not exist." % file_path)
return return
with open(file_path, "r") as fin: with io.open(file_path, "r", encoding="utf8") as fin:
self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader) self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close() fin.close()
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# -*- coding: utf-8 -*-
from __future__ import print_function from __future__ import print_function
import paddle import paddle
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# -*- coding: utf-8 -*-
from __future__ import print_function from __future__ import print_function
......
...@@ -20,7 +20,7 @@ from __future__ import print_function ...@@ -20,7 +20,7 @@ from __future__ import print_function
import collections import collections
import unicodedata import unicodedata
import six import six
import io
def convert_to_unicode(text): def convert_to_unicode(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
...@@ -68,7 +68,7 @@ def printable_text(text): ...@@ -68,7 +68,7 @@ def printable_text(text):
def load_vocab(vocab_file): def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary.""" """Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict() vocab = collections.OrderedDict()
fin = open(vocab_file) fin = io.open(vocab_file, encoding="utf8")
for num, line in enumerate(fin): for num, line in enumerate(fin):
items = convert_to_unicode(line.strip()).split("\t") items = convert_to_unicode(line.strip()).split("\t")
if len(items) > 2: if len(items) > 2:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册