diff --git a/mtl_run.py b/mtl_run.py
index 7f457b9d0eb37a755d0cb280d5df7e211f22f476..8bba8bc976ce4dc40d72a5fd132e4913a704811c 100644
--- a/mtl_run.py
+++ b/mtl_run.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# encoding=utf8
+# -*- coding: utf-8 -*-
 
 import os
 import sys
diff --git a/paradigm/answer_matching.py b/paradigm/answer_matching.py
index d1bd2023882c7c99ac4a75e75ce788495dca2096..2d5381598204fbec2f1695a45c49931df3ffb435 100644
--- a/paradigm/answer_matching.py
+++ b/paradigm/answer_matching.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# encoding=utf8
+# -*- coding: utf-8 -*-
 
 import paddle.fluid as fluid
 
diff --git a/paradigm/mask_language_model.py b/paradigm/mask_language_model.py
index fa13d75b306f249021ced83f5546ba922001f29b..3f8e21e0a07255904be06b58120af975ed522209 100644
--- a/paradigm/mask_language_model.py
+++ b/paradigm/mask_language_model.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 
 import paddle.fluid as fluid
 from backbone.utils.transformer import pre_process_layer
diff --git a/paradigm/reading_comprehension.py b/paradigm/reading_comprehension.py
index 5f38842720d9f503412891067528cc0aefbf194f..79e0543d80100ab0b009db1034f9e67daa7d9778 100644
--- a/paradigm/reading_comprehension.py
+++ b/paradigm/reading_comprehension.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# encoding=utf8
+# -*- coding: utf-8 -*-
 
 import paddle.fluid as fluid
 import collections
diff --git a/reader/answer_matching_reader.py b/reader/answer_matching_reader.py
index 1764d0aeead712ad41212f8d7c9d4b6e1a4d48b8..e072837bbd9d6a5fb37df7d4873d5147559e43a0 100644
--- a/reader/answer_matching_reader.py
+++ b/reader/answer_matching_reader.py
@@ -17,6 +17,7 @@
 import types
 import csv
 import numpy as np
 from utils import tokenization
+import io
 from utils.batching import prepare_batch_data
 
@@ -115,7 +116,7 @@ class BaseProcessor(object):
     @classmethod
     def _read_tsv(cls, input_file, quotechar=None):
         """Reads a tab separated value file."""
-        with open(input_file, "r") as f:
+        with io.open(input_file, "r", encoding="utf8") as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             lines = []
             for line in reader:
diff --git a/reader/joint_reader.py b/reader/joint_reader.py
index aced579ac80ad2462d87acbe9dae3c2143f63b01..151847b7a0976380a213889a4e7130da75e67baf 100644
--- a/reader/joint_reader.py
+++ b/reader/joint_reader.py
@@ -1,4 +1,18 @@
-#encoding=utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# -*- coding: utf-8 -*-
+
 import os
 import sys
 import random
diff --git a/reader/mask_language_model_reader.py b/reader/mask_language_model_reader.py
index e4accbad2fb3aa32aa38e180b609371171852ff0..134648d76ca1c7bdca5cd81c5c6eb84985e2cc0e 100644
--- a/reader/mask_language_model_reader.py
+++ b/reader/mask_language_model_reader.py
@@ -22,6 +22,7 @@ import gzip
 import logging
 import re
 import six
+import io
 import collections
 from utils import tokenization
 from utils.batching import prepare_batch_data
@@ -126,7 +127,7 @@
     def load_vocab(self, vocab_file):
         """Loads a vocabulary file into a dictionary."""
         vocab = collections.OrderedDict()
-        fin = open(vocab_file)
+        fin = io.open(vocab_file, encoding='utf8')
         for num, line in enumerate(fin):
             items = self.convert_to_unicode(line.strip()).split("\t")
             if len(items) > 2:
diff --git a/reader/reading_comprehension_reader.py b/reader/reading_comprehension_reader.py
index c3eaa7243529dd03ca82f66c0b1d41239ebdcb78..afba8853e86b216596c5f699146863da74cb1abd 100644
--- a/reader/reading_comprehension_reader.py
+++ b/reader/reading_comprehension_reader.py
@@ -14,9 +14,10 @@
 """Run MRQA"""
 
 import six
+import io
 import math
 import json
 import random
 import collections
 import numpy as np
 from utils import tokenization
@@ -401,14 +402,14 @@
         all_nbest_json[example.qas_id] = nbest_json
 
-    with open(output_prediction_file, "w") as writer:
+    with io.open(output_prediction_file, "w", encoding="utf8") as writer:
         writer.write(json.dumps(all_predictions, indent=4) + "\n")
 
-    with open(output_nbest_file, "w") as writer:
+    with io.open(output_nbest_file, "w", encoding="utf8") as writer:
         writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
 
     if with_negative:
-        with open(output_null_log_odds_file, "w") as writer:
+        with io.open(output_null_log_odds_file, "w", encoding="utf8") as writer:
             writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
 
@@ -486,7 +487,7 @@ def read_mrqa_examples(input_file, is_training, with_negative=False):
     """Read a MRQA json file into a list of MRQAExample."""
     phase = 'training' if is_training else 'testing'
     print("loading mrqa {} data...".format(phase))
-    with open(input_file, "r") as reader:
+    with io.open(input_file, "r", encoding="utf8") as reader:
         input_data = json.load(reader)["data"]
 
     def is_whitespace(c):
@@ -736,7 +737,7 @@ def estimate_runtime_examples(data_path, sample_rate, tokenizer, \
     assert sample_rate > 0.0 and sample_rate <= 1.0, "sample_rate must be set between 0.0~1.0"
 
     print("loading data with json parser...")
-    with open(data_path, "r") as reader:
+    with io.open(data_path, "r", encoding="utf8") as reader:
         data = json.load(reader)["data"]
 
     num_raw_examples = 0
diff --git a/utils/batching.py b/utils/batching.py
index 13803cf79f00d7ea5857360eeceb2da354a5e626..569a55cb6d276449d4fa7ab7ecec9d080297fd5e 100644
--- a/utils/batching.py
+++ b/utils/batching.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 """Mask, padding and batching."""
 from __future__ import absolute_import
 from __future__ import division
diff --git a/utils/configure.py b/utils/configure.py
index 2d63b0b0021b64f9d36acb9a80d1ed9385238416..67de31fd09a49fa4e052bd2177fc69150e7479c4 100644
--- a/utils/configure.py
+++ b/utils/configure.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 
 from __future__ import absolute_import
 from __future__ import division
@@ -18,6 +19,7 @@ from __future__ import print_function
 
 import os
 import sys
+import io
 import argparse
 import json
 import yaml
@@ -38,7 +40,7 @@ class JsonConfig(object):
 
     def _parse(self, config_path):
         try:
-            with open(config_path) as json_file:
+            with io.open(config_path, encoding="utf8") as json_file:
                 config_dict = json.load(json_file)
             assert isinstance(config_dict, dict), "Object in {} is NOT a dict.".format(config_path)
         except:
@@ -216,7 +218,7 @@ class PDConfig(object):
             raise Warning("the json file %s does not exist." % file_path)
             return
 
-        with open(file_path, "r") as fin:
+        with io.open(file_path, "r", encoding="utf8") as fin:
             self.json_config = json.loads(fin.read())
         fin.close()
 
@@ -241,7 +243,7 @@ class PDConfig(object):
             raise Warning("the yaml file %s does not exist." % file_path)
             return
 
-        with open(file_path, "r") as fin:
+        with io.open(file_path, "r", encoding="utf8") as fin:
             self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
         fin.close()
 
diff --git a/utils/fp16.py b/utils/fp16.py
index e153c2b9a1029897def264278c5dbe72e1f369f5..28e0be4418b4f4232d2e5f57d84593f806442cc4 100644
--- a/utils/fp16.py
+++ b/utils/fp16.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 
 from __future__ import print_function
 
 import paddle
diff --git a/utils/init.py b/utils/init.py
index abdad1f1e14f38b5613a3c843c7a12fb1adebed8..0e8c47e5addf6512b687cb6ed1f59361cb633ef2 100644
--- a/utils/init.py
+++ b/utils/init.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# -*- coding: utf-8 -*-
 
 from __future__ import print_function
 
diff --git a/utils/tokenization.py b/utils/tokenization.py
index 3a52ecf654c07dd1bcfe2b5739f86a4ac8d7e720..8001f95358e6f42181fd8e623bbb35a3203c2f6d 100644
--- a/utils/tokenization.py
+++ b/utils/tokenization.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 import collections
 import unicodedata
 import six
-
+import io
 
 def convert_to_unicode(text):
     """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
@@ -68,7 +68,7 @@ def printable_text(text):
 def load_vocab(vocab_file):
     """Loads a vocabulary file into a dictionary."""
     vocab = collections.OrderedDict()
-    fin = open(vocab_file)
+    fin = io.open(vocab_file, encoding="utf8")
     for num, line in enumerate(fin):
         items = convert_to_unicode(line.strip()).split("\t")
         if len(items) > 2:
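
Note, not part of the patch: every hunk follows the same pattern, replacing bare open() calls with io.open() plus an explicit UTF-8 encoding and normalizing the source-encoding comment to the PEP 263 form. A minimal standalone sketch of that reading pattern is shown below; the file name "vocab.txt" and the helper load_vocab are hypothetical and only use the standard library.

# Sketch only, not code from this repository: mirrors the io.open pattern applied above.
import io
import collections

def load_vocab(vocab_file):
    vocab = collections.OrderedDict()
    # io.open decodes each line with the given encoding on both Python 2 and
    # Python 3; the built-in open() returns undecoded bytes on Python 2 and
    # falls back to the locale's default encoding on Python 3, so non-ASCII
    # vocabulary entries can be corrupted or raise UnicodeDecodeError.
    with io.open(vocab_file, "r", encoding="utf8") as fin:
        for index, line in enumerate(fin):
            token = line.strip().split("\t")[0]
            vocab[token] = index
    return vocab

print(len(load_vocab("vocab.txt")))  # "vocab.txt" is a hypothetical path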