Unverified commit a4987b6a, authored by Xiaoyao Xi, committed by GitHub

Merge pull request #8 from xixiaoyao/master

fix bugs
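
The diffs below follow one pattern: normalize the source-encoding declaration to # -*- coding: utf-8 -*- and replace bare open() calls with io.open(..., encoding="utf8") so text files are decoded as UTF-8 regardless of the system locale. A minimal sketch of that pattern, with a placeholder path and function name that are not from the repository:

# -*- coding: utf-8 -*-
import io
import json

def load_json_utf8(path):
    # io.open is available on both Python 2 and Python 3 and takes an
    # explicit encoding, so the file decodes as UTF-8 regardless of locale.
    with io.open(path, "r", encoding="utf8") as f:
        return json.load(f)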
*.pyc
__pycache__
pretrain_model
output_model
......@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# encoding=utf8
# -*- coding: utf-8 -*-
import os
import sys
......
......@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# encoding=utf8
# -*- coding: utf-8 -*-
import paddle.fluid as fluid
......
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
import paddle.fluid as fluid
from backbone.utils.transformer import pre_process_layer
......
......@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# encoding=utf8
# -*- coding: utf-8 -*-
import paddle.fluid as fluid
import collections
......
......@@ -17,6 +17,7 @@ import types
import csv
import numpy as np
from utils import tokenization
import io
from utils.batching import prepare_batch_data
......@@ -115,7 +116,7 @@ class BaseProcessor(object):
@classmethod
def _read_tsv(cls, input_file, quotechar=None):
"""Reads a tab separated value file."""
with open(input_file, "r") as f:
with io.open(input_file, "r", encoding="utf8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
lines = []
for line in reader:
......
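
For reference, a self-contained sketch of the patched _read_tsv behaviour; the read_tsv name and the train.tsv path in the usage comment are hypothetical, not code from the repository:

import csv
import io

def read_tsv(input_file, quotechar=None):
    # Same pattern as the patched _read_tsv: open the file as UTF-8 text,
    # then let csv.reader split rows on tabs.
    with io.open(input_file, "r", encoding="utf8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        return [line for line in reader]

# Hypothetical usage:
# rows = read_tsv("train.tsv")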
#encoding=utf8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
import os
import sys
import random
......
......@@ -22,6 +22,7 @@ import gzip
import logging
import re
import six
import io
import collections
from utils import tokenization
from utils.batching import prepare_batch_data
......@@ -126,7 +127,7 @@ class DataProcessor(object):
def load_vocab(self, vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
fin = open(vocab_file)
fin = io.open(vocab_file, encoding='utf8')
for num, line in enumerate(fin):
items = self.convert_to_unicode(line.strip()).split("\t")
if len(items) > 2:
......
......@@ -14,9 +14,11 @@
"""Run MRQA"""
import six
import io
import math
import json
import random
import io
import collections
import numpy as np
from utils import tokenization
......@@ -401,14 +403,14 @@ class DataProcessor(object):
all_nbest_json[example.qas_id] = nbest_json
with open(output_prediction_file, "w") as writer:
with io.open(output_prediction_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(all_predictions, indent=4) + "\n")
with open(output_nbest_file, "w") as writer:
with io.open(output_nbest_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
if with_negative:
with open(output_null_log_odds_file, "w") as writer:
with io.open(output_null_log_odds_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
......@@ -486,7 +488,7 @@ def read_mrqa_examples(input_file, is_training, with_negative=False):
"""Read a MRQA json file into a list of MRQAExample."""
phase = 'training' if is_training else 'testing'
print("loading mrqa {} data...".format(phase))
with open(input_file, "r") as reader:
with io.open(input_file, "r", encoding="utf8") as reader:
input_data = json.load(reader)["data"]
def is_whitespace(c):
......@@ -736,7 +738,7 @@ def estimate_runtime_examples(data_path, sample_rate, tokenizer, \
assert sample_rate > 0.0 and sample_rate <= 1.0, "sample_rate must be set between 0.0~1.0"
print("loading data with json parser...")
with open(data_path, "r") as reader:
with io.open(data_path, "r", encoding="utf8") as reader:
data = json.load(reader)["data"]
num_raw_examples = 0
......
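
A matching sketch of the patched write path for the prediction files above; dump_json_utf8 and predictions.json are placeholder names, not the project's API:

import io
import json

def dump_json_utf8(obj, path):
    # Mirrors the patched writers: open the output in text mode with an
    # explicit encoding and write the serialized JSON plus a newline.
    with io.open(path, "w", encoding="utf8") as writer:
        writer.write(json.dumps(obj, indent=4) + "\n")

# Hypothetical usage:
# dump_json_utf8(all_predictions, "predictions.json")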
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
"""Mask, padding and batching."""
from __future__ import absolute_import
from __future__ import division
......
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
......@@ -18,6 +19,7 @@ from __future__ import print_function
import os
import sys
import io
import argparse
import json
import yaml
......@@ -38,7 +40,7 @@ class JsonConfig(object):
def _parse(self, config_path):
try:
with open(config_path) as json_file:
with io.open(config_path, encoding="utf8") as json_file:
config_dict = json.load(json_file)
assert isinstance(config_dict, dict), "Object in {} is NOT a dict.".format(config_path)
except:
......@@ -216,7 +218,7 @@ class PDConfig(object):
raise Warning("the json file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
with io.open(file_path, "r", encoding="utf8") as fin:
self.json_config = json.loads(fin.read())
fin.close()
......@@ -241,7 +243,7 @@ class PDConfig(object):
raise Warning("the yaml file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
with io.open(file_path, "r", encoding="utf8") as fin:
self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
......
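
A condensed sketch of the config loading after the patch; the helper names and config.yaml are placeholders, not the project's API:

import io
import json
import yaml

def load_yaml_config(path):
    # SafeLoader only builds plain Python objects from the YAML file,
    # matching the patched PDConfig call.
    with io.open(path, "r", encoding="utf8") as fin:
        return yaml.load(fin, Loader=yaml.SafeLoader)

def load_json_config(path):
    with io.open(path, "r", encoding="utf8") as fin:
        config_dict = json.load(fin)
    assert isinstance(config_dict, dict), "Object in {} is NOT a dict.".format(path)
    return config_dict

# Hypothetical usage:
# cfg = load_yaml_config("config.yaml")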
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
from __future__ import print_function
import paddle
......
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*-
from __future__ import print_function
......
......@@ -20,7 +20,7 @@ from __future__ import print_function
import collections
import unicodedata
import six
import io
def convert_to_unicode(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
......@@ -68,15 +68,15 @@ def printable_text(text):
def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
fin = open(vocab_file)
for num, line in enumerate(fin):
items = convert_to_unicode(line.strip()).split("\t")
if len(items) > 2:
break
token = items[0]
index = items[1] if len(items) == 2 else num
token = token.strip()
vocab[token] = int(index)
with io.open(vocab_file, encoding="utf8") as fin:
for num, line in enumerate(fin):
items = convert_to_unicode(line.strip()).split("\t")
if len(items) > 2:
break
token = items[0]
index = items[1] if len(items) == 2 else num
token = token.strip()
vocab[token] = int(index)
return vocab
......
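
A condensed, self-contained form of the refactored loader above; convert_to_unicode is dropped here because io.open already yields decoded text, and the vocab.txt path and [UNK] lookup in the usage comment are hypothetical:

import collections
import io

def load_vocab(vocab_file):
    # Condensed form of the refactored loader: one token (and optional id)
    # per line, tab separated; the line number is the fallback id.
    vocab = collections.OrderedDict()
    with io.open(vocab_file, encoding="utf8") as fin:
        for num, line in enumerate(fin):
            items = line.strip().split("\t")
            if len(items) > 2:
                break
            token = items[0].strip()
            index = items[1] if len(items) == 2 else num
            vocab[token] = int(index)
    return vocab

# Hypothetical usage:
# vocab = load_vocab("vocab.txt")
# unk_id = vocab.get("[UNK]")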