提交 dc5abb6f 编写于 作者: Y Yibing Liu 提交者: pkpk

Fix Windows encoding problem (#3595) (#3597)

上级 fc437d49
......@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import types
import csv
......@@ -100,7 +101,7 @@ class DataProcessor(object):
@classmethod
def _read_tsv(cls, input_file, quotechar=None):
"""Reads a tab separated value file."""
with open(input_file, "r") as f:
with io.open(input_file, "r", encoding="utf8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
lines = []
for line in reader:
......
......@@ -15,6 +15,7 @@
from __future__ import print_function
from __future__ import division
import io
import os
import numpy as np
import types
......@@ -125,7 +126,7 @@ class DataReader(object):
def load_vocab(self, vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
fin = open(vocab_file)
fin = io.open(vocab_file, encoding="utf8")
for num, line in enumerate(fin):
items = self.convert_to_unicode(line.strip()).split("\t")
if len(items) > 2:
......
......@@ -14,6 +14,7 @@
# limitations under the License.
"""Run BERT on SQuAD 1.1 and SQuAD 2.0."""
import io
import six
import math
import json
......@@ -95,7 +96,7 @@ class InputFeatures(object):
def read_squad_examples(input_file, is_training, version_2_with_negative=False):
"""Read a SQuAD json file into a list of SquadExample."""
with open(input_file, "r") as reader:
with io.open(input_file, "r", encoding="utf8") as reader:
input_data = json.load(reader)["data"]
def is_whitespace(c):
......@@ -763,15 +764,15 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
all_nbest_json[example.qas_id] = nbest_json
with open(output_prediction_file, "w") as writer:
writer.write(json.dumps(all_predictions, indent=4) + "\n")
with io.open(output_prediction_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(all_predictions, indent=4) + u"\n")
with open(output_nbest_file, "w") as writer:
writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
with io.open(output_nbest_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(all_nbest_json, indent=4) + u"\n")
if version_2_with_negative:
with open(output_null_log_odds_file, "w") as writer:
writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
with io.open(output_null_log_odds_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(scores_diff_json, indent=4) + u"\n")
def get_final_text(pred_text, orig_text, do_lower_case, verbose):
......
......@@ -17,6 +17,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import os
import time
import argparse
......
......@@ -17,6 +17,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import argparse
import collections
import multiprocessing
......
......@@ -21,6 +21,7 @@ from __future__ import print_function
import collections
import unicodedata
import six
import io
def convert_to_unicode(text):
......@@ -69,7 +70,7 @@ def printable_text(text):
def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
fin = open(vocab_file)
fin = io.open(vocab_file, encoding="utf8")
for num, line in enumerate(fin):
items = convert_to_unicode(line.strip()).split("\t")
if len(items) > 2:
......
......@@ -17,6 +17,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import os
import time
import sys
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册