提交 dc5abb6f 作者: Yibing Liu 提交者: pkpk

Fix Windows encoding problem (#3595) (#3597)

上级 fc437d49
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import io
import os import os
import types import types
import csv import csv
...@@ -100,7 +101,7 @@ class DataProcessor(object): ...@@ -100,7 +101,7 @@ class DataProcessor(object):
@classmethod @classmethod
def _read_tsv(cls, input_file, quotechar=None): def _read_tsv(cls, input_file, quotechar=None):
"""Reads a tab separated value file.""" """Reads a tab separated value file."""
with open(input_file, "r") as f: with io.open(input_file, "r", encoding="utf8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar) reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
lines = [] lines = []
for line in reader: for line in reader:
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
from __future__ import print_function from __future__ import print_function
from __future__ import division from __future__ import division
import io
import os import os
import numpy as np import numpy as np
import types import types
...@@ -125,7 +126,7 @@ class DataReader(object): ...@@ -125,7 +126,7 @@ class DataReader(object):
def load_vocab(self, vocab_file): def load_vocab(self, vocab_file):
"""Loads a vocabulary file into a dictionary.""" """Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict() vocab = collections.OrderedDict()
fin = open(vocab_file) fin = io.open(vocab_file, encoding="utf8")
for num, line in enumerate(fin): for num, line in enumerate(fin):
items = self.convert_to_unicode(line.strip()).split("\t") items = self.convert_to_unicode(line.strip()).split("\t")
if len(items) > 2: if len(items) > 2:
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
"""Run BERT on SQuAD 1.1 and SQuAD 2.0.""" """Run BERT on SQuAD 1.1 and SQuAD 2.0."""
import io
import six import six
import math import math
import json import json
...@@ -95,7 +96,7 @@ class InputFeatures(object): ...@@ -95,7 +96,7 @@ class InputFeatures(object):
def read_squad_examples(input_file, is_training, version_2_with_negative=False): def read_squad_examples(input_file, is_training, version_2_with_negative=False):
"""Read a SQuAD json file into a list of SquadExample.""" """Read a SQuAD json file into a list of SquadExample."""
with open(input_file, "r") as reader: with io.open(input_file, "r", encoding="utf8") as reader:
input_data = json.load(reader)["data"] input_data = json.load(reader)["data"]
def is_whitespace(c): def is_whitespace(c):
...@@ -763,15 +764,15 @@ def write_predictions(all_examples, all_features, all_results, n_best_size, ...@@ -763,15 +764,15 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
all_nbest_json[example.qas_id] = nbest_json all_nbest_json[example.qas_id] = nbest_json
with open(output_prediction_file, "w") as writer: with io.open(output_prediction_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(all_predictions, indent=4) + "\n") writer.write(json.dumps(all_predictions, indent=4) + u"\n")
with open(output_nbest_file, "w") as writer: with io.open(output_nbest_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(all_nbest_json, indent=4) + "\n") writer.write(json.dumps(all_nbest_json, indent=4) + u"\n")
if version_2_with_negative: if version_2_with_negative:
with open(output_null_log_odds_file, "w") as writer: with io.open(output_null_log_odds_file, "w", encoding="utf8") as writer:
writer.write(json.dumps(scores_diff_json, indent=4) + "\n") writer.write(json.dumps(scores_diff_json, indent=4) + u"\n")
def get_final_text(pred_text, orig_text, do_lower_case, verbose): def get_final_text(pred_text, orig_text, do_lower_case, verbose):
......
...@@ -17,6 +17,10 @@ from __future__ import absolute_import ...@@ -17,6 +17,10 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import sys

# Python 2 only: switch the interpreter-wide default codec to UTF-8 so that
# implicit str <-> unicode conversions do not fail on non-ASCII text.
# Python 3 has neither a builtin reload() nor sys.setdefaultencoding(), and
# its default encoding is already UTF-8, so the workaround is skipped there
# instead of raising NameError at import time.
if sys.version_info[0] == 2:
    # site.py removes sys.setdefaultencoding at startup; reloading the
    # module makes the function available again.
    reload(sys)
    sys.setdefaultencoding('utf8')
import os import os
import time import time
import argparse import argparse
......
...@@ -17,6 +17,10 @@ from __future__ import absolute_import ...@@ -17,6 +17,10 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import sys

# Python 2 only: switch the interpreter-wide default codec to UTF-8 so that
# implicit str <-> unicode conversions do not fail on non-ASCII text.
# Python 3 has neither a builtin reload() nor sys.setdefaultencoding(), and
# its default encoding is already UTF-8, so the workaround is skipped there
# instead of raising NameError at import time.
if sys.version_info[0] == 2:
    # site.py removes sys.setdefaultencoding at startup; reloading the
    # module makes the function available again.
    reload(sys)
    sys.setdefaultencoding('utf8')
import argparse import argparse
import collections import collections
import multiprocessing import multiprocessing
......
...@@ -21,6 +21,7 @@ from __future__ import print_function ...@@ -21,6 +21,7 @@ from __future__ import print_function
import collections import collections
import unicodedata import unicodedata
import six import six
import io
def convert_to_unicode(text): def convert_to_unicode(text):
...@@ -69,7 +70,7 @@ def printable_text(text): ...@@ -69,7 +70,7 @@ def printable_text(text):
def load_vocab(vocab_file): def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary.""" """Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict() vocab = collections.OrderedDict()
fin = open(vocab_file) fin = io.open(vocab_file, encoding="utf8")
for num, line in enumerate(fin): for num, line in enumerate(fin):
items = convert_to_unicode(line.strip()).split("\t") items = convert_to_unicode(line.strip()).split("\t")
if len(items) > 2: if len(items) > 2:
......
...@@ -17,6 +17,10 @@ from __future__ import absolute_import ...@@ -17,6 +17,10 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import sys

# Python 2 only: switch the interpreter-wide default codec to UTF-8 so that
# implicit str <-> unicode conversions do not fail on non-ASCII text.
# Python 3 has neither a builtin reload() nor sys.setdefaultencoding(), and
# its default encoding is already UTF-8, so the workaround is skipped there
# instead of raising NameError at import time.
if sys.version_info[0] == 2:
    # site.py removes sys.setdefaultencoding at startup; reloading the
    # module makes the function available again.
    reload(sys)
    sys.setdefaultencoding('utf8')
import os import os
import time import time
import sys import sys
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册