未验证 提交 941869e6 编写于 作者: X xiaoting 提交者: GitHub

Revert "add encoding for open (#10769)" (#10789)

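This reverts commit c1b943fb. That commit added `encoding="utf-8"` to `open()` calls, including ones that open files in binary mode (`"rb"`); Python's `open()` rejects that combination with `ValueError: binary mode doesn't take an encoding argument`.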
上级 c1b943fb
...@@ -118,7 +118,7 @@ class BaseRecLabelEncode(object): ...@@ -118,7 +118,7 @@ class BaseRecLabelEncode(object):
self.lower = True self.lower = True
else: else:
self.character_str = [] self.character_str = []
with open(character_dict_path, "rb", encoding="utf-8") as fin: with open(character_dict_path, "rb") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
line = line.decode('utf-8').strip("\n").strip("\r\n") line = line.decode('utf-8').strip("\n").strip("\r\n")
...@@ -278,7 +278,7 @@ class KieLabelEncode(object): ...@@ -278,7 +278,7 @@ class KieLabelEncode(object):
char = line.strip() char = line.strip()
self.dict[char] = idx self.dict[char] = idx
idx += 1 idx += 1
with open(class_path, "r", encoding="utf-8") as fin: with open(class_path, "r") as fin:
lines = fin.readlines() lines = fin.readlines()
for idx, line in enumerate(lines): for idx, line in enumerate(lines):
line = line.strip("\n") line = line.strip("\n")
...@@ -640,7 +640,7 @@ class TableLabelEncode(AttnLabelEncode): ...@@ -640,7 +640,7 @@ class TableLabelEncode(AttnLabelEncode):
self.replace_empty_cell_token = replace_empty_cell_token self.replace_empty_cell_token = replace_empty_cell_token
dict_character = [] dict_character = []
with open(character_dict_path, "rb", encoding="utf-8") as fin: with open(character_dict_path, "rb") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
line = line.decode('utf-8').strip("\n").strip("\r\n") line = line.decode('utf-8').strip("\n").strip("\r\n")
...@@ -1380,7 +1380,7 @@ class SRLabelEncode(BaseRecLabelEncode): ...@@ -1380,7 +1380,7 @@ class SRLabelEncode(BaseRecLabelEncode):
super(SRLabelEncode, self).__init__(max_text_length, super(SRLabelEncode, self).__init__(max_text_length,
character_dict_path, use_space_char) character_dict_path, use_space_char)
self.dic = {} self.dic = {}
with open(character_dict_path, 'r', encoding="utf-8") as fin: with open(character_dict_path, 'r') as fin:
for line in fin.readlines(): for line in fin.readlines():
line = line.strip() line = line.strip()
character, sequence = line.split() character, sequence = line.split()
......
...@@ -59,7 +59,7 @@ class PubTabDataSet(Dataset): ...@@ -59,7 +59,7 @@ class PubTabDataSet(Dataset):
file_list = [file_list] file_list = [file_list]
data_lines = [] data_lines = []
for idx, file in enumerate(file_list): for idx, file in enumerate(file_list):
with open(file, "rb", encoding="utf-8") as f: with open(file, "rb") as f:
lines = f.readlines() lines = f.readlines()
if self.mode == "train" or ratio_list[idx] < 1.0: if self.mode == "train" or ratio_list[idx] < 1.0:
random.seed(self.seed) random.seed(self.seed)
...@@ -112,7 +112,7 @@ class PubTabDataSet(Dataset): ...@@ -112,7 +112,7 @@ class PubTabDataSet(Dataset):
'file_name': file_name 'file_name': file_name
} }
with open(data['img_path'], 'rb', encoding="utf-8") as f: with open(data['img_path'], 'rb') as f:
img = f.read() img = f.read()
data['image'] = img data['image'] = img
outs = transform(data, self.ops) outs = transform(data, self.ops)
......
...@@ -74,7 +74,7 @@ class SimpleDataSet(Dataset): ...@@ -74,7 +74,7 @@ class SimpleDataSet(Dataset):
file_list = [file_list] file_list = [file_list]
data_lines = [] data_lines = []
for idx, file in enumerate(file_list): for idx, file in enumerate(file_list):
with open(file, "rb", encoding="utf-8") as f: with open(file, "rb") as f:
lines = f.readlines() lines = f.readlines()
if self.mode == "train" or ratio_list[idx] < 1.0: if self.mode == "train" or ratio_list[idx] < 1.0:
random.seed(self.seed) random.seed(self.seed)
...@@ -120,7 +120,7 @@ class SimpleDataSet(Dataset): ...@@ -120,7 +120,7 @@ class SimpleDataSet(Dataset):
data = {'img_path': img_path, 'label': label} data = {'img_path': img_path, 'label': label}
if not os.path.exists(img_path): if not os.path.exists(img_path):
continue continue
with open(data['img_path'], 'rb', encoding="utf-8") as f: with open(data['img_path'], 'rb') as f:
img = f.read() img = f.read()
data['image'] = img data['image'] = img
data = transform(data, load_data_ops) data = transform(data, load_data_ops)
...@@ -146,7 +146,7 @@ class SimpleDataSet(Dataset): ...@@ -146,7 +146,7 @@ class SimpleDataSet(Dataset):
data = {'img_path': img_path, 'label': label} data = {'img_path': img_path, 'label': label}
if not os.path.exists(img_path): if not os.path.exists(img_path):
raise Exception("{} does not exist!".format(img_path)) raise Exception("{} does not exist!".format(img_path))
with open(data['img_path'], 'rb', encoding="utf-8") as f: with open(data['img_path'], 'rb') as f:
img = f.read() img = f.read()
data['image'] = img data['image'] = img
data['ext_data'] = self.get_ext_data() data['ext_data'] = self.get_ext_data()
...@@ -240,7 +240,7 @@ class MultiScaleDataSet(SimpleDataSet): ...@@ -240,7 +240,7 @@ class MultiScaleDataSet(SimpleDataSet):
data = {'img_path': img_path, 'label': label} data = {'img_path': img_path, 'label': label}
if not os.path.exists(img_path): if not os.path.exists(img_path):
raise Exception("{} does not exist!".format(img_path)) raise Exception("{} does not exist!".format(img_path))
with open(data['img_path'], 'rb', encoding="utf-8") as f: with open(data['img_path'], 'rb') as f:
img = f.read() img = f.read()
data['image'] = img data['image'] = img
data['ext_data'] = self.get_ext_data() data['ext_data'] = self.get_ext_data()
......
...@@ -31,7 +31,7 @@ class BaseRecLabelDecode(object): ...@@ -31,7 +31,7 @@ class BaseRecLabelDecode(object):
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str) dict_character = list(self.character_str)
else: else:
with open(character_dict_path, "rb", encoding="utf-8") as fin: with open(character_dict_path, "rb") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
line = line.decode('utf-8').strip("\n").strip("\r\n") line = line.decode('utf-8').strip("\n").strip("\r\n")
......
...@@ -26,7 +26,7 @@ class TableLabelDecode(AttnLabelDecode): ...@@ -26,7 +26,7 @@ class TableLabelDecode(AttnLabelDecode):
merge_no_span_structure=False, merge_no_span_structure=False,
**kwargs): **kwargs):
dict_character = [] dict_character = []
with open(character_dict_path, "rb", encoding="utf-8") as fin: with open(character_dict_path, "rb") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
line = line.decode('utf-8').strip("\n").strip("\r\n") line = line.decode('utf-8').strip("\n").strip("\r\n")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册