From dc51469b42c3e29c6342c52e669fd449bf75dc36 Mon Sep 17 00:00:00 2001 From: WenmuZhou <572459439@qq.com> Date: Sun, 19 Dec 2021 07:10:03 +0000 Subject: [PATCH] add encoding='utf-8' --- ppstructure/vqa/eval_ser.py | 8 ++++++-- ppstructure/vqa/helper/eval_with_label_end2end.py | 2 +- ppstructure/vqa/helper/trans_xfun_data.py | 4 ++-- ppstructure/vqa/infer_re.py | 2 +- ppstructure/vqa/infer_ser.py | 10 +++++++--- ppstructure/vqa/infer_ser_e2e.py | 5 ++++- ppstructure/vqa/infer_ser_re_e2e.py | 5 ++++- ppstructure/vqa/utils.py | 2 +- ppstructure/vqa/xfun.py | 2 +- 9 files changed, 27 insertions(+), 13 deletions(-) diff --git a/ppstructure/vqa/eval_ser.py b/ppstructure/vqa/eval_ser.py index e56aa27c..e0612219 100644 --- a/ppstructure/vqa/eval_ser.py +++ b/ppstructure/vqa/eval_ser.py @@ -128,12 +128,16 @@ def evaluate(args, "f1": f1_score(out_label_list, preds_list), } - with open(os.path.join(args.output_dir, "test_gt.txt"), "w") as fout: + with open( + os.path.join(args.output_dir, "test_gt.txt"), "w", + encoding='utf-8') as fout: for lbl in out_label_list: for l in lbl: fout.write(l + "\t") fout.write("\n") - with open(os.path.join(args.output_dir, "test_pred.txt"), "w") as fout: + with open( + os.path.join(args.output_dir, "test_pred.txt"), "w", + encoding='utf-8') as fout: for lbl in preds_list: for l in lbl: fout.write(l + "\t") diff --git a/ppstructure/vqa/helper/eval_with_label_end2end.py b/ppstructure/vqa/helper/eval_with_label_end2end.py index a983b080..3aa439ac 100644 --- a/ppstructure/vqa/helper/eval_with_label_end2end.py +++ b/ppstructure/vqa/helper/eval_with_label_end2end.py @@ -37,7 +37,7 @@ def parse_ser_results_fp(fp, fp_type="gt", ignore_background=True): assert fp_type in ["gt", "pred"] key = "label" if fp_type == "gt" else "pred" res_dict = dict() - with open(fp, "r") as fin: + with open(fp, "r", encoding='utf-8') as fin: lines = fin.readlines() for _, line in enumerate(lines): diff --git a/ppstructure/vqa/helper/trans_xfun_data.py b/ppstructure/vqa/helper/trans_xfun_data.py index b5ebd5df..25b3963d 100644 --- a/ppstructure/vqa/helper/trans_xfun_data.py +++ b/ppstructure/vqa/helper/trans_xfun_data.py @@ -16,13 +16,13 @@ import json def transfer_xfun_data(json_path=None, output_file=None): - with open(json_path, "r") as fin: + with open(json_path, "r", encoding='utf-8') as fin: lines = fin.readlines() json_info = json.loads(lines[0]) documents = json_info["documents"] label_info = {} - with open(output_file, "w") as fout: + with open(output_file, "w", encoding='utf-8') as fout: for idx, document in enumerate(documents): img_info = document["img"] document = document["document"] diff --git a/ppstructure/vqa/infer_re.py b/ppstructure/vqa/infer_re.py index ae2f5255..139ade9a 100644 --- a/ppstructure/vqa/infer_re.py +++ b/ppstructure/vqa/infer_re.py @@ -92,7 +92,7 @@ def infer(args): def load_ocr(img_folder, json_path): import json d = [] - with open(json_path, "r") as fin: + with open(json_path, "r", encoding='utf-8') as fin: lines = fin.readlines() for line in lines: image_name, info_str = line.split("\t") diff --git a/ppstructure/vqa/infer_ser.py b/ppstructure/vqa/infer_ser.py index 4ad22009..3c7ad03a 100644 --- a/ppstructure/vqa/infer_ser.py +++ b/ppstructure/vqa/infer_ser.py @@ -59,7 +59,8 @@ def pad_sentences(tokenizer, encoded_inputs["bbox"] = encoded_inputs["bbox"] + [[0, 0, 0, 0] ] * difference else: - assert False, f"padding_side of tokenizer just supports [\"right\"] but got {tokenizer.padding_side}" + assert False, "padding_side of tokenizer just supports [\"right\"] but got {}".format( + tokenizer.padding_side) else: if return_attention_mask: encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[ @@ -224,7 +225,7 @@ def infer(args): # load ocr results json ocr_results = dict() - with open(args.ocr_json_path, "r") as fin: + with open(args.ocr_json_path, "r", encoding='utf-8') as fin: lines = fin.readlines() for line in lines: img_name, json_info = line.split("\t") @@ -234,7 +235,10 @@ def infer(args): infer_imgs = get_image_file_list(args.infer_imgs) # loop for infer - with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout: + with open( + os.path.join(args.output_dir, "infer_results.txt"), + "w", + encoding='utf-8') as fout: for idx, img_path in enumerate(infer_imgs): print("process: [{}/{}]".format(idx, len(infer_imgs), img_path)) diff --git a/ppstructure/vqa/infer_ser_e2e.py b/ppstructure/vqa/infer_ser_e2e.py index 7cd9907d..a93a98ff 100644 --- a/ppstructure/vqa/infer_ser_e2e.py +++ b/ppstructure/vqa/infer_ser_e2e.py @@ -113,7 +113,10 @@ if __name__ == "__main__": # loop for infer ser_engine = SerPredictor(args) - with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout: + with open( + os.path.join(args.output_dir, "infer_results.txt"), + "w", + encoding='utf-8') as fout: for idx, img_path in enumerate(infer_imgs): print("process: [{}/{}], {}".format(idx, len(infer_imgs), img_path)) diff --git a/ppstructure/vqa/infer_ser_re_e2e.py b/ppstructure/vqa/infer_ser_re_e2e.py index a1d0f52e..23737406 100644 --- a/ppstructure/vqa/infer_ser_re_e2e.py +++ b/ppstructure/vqa/infer_ser_re_e2e.py @@ -112,7 +112,10 @@ if __name__ == "__main__": # loop for infer ser_re_engine = SerReSystem(args) - with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout: + with open( + os.path.join(args.output_dir, "infer_results.txt"), + "w", + encoding='utf-8') as fout: for idx, img_path in enumerate(infer_imgs): print("process: [{}/{}], {}".format(idx, len(infer_imgs), img_path)) diff --git a/ppstructure/vqa/utils.py b/ppstructure/vqa/utils.py index 7e862e97..44a62980 100644 --- a/ppstructure/vqa/utils.py +++ b/ppstructure/vqa/utils.py @@ -32,7 +32,7 @@ def set_seed(seed): def get_bio_label_maps(label_map_path): - with open(label_map_path, "r") as fin: + with open(label_map_path, "r", encoding='utf-8') as fin: lines = fin.readlines() lines = [line.strip() for line in lines] if "O" not in lines: diff --git a/ppstructure/vqa/xfun.py b/ppstructure/vqa/xfun.py index d62cdb5d..eb9750dd 100644 --- a/ppstructure/vqa/xfun.py +++ b/ppstructure/vqa/xfun.py @@ -162,7 +162,7 @@ class XFUNDataset(Dataset): return encoded_inputs def read_all_lines(self, ): - with open(self.label_path, "r") as fin: + with open(self.label_path, "r", encoding='utf-8') as fin: lines = fin.readlines() return lines -- GitLab