diff --git a/configs/kie/kie_unet_sdmgr.yml b/configs/kie/kie_unet_sdmgr.yml index a6968aaa3aa7a717a848416efc5ccc567f774b4d..da2e4fda504fcbff280788b0baf6d803cf75fe4b 100644 --- a/configs/kie/kie_unet_sdmgr.yml +++ b/configs/kie/kie_unet_sdmgr.yml @@ -17,7 +17,7 @@ Global: checkpoints: save_inference_dir: use_visualdl: False - class_path: ./train_data/wildreceipt/class_list.txt + class_path: &class_path ./train_data/wildreceipt/class_list.txt infer_img: ./train_data/wildreceipt/1.txt save_res_path: ./output/sdmgr_kie/predicts_kie.txt img_scale: [ 1024, 512 ] @@ -72,6 +72,7 @@ Train: order: 'hwc' - KieLabelEncode: # Class handling label character_dict_path: ./train_data/wildreceipt/dict.txt + class_path: *class_path - KieResize: - ToCHWImage: - KeepKeys: @@ -88,7 +89,6 @@ Eval: data_dir: ./train_data/wildreceipt label_file_list: - ./train_data/wildreceipt/wildreceipt_test.txt - # - /paddle/data/PaddleOCR/train_data/wildreceipt/1.txt transforms: - DecodeImage: # load image img_mode: RGB diff --git a/configs/vqa/ser/layoutlm.yml b/configs/vqa/ser/layoutlm.yml index 87131170c9daabd8553269b900893ac26fb32bc8..47ab093e1fce5cb38a75409eb1d9ac67c6426ba4 100644 --- a/configs/vqa/ser/layoutlm.yml +++ b/configs/vqa/ser/layoutlm.yml @@ -43,7 +43,7 @@ Optimizer: PostProcess: name: VQASerTokenLayoutLMPostProcess - class_path: &class_path ppstructure/vqa/labels/labels_ser.txt + class_path: &class_path train_data/XFUND/class_list_xfun.txt Metric: name: VQASerTokenMetric @@ -54,7 +54,7 @@ Train: name: SimpleDataSet data_dir: train_data/XFUND/zh_train/image label_file_list: - - train_data/XFUND/zh_train/xfun_normalize_train.json + - train_data/XFUND/zh_train/train.json transforms: - DecodeImage: # load image img_mode: RGB @@ -89,7 +89,7 @@ Eval: name: SimpleDataSet data_dir: train_data/XFUND/zh_val/image label_file_list: - - train_data/XFUND/zh_val/xfun_normalize_val.json + - train_data/XFUND/zh_val/val.json transforms: - DecodeImage: # load image img_mode: RGB diff --git a/configs/vqa/ser/layoutlmv2.yml b/configs/vqa/ser/layoutlmv2.yml index 33406252b31adf4175d7ea2f57772b0faf33cdab..d6a9c03e5ec9683a3a6423ed22a98f361769541f 100644 --- a/configs/vqa/ser/layoutlmv2.yml +++ b/configs/vqa/ser/layoutlmv2.yml @@ -44,7 +44,7 @@ Optimizer: PostProcess: name: VQASerTokenLayoutLMPostProcess - class_path: &class_path ppstructure/vqa/labels/labels_ser.txt + class_path: &class_path train_data/XFUND/class_list_xfun.txt Metric: name: VQASerTokenMetric @@ -55,7 +55,7 @@ Train: name: SimpleDataSet data_dir: train_data/XFUND/zh_train/image label_file_list: - - train_data/XFUND/zh_train/xfun_normalize_train.json + - train_data/XFUND/zh_train/train.json transforms: - DecodeImage: # load image img_mode: RGB @@ -90,7 +90,7 @@ Eval: name: SimpleDataSet data_dir: train_data/XFUND/zh_val/image label_file_list: - - train_data/XFUND/zh_val/xfun_normalize_val.json + - train_data/XFUND/zh_val/val.json transforms: - DecodeImage: # load image img_mode: RGB diff --git a/configs/vqa/ser/layoutxlm.yml b/configs/vqa/ser/layoutxlm.yml index eb1cca5a215dd65ef9c302441d05b482f2622a79..3686989ccf7481a28584fd49c3969c1a69cd04d4 100644 --- a/configs/vqa/ser/layoutxlm.yml +++ b/configs/vqa/ser/layoutxlm.yml @@ -11,7 +11,7 @@ Global: save_inference_dir: use_visualdl: False seed: 2022 - infer_img: doc/vqa/input/zh_val_42.jpg + infer_img: ppstructure/docs/vqa/input/zh_val_42.jpg save_res_path: ./output/ser Architecture: @@ -54,7 +54,7 @@ Train: name: SimpleDataSet data_dir: train_data/XFUND/zh_train/image label_file_list: - - train_data/XFUND/zh_train/xfun_normalize_train.json + - train_data/XFUND/zh_train/train.json ratio_list: [ 1.0 ] transforms: - DecodeImage: # load image @@ -90,7 +90,7 @@ Eval: name: SimpleDataSet data_dir: train_data/XFUND/zh_val/image label_file_list: - - train_data/XFUND/zh_val/xfun_normalize_val.json + - train_data/XFUND/zh_val/val.json transforms: - DecodeImage: # load image img_mode: RGB diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 312d6dc9ad25bfa73aa9009f932fe6f3d3ca7644..c95b326224515c43b03f90ee51c809006399dfff 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -259,15 +259,26 @@ class E2ELabelEncodeTrain(object): class KieLabelEncode(object): - def __init__(self, character_dict_path, norm=10, directed=False, **kwargs): + def __init__(self, + character_dict_path, + class_path, + norm=10, + directed=False, + **kwargs): super(KieLabelEncode, self).__init__() self.dict = dict({'': 0}) + self.label2classid_map = dict() with open(character_dict_path, 'r', encoding='utf-8') as fr: idx = 1 for line in fr: char = line.strip() self.dict[char] = idx idx += 1 + with open(class_path, "r") as fin: + lines = fin.readlines() + for idx, line in enumerate(lines): + line = line.strip("\n") + self.label2classid_map[line] = idx self.norm = norm self.directed = directed @@ -408,7 +419,7 @@ class KieLabelEncode(object): text_ind = [self.dict[c] for c in text if c in self.dict] text_inds.append(text_ind) if 'label' in ann.keys(): - labels.append(ann['label']) + labels.append(self.label2classid_map[ann['label']]) elif 'key_cls' in ann.keys(): labels.append(ann['key_cls']) else: @@ -876,15 +887,16 @@ class VQATokenLabelEncode(object): for info in ocr_info: if train_re: # for re - if len(info["text"]) == 0: + if len(info["transcription"]) == 0: empty_entity.add(info["id"]) continue id2label[info["id"]] = info["label"] relations.extend([tuple(sorted(l)) for l in info["linking"]]) # smooth_box + info["bbox"] = self.trans_poly_to_bbox(info["points"]) bbox = self._smooth_box(info["bbox"], height, width) - text = info["text"] + text = info["transcription"] encode_res = self.tokenizer.encode( text, pad_to_max_seq_len=False, return_attention_mask=True) @@ -900,7 +912,7 @@ class VQATokenLabelEncode(object): label = info['label'] gt_label = self._parse_label(label, encode_res) - # construct entities for re +# construct entities for re if train_re: if gt_label[0] != self.label2id_map["O"]: entity_id_to_index_map[info["id"]] = len(entities) @@ -944,29 +956,29 @@ class VQATokenLabelEncode(object): data['entity_id_to_index_map'] = entity_id_to_index_map return data - def _load_ocr_info(self, data): - def trans_poly_to_bbox(poly): - x1 = np.min([p[0] for p in poly]) - x2 = np.max([p[0] for p in poly]) - y1 = np.min([p[1] for p in poly]) - y2 = np.max([p[1] for p in poly]) - return [x1, y1, x2, y2] + def trans_poly_to_bbox(self, poly): + x1 = np.min([p[0] for p in poly]) + x2 = np.max([p[0] for p in poly]) + y1 = np.min([p[1] for p in poly]) + y2 = np.max([p[1] for p in poly]) + return [x1, y1, x2, y2] + def _load_ocr_info(self, data): if self.infer_mode: ocr_result = self.ocr_engine.ocr(data['image'], cls=False) ocr_info = [] for res in ocr_result: ocr_info.append({ - "text": res[1][0], - "bbox": trans_poly_to_bbox(res[0]), - "poly": res[0], + "transcription": res[1][0], + "bbox": self.trans_poly_to_bbox(res[0]), + "points": res[0], }) return ocr_info else: info = data['label'] # read text info info_dict = json.loads(info) - return info_dict["ocr_info"] + return info_dict def _smooth_box(self, bbox, height, width): bbox[0] = int(bbox[0] * 1000.0 / width) @@ -977,7 +989,7 @@ class VQATokenLabelEncode(object): def _parse_label(self, label, encode_res): gt_label = [] - if label.lower() == "other": + if label.lower() in ["other", "others", "ignore"]: gt_label.extend([0] * len(encode_res["input_ids"])) else: gt_label.append(self.label2id_map[("b-" + label).upper()]) diff --git a/ppocr/data/imaug/vqa/__init__.py b/ppocr/data/imaug/vqa/__init__.py index a5025e7985198e7ee40d6c92d8e1814eb1797032..bde175115536a3f644750260082204fe5f10dc05 100644 --- a/ppocr/data/imaug/vqa/__init__.py +++ b/ppocr/data/imaug/vqa/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. from .token import VQATokenPad, VQASerTokenChunk, VQAReTokenChunk, VQAReTokenRelation +from .augment import DistortBBox __all__ = [ - 'VQATokenPad', 'VQASerTokenChunk', 'VQAReTokenChunk', 'VQAReTokenRelation' + 'VQATokenPad', + 'VQASerTokenChunk', + 'VQAReTokenChunk', + 'VQAReTokenRelation', + 'DistortBBox', ] diff --git a/ppocr/data/imaug/vqa/augment.py b/ppocr/data/imaug/vqa/augment.py new file mode 100644 index 0000000000000000000000000000000000000000..fcdc9685e9855c3a2d8e9f6f5add270f95f15a6c --- /dev/null +++ b/ppocr/data/imaug/vqa/augment.py @@ -0,0 +1,37 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import numpy as np +import random + + +class DistortBBox: + def __init__(self, prob=0.5, max_scale=1, **kwargs): + """Random distort bbox + """ + self.prob = prob + self.max_scale = max_scale + + def __call__(self, data): + if random.random() > self.prob: + return data + bbox = np.array(data['bbox']) + rnd_scale = (np.random.rand(*bbox.shape) - 0.5) * 2 * self.max_scale + bbox = np.round(bbox + rnd_scale).astype(bbox.dtype) + data['bbox'] = np.clip(data['bbox'], 0, 1000) + data['bbox'] = bbox.tolist() + sys.stdout.flush() + return data diff --git a/ppocr/utils/utility.py b/ppocr/utils/utility.py index 4a25ff8b2fa182faaf4f4ce8909c9ec2e9b55ccc..b881fcab20bc5ca076a0002bd72349768c7d881a 100755 --- a/ppocr/utils/utility.py +++ b/ppocr/utils/utility.py @@ -91,18 +91,19 @@ def check_and_read_gif(img_path): def load_vqa_bio_label_maps(label_map_path): with open(label_map_path, "r", encoding='utf-8') as fin: lines = fin.readlines() - lines = [line.strip() for line in lines] - if "O" not in lines: - lines.insert(0, "O") - labels = [] - for line in lines: - if line == "O": - labels.append("O") - else: - labels.append("B-" + line) - labels.append("I-" + line) - label2id_map = {label: idx for idx, label in enumerate(labels)} - id2label_map = {idx: label for idx, label in enumerate(labels)} + old_lines = [line.strip() for line in lines] + lines = ["O"] + for line in old_lines: + # "O" has already been in lines + if line.upper() in ["OTHER", "OTHERS", "IGNORE"]: + continue + lines.append(line) + labels = ["O"] + for line in lines[1:]: + labels.append("B-" + line) + labels.append("I-" + line) + label2id_map = {label.upper(): idx for idx, label in enumerate(labels)} + id2label_map = {idx: label.upper() for idx, label in enumerate(labels)} return label2id_map, id2label_map diff --git a/ppocr/utils/visual.py b/ppocr/utils/visual.py index 7a8c1674a74f89299de59f7cd120b4577a7499d8..235eb572a3975b4446ae2f2c9ad9c8558d5c5ad8 100644 --- a/ppocr/utils/visual.py +++ b/ppocr/utils/visual.py @@ -19,7 +19,7 @@ from PIL import Image, ImageDraw, ImageFont def draw_ser_results(image, ocr_results, font_path="doc/fonts/simfang.ttf", - font_size=18): + font_size=14): np.random.seed(2021) color = (np.random.permutation(range(255)), np.random.permutation(range(255)), @@ -40,9 +40,15 @@ def draw_ser_results(image, if ocr_info["pred_id"] not in color_map: continue color = color_map[ocr_info["pred_id"]] - text = "{}: {}".format(ocr_info["pred"], ocr_info["text"]) + text = "{}: {}".format(ocr_info["pred"], ocr_info["transcription"]) - draw_box_txt(ocr_info["bbox"], text, draw, font, font_size, color) + if "bbox" in ocr_info: + # draw with ocr engine + bbox = ocr_info["bbox"] + else: + # draw with ocr groundtruth + bbox = trans_poly_to_bbox(ocr_info["points"]) + draw_box_txt(bbox, text, draw, font, font_size, color) img_new = Image.blend(image, img_new, 0.5) return np.array(img_new) @@ -62,6 +68,14 @@ def draw_box_txt(bbox, text, draw, font, font_size, color): draw.text((bbox[0][0] + 1, start_y), text, fill=(255, 255, 255), font=font) +def trans_poly_to_bbox(poly): + x1 = np.min([p[0] for p in poly]) + x2 = np.max([p[0] for p in poly]) + y1 = np.min([p[1] for p in poly]) + y2 = np.max([p[1] for p in poly]) + return [x1, y1, x2, y2] + + def draw_re_results(image, result, font_path="doc/fonts/simfang.ttf", @@ -80,10 +94,10 @@ def draw_re_results(image, color_line = (0, 255, 0) for ocr_info_head, ocr_info_tail in result: - draw_box_txt(ocr_info_head["bbox"], ocr_info_head["text"], draw, font, - font_size, color_head) - draw_box_txt(ocr_info_tail["bbox"], ocr_info_tail["text"], draw, font, - font_size, color_tail) + draw_box_txt(ocr_info_head["bbox"], ocr_info_head["transcription"], + draw, font, font_size, color_head) + draw_box_txt(ocr_info_tail["bbox"], ocr_info_tail["transcription"], + draw, font, font_size, color_tail) center_head = ( (ocr_info_head['bbox'][0] + ocr_info_head['bbox'][2]) // 2, diff --git a/ppstructure/docs/kie.md b/ppstructure/docs/kie.md index 35498b33478d1010fd2548dfcb8586b4710723a1..315dd9f7bafa6b6160489eab330e8d278b2d119d 100644 --- a/ppstructure/docs/kie.md +++ b/ppstructure/docs/kie.md @@ -16,7 +16,7 @@ SDMGR是一个关键信息提取算法,将每个检测到的文本区域分类 训练和测试的数据采用wildreceipt数据集,通过如下指令下载数据集: ``` -wget https://paddleocr.bj.bcebos.com/dygraph_v2.1/kie/wildreceipt.tar && tar xf wildreceipt.tar +wget https://paddleocr.bj.bcebos.com/ppstructure/dataset/wildreceipt.tar && tar xf wildreceipt.tar ``` 执行预测: diff --git a/ppstructure/docs/kie_en.md b/ppstructure/docs/kie_en.md index 1fe38b0b399e9290526dafa5409673dc87026db7..7b3752223dd765e780d56d146c90bd0f892aac7b 100644 --- a/ppstructure/docs/kie_en.md +++ b/ppstructure/docs/kie_en.md @@ -15,7 +15,7 @@ This section provides a tutorial example on how to quickly use, train, and evalu [Wildreceipt dataset](https://paperswithcode.com/dataset/wildreceipt) is used for this tutorial. It contains 1765 photos, with 25 classes, and 50000 text boxes, which can be downloaded by wget: ```shell -wget https://paddleocr.bj.bcebos.com/dygraph_v2.1/kie/wildreceipt.tar && tar xf wildreceipt.tar +wget https://paddleocr.bj.bcebos.com/ppstructure/dataset/wildreceipt.tar && tar xf wildreceipt.tar ``` Download the pretrained model and predict the result: diff --git a/ppstructure/vqa/README.md b/ppstructure/vqa/README.md index e3a10671ddb6494eb15073e7ac007aa1e8e6a32a..711ffa313865cd5a210143819cd4604dc28ef4f4 100644 --- a/ppstructure/vqa/README.md +++ b/ppstructure/vqa/README.md @@ -125,13 +125,13 @@ If you want to experience the prediction process directly, you can download the * Download the processed dataset -The download address of the processed XFUND Chinese dataset: [https://paddleocr.bj.bcebos.com/dataset/XFUND.tar](https://paddleocr.bj.bcebos.com/dataset/XFUND.tar). +The download address of the processed XFUND Chinese dataset: [link](https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar). Download and unzip the dataset, and place the dataset in the current directory after unzipping. ```shell -wget https://paddleocr.bj.bcebos.com/dataset/XFUND.tar +wget https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar ```` * Convert the dataset diff --git a/ppstructure/vqa/README_ch.md b/ppstructure/vqa/README_ch.md index b677dc07bce6c1a752d753b6a1c538b4d3f99271..297ba64f82e70eafd4a0b1fee0764899799219ad 100644 --- a/ppstructure/vqa/README_ch.md +++ b/ppstructure/vqa/README_ch.md @@ -122,13 +122,13 @@ python3 -m pip install -r ppstructure/vqa/requirements.txt * 下载处理好的数据集 -处理好的XFUND中文数据集下载地址:[https://paddleocr.bj.bcebos.com/dataset/XFUND.tar](https://paddleocr.bj.bcebos.com/dataset/XFUND.tar)。 +处理好的XFUND中文数据集下载地址:[链接](https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar)。 下载并解压该数据集,解压后将数据集放置在当前目录下。 ```shell -wget https://paddleocr.bj.bcebos.com/dataset/XFUND.tar +wget https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar ``` * 转换数据集 diff --git a/ppstructure/vqa/labels/labels_ser.txt b/ppstructure/vqa/labels/labels_ser.txt deleted file mode 100644 index 508e48112412f62538baf0c78bcf99ec8945196e..0000000000000000000000000000000000000000 --- a/ppstructure/vqa/labels/labels_ser.txt +++ /dev/null @@ -1,3 +0,0 @@ -QUESTION -ANSWER -HEADER diff --git a/ppstructure/vqa/tools/trans_xfun_data.py b/ppstructure/vqa/tools/trans_xfun_data.py index 93ec98163c6cec96ec93399c1d41524200ddc499..11d221bea40367f091b3e09dde42e87f2217a617 100644 --- a/ppstructure/vqa/tools/trans_xfun_data.py +++ b/ppstructure/vqa/tools/trans_xfun_data.py @@ -21,26 +21,22 @@ def transfer_xfun_data(json_path=None, output_file=None): json_info = json.loads(lines[0]) documents = json_info["documents"] - label_info = {} with open(output_file, "w", encoding='utf-8') as fout: for idx, document in enumerate(documents): + label_info = [] img_info = document["img"] document = document["document"] image_path = img_info["fname"] - label_info["height"] = img_info["height"] - label_info["width"] = img_info["width"] - - label_info["ocr_info"] = [] - for doc in document: - label_info["ocr_info"].append({ - "text": doc["text"], + x1, y1, x2, y2 = doc["box"] + points = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]] + label_info.append({ + "transcription": doc["text"], "label": doc["label"], - "bbox": doc["box"], + "points": points, "id": doc["id"], - "linking": doc["linking"], - "words": doc["words"] + "linking": doc["linking"] }) fout.write(image_path + "\t" + json.dumps( diff --git a/tools/infer_kie.py b/tools/infer_kie.py index 0cb0b8702cbd7ea74a7b7fcff69122731578a1bd..346e2e0aeeee695ab49577b6b13dcc058150df1a 100755 --- a/tools/infer_kie.py +++ b/tools/infer_kie.py @@ -39,13 +39,12 @@ import time def read_class_list(filepath): - dict = {} + ret = {} with open(filepath, "r") as f: lines = f.readlines() - for line in lines: - key, value = line.split(" ") - dict[key] = value.rstrip() - return dict + for idx, line in enumerate(lines): + ret[idx] = line.strip("\n") + return ret def draw_kie_result(batch, node, idx_to_cls, count): @@ -71,7 +70,7 @@ def draw_kie_result(batch, node, idx_to_cls, count): x_min = int(min([point[0] for point in new_box])) y_min = int(min([point[1] for point in new_box])) - pred_label = str(node_pred_label[i]) + pred_label = node_pred_label[i] if pred_label in idx_to_cls: pred_label = idx_to_cls[pred_label] pred_score = '{:.2f}'.format(node_pred_score[i]) @@ -109,8 +108,7 @@ def main(): save_res_path = config['Global']['save_res_path'] class_path = config['Global']['class_path'] idx_to_cls = read_class_list(class_path) - if not os.path.exists(os.path.dirname(save_res_path)): - os.makedirs(os.path.dirname(save_res_path)) + os.makedirs(os.path.dirname(save_res_path), exist_ok=True) model.eval() diff --git a/tools/infer_vqa_token_ser.py b/tools/infer_vqa_token_ser.py index 83ed72b392e627c161903c3945f57be0abfabc2b..39ada64a99847a910158b74672c89398ba08f032 100755 --- a/tools/infer_vqa_token_ser.py +++ b/tools/infer_vqa_token_ser.py @@ -86,15 +86,16 @@ class SerPredictor(object): ] transforms.append(op) - global_config['infer_mode'] = True + if config["Global"].get("infer_mode", None) is None: + global_config['infer_mode'] = True self.ops = create_operators(config['Eval']['dataset']['transforms'], global_config) self.model.eval() - def __call__(self, img_path): - with open(img_path, 'rb') as f: + def __call__(self, data): + with open(data["img_path"], 'rb') as f: img = f.read() - data = {'image': img} + data["image"] = img batch = transform(data, self.ops) batch = to_tensor(batch) preds = self.model(batch) @@ -112,20 +113,35 @@ if __name__ == '__main__': ser_engine = SerPredictor(config) - infer_imgs = get_image_file_list(config['Global']['infer_img']) + if config["Global"].get("infer_mode", None) is False: + data_dir = config['Eval']['dataset']['data_dir'] + with open(config['Global']['infer_img'], "rb") as f: + infer_imgs = f.readlines() + else: + infer_imgs = get_image_file_list(config['Global']['infer_img']) + with open( os.path.join(config['Global']['save_res_path'], "infer_results.txt"), "w", encoding='utf-8') as fout: - for idx, img_path in enumerate(infer_imgs): + for idx, info in enumerate(infer_imgs): + if config["Global"].get("infer_mode", None) is False: + data_line = info.decode('utf-8') + substr = data_line.strip("\n").split("\t") + img_path = os.path.join(data_dir, substr[0]) + data = {'img_path': img_path, 'label': substr[1]} + else: + img_path = info + data = {'img_path': img_path} + save_img_path = os.path.join( config['Global']['save_res_path'], os.path.splitext(os.path.basename(img_path))[0] + "_ser.jpg") logger.info("process: [{}/{}], save result to {}".format( idx, len(infer_imgs), save_img_path)) - result, _ = ser_engine(img_path) + result, _ = ser_engine(data) result = result[0] fout.write(img_path + "\t" + json.dumps( { diff --git a/tools/program.py b/tools/program.py index aa3ba82c44d6afba725a8059dc7f8cae41947b3d..f598feb7e87d2f04a30d8ac27c5d2442f90eaf80 100755 --- a/tools/program.py +++ b/tools/program.py @@ -576,8 +576,8 @@ def preprocess(is_train=False): assert alg in [ 'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN', 'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE', - 'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'PREN', 'FCE', 'SVTR', - 'ViTSTR', 'ABINet' + 'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'LayoutLMv2', 'PREN', 'FCE', + 'SVTR', 'ViTSTR', 'ABINet' ] if use_xpu: