diff --git a/doc/doc_ch/e2e.md b/doc/doc_ch/e2e.md index 3927865de982d8fbc9472f5afda322338e48c503..e19096048c61a504f756636243aa5c5497a45bf5 100644 --- a/doc/doc_ch/e2e.md +++ b/doc/doc_ch/e2e.md @@ -31,7 +31,7 @@ |- rgb/ total_text数据集的训练数据 |- gt_0.png | ... - |-poly/ total_text数据集的测试标注 + |- poly/ total_text数据集的测试标注 |- gt_0.txt | ... ``` @@ -52,19 +52,11 @@ 您可以根据需求使用[PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/master/ppcls/modeling/architectures)中的模型更换backbone。 ```shell cd PaddleOCR/ -下载ResNet50_vd的预训练模型 -wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar +下载ResNet50_vd的动态图预训练模型 +wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams -# 解压预训练模型文件,以ResNet50_vd为例 -tar -xf ./pretrain_models/ResNet50_vd_ssld_pretrained.tar ./pretrain_models/ - -# 注:正确解压backbone预训练权重文件后,文件夹下包含众多以网络层命名的权重文件,格式如下: -./pretrain_models/ResNet50_vd_ssld_pretrained/ - └─ conv_last_bn_mean - └─ conv_last_bn_offset - └─ conv_last_bn_scale - └─ conv_last_bn_variance - └─ ...... 
+./pretrain_models/ + └─ ResNet50_vd_ssld_pretrained.pdparams ``` @@ -74,11 +66,9 @@ tar -xf ./pretrain_models/ResNet50_vd_ssld_pretrained.tar ./pretrain_models/ ```shell # 单机单卡训练 e2e 模型 -python3 tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml \ - -o Global.pretrain_weights=./pretrain_models/ResNet50_vd_ssld_pretrained/ Global.load_static_weights=True +python3 tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./pretrain_models/ResNet50_vd_ssld_pretrained Global.load_static_weights=False # 单机多卡训练,通过 --gpus 参数设置使用的GPU ID -python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml \ - -o Global.pretrain_weights=./pretrain_models/ResNet50_vd_ssld_pretrained/ Global.load_static_weights=True +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./pretrain_models/ResNet50_vd_ssld_pretrained Global.load_static_weights=False ``` diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md index f06524090871f55dac5b2b3ef99bdce0c0ace749..1288d90692e154220b8ceb22cd7b6d98f53d3efb 100755 --- a/doc/doc_ch/inference.md +++ b/doc/doc_ch/inference.md @@ -369,9 +369,9 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904) ### 1. PGNet端到端模型推理 #### (1). 
四边形文本检测模型(ICDAR2015) -首先将PGNet端到端训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)),可以使用如下命令进行转换: +首先将PGNet端到端训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar)),可以使用如下命令进行转换: ``` -python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./det_r50_vd_sast_icdar15_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e +python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/iter_epoch_450 Global.load_static_weights=False Global.save_inference_dir=./inference/e2e ``` **PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`**,可以执行如下命令: ``` @@ -382,15 +382,10 @@ python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/im ![](../imgs_results/e2e_res_img_10_pgnet.jpg) #### (2). 
弯曲文本检测模型(Total-Text) -首先将PGNet端到端训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在Total-Text英文数据集训练的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)),可以使用如下命令进行转换: - -``` -python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./det_r50_vd_sast_totaltext_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e -``` - +和四边形文本检测模型共用一个推理模型。 **PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`,同时,还需要增加参数`--e2e_pgnet_polygon=True`,**可以执行如下命令: ``` -python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True +python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True ``` 可视化文本端到端结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下: diff --git a/ppocr/data/imaug/pg_process.py b/ppocr/data/imaug/pg_process.py index 09382f7ed7e6c0c6bd9ff704cf42358a61c4165a..0c9439d7a274af27ca8d296d5e737bafdec3bd1f 100644 --- a/ppocr/data/imaug/pg_process.py +++ b/ppocr/data/imaug/pg_process.py @@ -27,7 +27,7 @@ class PGProcessTrain(object): tcl_len, batch_size=14, min_crop_size=24, - min_text_size=10, + min_text_size=4, max_text_size=512, **kwargs): self.tcl_len = tcl_len @@ -197,7 +197,6 @@ class PGProcessTrain(object): for selected_poly in selected_polys: txts_tmp.append(txts[selected_poly]) txts = txts_tmp - # print(1111) return im[ymin: ymax + 1, xmin: xmax + 1, :], \ polys[selected_polys], tags[selected_polys], hv_tags[selected_polys], txts else: @@ -309,7 +308,6 @@ class PGProcessTrain(object): cv2.fillPoly(direction_map, quad.round().astype(np.int32)[np.newaxis, :, :], direction_label) - cv2.imwrite("output/{}.png".format(k), direction_map * 255.0) k += 1 return direction_map diff --git a/ppocr/data/pgnet_dataset.py b/ppocr/data/pgnet_dataset.py index
3f1e254592203e6d3d992e8ccb7025e7aa92bb57..1010951253695337a241e1b8276fcbc2d0e1b123 100644 --- a/ppocr/data/pgnet_dataset.py +++ b/ppocr/data/pgnet_dataset.py @@ -67,10 +67,7 @@ class PGDataSet(Dataset): np.array( list(poly), dtype=np.float32).reshape(-1, 2)) txts.append(txt) - if txt == '###': - txt_tags.append(True) - else: - txt_tags.append(False) + txt_tags.append(txt == '###') return np.array(list(map(np.array, text_polys))), \ np.array(txt_tags, dtype=np.bool), txts @@ -84,8 +81,8 @@ class PGDataSet(Dataset): for ext in [ 'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'JPG' ]: - if os.path.exists(os.path.join(img_dir, info_list[0] + ext)): - img_path = os.path.join(img_dir, info_list[0] + ext) + if os.path.exists(os.path.join(img_dir, info_list[0] + "." + ext)): + img_path = os.path.join(img_dir, info_list[0] + "." + ext) break if img_path == '': diff --git a/ppocr/losses/e2e_pg_loss.py b/ppocr/losses/e2e_pg_loss.py index 680ab0e60a394df0b4f86d334c616ca338ec5d93..10a8ed0aa907123b155976ba498426604f23c2b0 100644 --- a/ppocr/losses/e2e_pg_loss.py +++ b/ppocr/losses/e2e_pg_loss.py @@ -20,7 +20,7 @@ from paddle import nn import paddle from .det_basic_loss import DiceLoss -from ppocr.utils.e2e_utils.extract_batchsize import * +from ppocr.utils.e2e_utils.extract_batchsize import pre_process class PGLoss(nn.Layer): diff --git a/ppocr/metrics/e2e_metric.py b/ppocr/metrics/e2e_metric.py index 04b73e0c4652c263d59380a0feff1f29da6c6817..75ffbfb001af6111dcf9dae4d9325b3116e8589b 100644 --- a/ppocr/metrics/e2e_metric.py +++ b/ppocr/metrics/e2e_metric.py @@ -18,8 +18,8 @@ from __future__ import print_function __all__ = ['E2EMetric'] -from ppocr.utils.e2e_metric.Deteval import * -from ppocr.utils.e2e_utils.extract_textpoint import * +from ppocr.utils.e2e_metric.Deteval import get_socre, combine_results +from ppocr.utils.e2e_utils.extract_textpoint import get_dict class E2EMetric(object): diff --git a/requirements.txt b/requirements.txt index 
2401d52b48c10bad5ea5b244a0fd4c4365b94f09..1b01e690f77d2bf5e570c86b268c7128c8bf79fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ opencv-python==4.2.0.32 tqdm numpy visualdl -python-Levenshtein \ No newline at end of file +python-Levenshtein +opencv-contrib-python \ No newline at end of file diff --git a/tools/infer/predict_e2e.py b/tools/infer/predict_e2e.py index 6744e7e20c64379c8b482b826066dffe64f1923a..406e1bf3678f073d7c5818472350de7fb610ff16 100755 --- a/tools/infer/predict_e2e.py +++ b/tools/infer/predict_e2e.py @@ -34,7 +34,7 @@ from ppocr.postprocess import build_post_process logger = get_logger() -class TextE2e(object): +class TextE2E(object): def __init__(self, args): self.args = args self.e2e_algorithm = args.e2e_algorithm @@ -130,7 +130,7 @@ class TextE2e(object): if __name__ == "__main__": args = utility.parse_args() image_file_list = get_image_file_list(args.image_dir) - text_detector = TextE2e(args) + text_detector = TextE2E(args) count = 0 total_time = 0 draw_img_save = "./inference_results" @@ -151,7 +151,7 @@ if __name__ == "__main__": src_im = utility.draw_e2e_res(points, strs, image_file) img_name_pure = os.path.split(image_file)[-1] img_path = os.path.join(draw_img_save, - "e2e_res_{}".format(img_name_pure)) + "e2e_res_{}_pgnet".format(img_name_pure)) cv2.imwrite(img_path, src_im) logger.info("The visualized image saved in {}".format(img_path)) if count > 1: