From 6824db26e7b75b914cfb01b2f882918df3f07fca Mon Sep 17 00:00:00 2001
From: Jethong <1147925384@qq.com>
Date: Wed, 7 Apr 2021 12:26:05 +0800
Subject: [PATCH] fix errors and add pretrain_model

---
 doc/doc_ch/e2e.md              | 24 +++++++-----------------
 doc/doc_ch/inference.md        | 13 ++++---------
 ppocr/data/imaug/pg_process.py |  4 +---
 ppocr/data/pgnet_dataset.py    |  9 +++------
 ppocr/losses/e2e_pg_loss.py    |  2 +-
 ppocr/metrics/e2e_metric.py    |  4 ++--
 requirements.txt               |  3 ++-
 tools/infer/predict_e2e.py     |  6 +++---
 8 files changed, 23 insertions(+), 42 deletions(-)
diff --git a/doc/doc_ch/e2e.md b/doc/doc_ch/e2e.md
index 3927865d..e1909604 100644
--- a/doc/doc_ch/e2e.md
+++ b/doc/doc_ch/e2e.md
@@ -31,7 +31,7 @@
   |- rgb/           total_text数据集的训练数据
       |- gt_0.png
       | ...  
-  |-poly/           total_text数据集的测试标注
+  |- poly/          total_text数据集的测试标注
       |- gt_0.txt
       | ...
 ```
@@ -52,19 +52,11 @@
 您可以根据需求使用[PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/master/ppcls/modeling/architectures)中的模型更换backbone。
 ```shell
 cd PaddleOCR/
-下载ResNet50_vd的预训练模型
-wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar
+# 下载ResNet50_vd的动态图预训练模型
+wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams
 
-# 解压预训练模型文件，以ResNet50_vd为例
-tar -xf ./pretrain_models/ResNet50_vd_ssld_pretrained.tar ./pretrain_models/
-
-# 注：正确解压backbone预训练权重文件后，文件夹下包含众多以网络层命名的权重文件，格式如下：
-./pretrain_models/ResNet50_vd_ssld_pretrained/
-  └─ conv_last_bn_mean
-  └─ conv_last_bn_offset
-  └─ conv_last_bn_scale
-  └─ conv_last_bn_variance
-  └─ ......
+# ./pretrain_models/
+#   └─ ResNet50_vd_ssld_pretrained.pdparams
 
 ```
 
@@ -74,11 +66,9 @@ tar -xf ./pretrain_models/ResNet50_vd_ssld_pretrained.tar ./pretrain_models/
 
 ```shell
 # 单机单卡训练 e2e 模型
-python3 tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml \
-     -o Global.pretrain_weights=./pretrain_models/ResNet50_vd_ssld_pretrained/ Global.load_static_weights=True
+python3 tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./pretrain_models/ResNet50_vd_ssld_pretrained Global.load_static_weights=False
 # 单机多卡训练，通过 --gpus 参数设置使用的GPU ID
-python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml \
-     -o Global.pretrain_weights=./pretrain_models/ResNet50_vd_ssld_pretrained/  Global.load_static_weights=True
+python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./pretrain_models/ResNet50_vd_ssld_pretrained  Global.load_static_weights=False
 ```
 
 
diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md
index f0652409..1288d906 100755
--- a/doc/doc_ch/inference.md
+++ b/doc/doc_ch/inference.md
@@ -369,9 +369,9 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
 <a name="PGNet端到端模型推理"></a>
 ### 1. PGNet端到端模型推理
 #### (1). 四边形文本检测模型（ICDAR2015）  
-首先将PGNet端到端训练过程中保存的模型，转换成inference model。以基于Resnet50_vd骨干网络，在ICDAR2015英文数据集训练的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar))，可以使用如下命令进行转换：
+首先将PGNet端到端训练过程中保存的模型，转换成inference model。以基于Resnet50_vd骨干网络，在ICDAR2015英文数据集训练的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar))，可以使用如下命令进行转换：
 ```
-python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./det_r50_vd_sast_icdar15_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
+python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/iter_epoch_450 Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
 ```
 **PGNet端到端模型推理，需要设置参数`--e2e_algorithm="PGNet"`**，可以执行如下命令：
 ```
@@ -382,15 +382,10 @@ python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/im
 ![](../imgs_results/e2e_res_img_10_pgnet.jpg)
 
 #### (2). 弯曲文本检测模型（Total-Text）  
-首先将PGNet端到端训练过程中保存的模型，转换成inference model。以基于Resnet50_vd骨干网络，在Total-Text英文数据集训练的模型为例（[模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar))，可以使用如下命令进行转换：
-
-```
-python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./det_r50_vd_sast_totaltext_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
-```
-
+和四边形文本检测模型共用一个推理模型，无需再次转换。
 **PGNet端到端模型推理，需要设置参数`--e2e_algorithm="PGNet"`，同时，还需要增加参数`--e2e_pgnet_polygon=True`，**可以执行如下命令：
 ```
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True
 ```
 可视化文本端到端结果默认保存到`./inference_results`文件夹里面，结果文件的名称前缀为'e2e_res'。结果示例如下：
 
diff --git a/ppocr/data/imaug/pg_process.py b/ppocr/data/imaug/pg_process.py
index 09382f7e..0c9439d7 100644
--- a/ppocr/data/imaug/pg_process.py
+++ b/ppocr/data/imaug/pg_process.py
@@ -27,7 +27,7 @@ class PGProcessTrain(object):
                  tcl_len,
                  batch_size=14,
                  min_crop_size=24,
-                 min_text_size=10,
+                 min_text_size=4,
                  max_text_size=512,
                  **kwargs):
         self.tcl_len = tcl_len
@@ -197,7 +197,6 @@ class PGProcessTrain(object):
                     for selected_poly in selected_polys:
                         txts_tmp.append(txts[selected_poly])
                     txts = txts_tmp
-                    # print(1111)
                     return im[ymin: ymax + 1, xmin: xmax + 1, :], \
                            polys[selected_polys], tags[selected_polys], hv_tags[selected_polys], txts
                 else:
@@ -309,7 +308,6 @@ class PGProcessTrain(object):
             cv2.fillPoly(direction_map,
                          quad.round().astype(np.int32)[np.newaxis, :, :],
                          direction_label)
-            cv2.imwrite("output/{}.png".format(k), direction_map * 255.0)
             k += 1
         return direction_map
 
diff --git a/ppocr/data/pgnet_dataset.py b/ppocr/data/pgnet_dataset.py
index 3f1e2545..10109512 100644
--- a/ppocr/data/pgnet_dataset.py
+++ b/ppocr/data/pgnet_dataset.py
@@ -67,10 +67,7 @@ class PGDataSet(Dataset):
                     np.array(
                         list(poly), dtype=np.float32).reshape(-1, 2))
                 txts.append(txt)
-                if txt == '###':
-                    txt_tags.append(True)
-                else:
-                    txt_tags.append(False)
+                txt_tags.append(txt == '###')
 
         return np.array(list(map(np.array, text_polys))), \
                np.array(txt_tags, dtype=np.bool), txts
@@ -84,8 +81,8 @@ class PGDataSet(Dataset):
         for ext in [
                 'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'JPG'
         ]:
-            if os.path.exists(os.path.join(img_dir, info_list[0] + ext)):
-                img_path = os.path.join(img_dir, info_list[0] + ext)
+            if os.path.exists(os.path.join(img_dir, info_list[0] + "." + ext)):
+                img_path = os.path.join(img_dir, info_list[0] + "." + ext)
                 break
 
         if img_path == '':
diff --git a/ppocr/losses/e2e_pg_loss.py b/ppocr/losses/e2e_pg_loss.py
index 680ab0e6..10a8ed0a 100644
--- a/ppocr/losses/e2e_pg_loss.py
+++ b/ppocr/losses/e2e_pg_loss.py
@@ -20,7 +20,7 @@ from paddle import nn
 import paddle
 
 from .det_basic_loss import DiceLoss
-from ppocr.utils.e2e_utils.extract_batchsize import *
+from ppocr.utils.e2e_utils.extract_batchsize import pre_process
 
 
 class PGLoss(nn.Layer):
diff --git a/ppocr/metrics/e2e_metric.py b/ppocr/metrics/e2e_metric.py
index 04b73e0c..75ffbfb0 100644
--- a/ppocr/metrics/e2e_metric.py
+++ b/ppocr/metrics/e2e_metric.py
@@ -18,8 +18,8 @@ from __future__ import print_function
 
 __all__ = ['E2EMetric']
 
-from ppocr.utils.e2e_metric.Deteval import *
-from ppocr.utils.e2e_utils.extract_textpoint import *
+from ppocr.utils.e2e_metric.Deteval import get_socre, combine_results
+from ppocr.utils.e2e_utils.extract_textpoint import get_dict
 
 
 class E2EMetric(object):
diff --git a/requirements.txt b/requirements.txt
index 2401d52b..1b01e690 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@ opencv-python==4.2.0.32
 tqdm
 numpy
 visualdl
-python-Levenshtein
\ No newline at end of file
+python-Levenshtein
+opencv-contrib-python
\ No newline at end of file
diff --git a/tools/infer/predict_e2e.py b/tools/infer/predict_e2e.py
index 6744e7e2..406e1bf3 100755
--- a/tools/infer/predict_e2e.py
+++ b/tools/infer/predict_e2e.py
@@ -34,7 +34,7 @@ from ppocr.postprocess import build_post_process
 logger = get_logger()
 
 
-class TextE2e(object):
+class TextE2E(object):
     def __init__(self, args):
         self.args = args
         self.e2e_algorithm = args.e2e_algorithm
@@ -130,7 +130,7 @@ class TextE2e(object):
 if __name__ == "__main__":
     args = utility.parse_args()
     image_file_list = get_image_file_list(args.image_dir)
-    text_detector = TextE2e(args)
+    text_detector = TextE2E(args)
     count = 0
     total_time = 0
     draw_img_save = "./inference_results"
@@ -151,7 +151,7 @@ if __name__ == "__main__":
         src_im = utility.draw_e2e_res(points, strs, image_file)
         img_name_pure = os.path.split(image_file)[-1]
         img_path = os.path.join(draw_img_save,
-                                "e2e_res_{}".format(img_name_pure))
+                                "e2e_res_{}_pgnet".format(img_name_pure))
         cv2.imwrite(img_path, src_im)
         logger.info("The visualized image saved in {}".format(img_path))
     if count > 1:
-- 
GitLab