Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into dygraph

d9b7ce0a · andyjpaddle · ee22ebb4 · bdefa140 · d9b7ce0a · d9b7ce0a
8 changed files
--- a/applications/PCB字符识别/PCB字符识别.md
+++ b/applications/PCB字符识别/PCB字符识别.md
--- a/applications/PCB字符识别/gen_data/background/bg.jpg
+++ b/applications/PCB字符识别/gen_data/background/bg.jpg
--- a/applications/PCB字符识别/gen_data/corpus/text.txt
+++ b/applications/PCB字符识别/gen_data/corpus/text.txt
+5ZQ
+I4UL
+PWL
+SNOG
+ZL02
+1C30
+O3H
+YHRS
+N03S
+1U5Y
+JTK
+EN4F
+YKJ
+DWNH
+R42W
+X0V
+4OF5
+08AM
+Y93S
+GWE2
+0KR
+9U2A
+DBQ
+Y6J
+ROZ
+K06
+KIEY
+NZQJ
+UN1B
+6X4
\ No newline at end of file
--- a/applications/PCB字符识别/gen_data/det_background/1.png
+++ b/applications/PCB字符识别/gen_data/det_background/1.png
--- a/applications/PCB字符识别/gen_data/det_background/2.png
+++ b/applications/PCB字符识别/gen_data/det_background/2.png
--- a/applications/PCB字符识别/gen_data/gen.py
+++ b/applications/PCB字符识别/gen_data/gen.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is adapted from:
+https://github.com/zcswdt/Color_OCR_image_generator
+"""
+import os
+import random
+from PIL import Image, ImageDraw, ImageFont
+import json
+import argparse
+def get_char_lines(txt_root_path):
+    """
+    desc: collect the corpus lines of every text file in a directory
+    args:
+        txt_root_path: directory holding the corpus text files (read as UTF-8)
+    return:
+        list of str, one entry per line with surrounding whitespace stripped;
+        an empty list when the directory contains no files
+    """
+    char_lines = []
+    # os.listdir() order is arbitrary; sorting keeps the corpus order reproducible
+    for txt in sorted(os.listdir(txt_root_path)):
+        with open(os.path.join(txt_root_path, txt), mode='r', encoding='utf-8') as f:
+            char_lines.extend(line.strip() for line in f)
+    # return only after every file was read: returning from inside the loop
+    # dropped all files but the first and gave None for an empty directory
+    return char_lines
+def get_horizontal_text_picture(image_file, chars, fonts_list, cf):
+    """
+    desc: gen horizontal text picture
+    args:
+        image_file: path of the background image the text is drawn on
+        chars: text to render, drawn one character at a time
+        fonts_list: font file paths, one of them is picked at random
+        cf: config object providing font_min_size and font_max_size
+    return:
+        (crop_img, chars): the text region cropped out of the background
+        and the rendered text
+    raise:
+        ValueError: chars is empty or the text does not fit on the background
+    """
+    if not chars:
+        raise ValueError('chars must contain at least one character')
+    img = Image.open(image_file)
+    if img.mode != 'RGB':
+        img = img.convert('RGB')
+    img_w, img_h = img.size
+    # random choice font
+    font_path = random.choice(fonts_list)
+    # random choice font size
+    font_size = random.randint(cf.font_min_size, cf.font_max_size)
+    font = ImageFont.truetype(font_path, font_size)
+    # getsize() was removed in Pillow 10; when it is missing, the right/bottom
+    # edges reported by getbbox() are used as the (width, height) measurement
+    legacy_getsize = getattr(font, 'getsize', None)
+    ch_w = []
+    ch_h = []
+    for ch in chars:
+        if legacy_getsize is not None:
+            wt, ht = legacy_getsize(ch)
+        else:
+            wt, ht = (int(v) for v in font.getbbox(ch)[2:])
+        ch_w.append(wt)
+        ch_h.append(ht)
+    # add space: neighbouring characters are separated by the widest glyph width
+    char_space_width = max(ch_w)
+    f_w = sum(ch_w) + char_space_width * (len(chars) - 1)
+    f_h = max(ch_h)
+    # fail with a readable message instead of randint's "empty range" error
+    if f_w > img_w or f_h > img_h:
+        raise ValueError('text %r (%dx%d) does not fit on background %s (%dx%d)'
+                         % (chars, f_w, f_h, image_file, img_w, img_h))
+    # random top-left corner that keeps the whole text inside the image
+    x1 = random.randint(0, img_w - f_w)
+    y1 = random.randint(0, img_h - f_h)
+    best_color = (0, 0, 0)
+    draw = ImageDraw.Draw(img)
+    pen_x = x1
+    for ch, wt in zip(chars, ch_w):
+        draw.text((pen_x, y1), ch, best_color, font=font)
+        pen_x += wt + char_space_width
+    crop_img = img.crop((x1, y1, x1 + f_w, y1 + f_h))
+    return crop_img, chars
+def get_vertical_text_picture(image_file, chars, fonts_list, cf):
+    """
+    desc: gen vertical text picture
+    args:
+        image_file: path of the background image the text is drawn on
+        chars: text to render, one character below the other
+        fonts_list: font file paths, one of them is picked at random
+        cf: config object providing font_min_size and font_max_size
+    return:
+        (crop_img, chars): the text column cropped out of the background and
+        transposed with Image.ROTATE_90, and the rendered text
+    raise:
+        ValueError: chars is empty or the text does not fit on the background
+    """
+    if not chars:
+        raise ValueError('chars must contain at least one character')
+    img = Image.open(image_file)
+    if img.mode != 'RGB':
+        img = img.convert('RGB')
+    img_w, img_h = img.size
+    # random choice font
+    font_path = random.choice(fonts_list)
+    # random choice font size
+    font_size = random.randint(cf.font_min_size, cf.font_max_size)
+    font = ImageFont.truetype(font_path, font_size)
+    # getsize() was removed in Pillow 10; when it is missing, the right/bottom
+    # edges reported by getbbox() are used as the (width, height) measurement
+    legacy_getsize = getattr(font, 'getsize', None)
+    ch_w = []
+    ch_h = []
+    for ch in chars:
+        if legacy_getsize is not None:
+            wt, ht = legacy_getsize(ch)
+        else:
+            wt, ht = (int(v) for v in font.getbbox(ch)[2:])
+        ch_w.append(wt)
+        ch_h.append(ht)
+    # the column is as wide as the widest glyph and as tall as all glyphs stacked
+    f_w = max(ch_w)
+    f_h = sum(ch_h)
+    # fail with a readable message instead of randint's "empty range" error
+    if f_w > img_w or f_h > img_h:
+        raise ValueError('text %r (%dx%d) does not fit on background %s (%dx%d)'
+                         % (chars, f_w, f_h, image_file, img_w, img_h))
+    # random top-left corner that keeps the whole text inside the image
+    x1 = random.randint(0, img_w - f_w)
+    y1 = random.randint(0, img_h - f_h)
+    best_color = (0, 0, 0)
+    draw = ImageDraw.Draw(img)
+    pen_y = y1
+    for ch, ht in zip(chars, ch_h):
+        draw.text((x1, pen_y), ch, best_color, font=font)
+        pen_y += ht
+    crop_img = img.crop((x1, y1, x1 + f_w, y1 + f_h))
+    crop_img = crop_img.transpose(Image.ROTATE_90)
+    return crop_img, chars
+def get_fonts(fonts_path):
+    """
+    desc: build the path of every entry found in the fonts directory
+    args:
+        fonts_path: directory that holds the font files
+    return:
+        list of str, fonts_path joined with each directory entry
+    """
+    return [os.path.join(fonts_path, entry) for entry in os.listdir(fonts_path)]
+if __name__ == '__main__':
+    # Generates synthetic recognition crops and detection images together with
+    # their label files, split into a val part and a train part.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--num_img', type=int, default=30, help="Number of images to generate")
+    parser.add_argument('--font_min_size', type=int, default=11)
+    parser.add_argument('--font_max_size', type=int, default=12,
+                        help="Help adjust the size of the generated text and the size of the picture")
+    parser.add_argument('--bg_path', type=str, default='./background',
+                        help='The generated text pictures will be pasted onto the pictures of this folder')
+    parser.add_argument('--det_bg_path', type=str, default='./det_background',
+                        help='The generated text pictures will use the pictures of this folder as the background')
+    parser.add_argument('--fonts_path', type=str, default='../../StyleText/fonts',
+                        help='The font used to generate the picture')
+    parser.add_argument('--corpus_path', type=str, default='./corpus',
+                        help='The corpus used to generate the text picture')
+    parser.add_argument('--output_dir', type=str, default='./output/', help='Images save dir')
+    cf = parser.parse_args()
+    # save path; makedirs also creates missing parent directories
+    os.makedirs(cf.output_dir, exist_ok=True)
+    # get corpus
+    char_lines = get_char_lines(txt_root_path=cf.corpus_path)
+    if not char_lines:
+        raise SystemExit('no corpus lines found in ' + cf.corpus_path)
+    # get all fonts
+    fonts_list = get_fonts(cf.fonts_path)
+    # rec bg
+    img_root_path = cf.bg_path
+    imnames = os.listdir(img_root_path)
+    # det bg
+    det_bg_path = cf.det_bg_path
+    bg_pics = os.listdir(det_bg_path)
+    # image sub directories; these names are also the prefix of the image
+    # paths written into the label files
+    det_save_dir = 'imgs/'
+    det_val_save_dir = 'imgs_val/'
+    rec_save_dir = 'rec_imgs/'
+    rec_val_save_dir = 'rec_imgs_val/'
+    for sub_dir in (det_save_dir, det_val_save_dir, rec_save_dir, rec_val_save_dir):
+        # os.path.join works whether or not output_dir ends with a separator
+        os.makedirs(os.path.join(cf.output_dir, sub_dir), exist_ok=True)
+    # images with an index below this count (20% of num_img) go to the val split
+    val_count = cf.num_img * 0.2
+    print('start generating...')
+    # OCR det / rec label files; the with-block closes them even if generation fails
+    with open(os.path.join(cf.output_dir, 'det_gt_val.txt'), 'w', encoding='utf-8') as det_val_file, \
+            open(os.path.join(cf.output_dir, 'det_gt_train.txt'), 'w', encoding='utf-8') as det_train_file, \
+            open(os.path.join(cf.output_dir, 'rec_gt_val.txt'), 'w', encoding='utf-8') as rec_val_file, \
+            open(os.path.join(cf.output_dir, 'rec_gt_train.txt'), 'w', encoding='utf-8') as rec_train_file:
+        for i in range(0, cf.num_img):
+            # cycle through the corpus so num_img may exceed the number of lines
+            text = char_lines[i % len(char_lines)]
+            is_val = i < val_count
+            imname = random.choice(imnames)
+            img_path = os.path.join(img_root_path, imname)
+            if random.random() < 0.5:
+                # gen horizontal text picture, then drop its top 3 pixel rows
+                gen_img, _ = get_horizontal_text_picture(img_path, text, fonts_list, cf)
+                ori_w, ori_h = gen_img.size
+                gen_img = gen_img.crop((0, 3, ori_w, ori_h))
+            else:
+                # gen vertical text picture, then drop its left 3 pixel columns
+                gen_img, _ = get_vertical_text_picture(img_path, text, fonts_list, cf)
+                ori_w, ori_h = gen_img.size
+                gen_img = gen_img.crop((3, 0, ori_w, ori_h))
+            ori_w, ori_h = gen_img.size
+            # rec imgs
+            save_img_name = str(i).zfill(4) + '.jpg'
+            if is_val:
+                rec_file, rec_dir = rec_val_file, rec_val_save_dir
+            else:
+                rec_file, rec_dir = rec_train_file, rec_save_dir
+            save_dir = os.path.join(rec_dir, save_img_name)
+            rec_file.write(save_dir + '\t' + text + '\n')
+            gen_img.save(os.path.join(cf.output_dir, save_dir), quality=95, subsampling=0)
+            # det img
+            # random choice bg
+            bg_pic = random.choice(bg_pics)
+            det_img = Image.open(os.path.join(det_bg_path, bg_pic))
+            # the result is saved as .jpg, so convert like the text picture helpers do
+            if det_img.mode != 'RGB':
+                det_img = det_img.convert('RGB')
+            # the PCB position is fixed, modify it according to your own scenario
+            if bg_pic == '1.png':
+                x1, y1 = 38, 3
+            else:
+                x1, y1 = 34, 1
+            det_img.paste(gen_img, (x1, y1))
+            # text pos: box corners starting at top-left, then top-right,
+            # bottom-right, bottom-left
+            chars_pos = [[x1, y1], [x1 + ori_w, y1], [x1 + ori_w, y1 + ori_h], [x1, y1 + ori_h]]
+            label = [{"transcription": text, "points": chars_pos}]
+            if is_val:
+                det_file, det_dir = det_val_file, det_val_save_dir
+            else:
+                det_file, det_dir = det_train_file, det_save_dir
+            save_dir = os.path.join(det_dir, save_img_name)
+            det_file.write(save_dir + '\t' + json.dumps(label, ensure_ascii=False) + '\n')
+            det_img.save(os.path.join(cf.output_dir, save_dir), quality=95, subsampling=0)
--- a/applications/轻量级车牌识别.md
+++ b/applications/轻量级车牌识别.md
@@ -249,7 +249,7 @@ tar -xf ch_PP-OCRv3_det_distill_train.tar
 cd /home/aistudio/PaddleOCR
 ```
-预训练模型下载完成后，我们使用[ch_PP-OCRv3_det_student.yml](../configs/chepai/ch_PP-OCRv3_det_student.yml) 配置文件进行后续实验，在开始评估之前需要对配置文件中部分字段进行设置，具体如下：
+预训练模型下载完成后，我们使用[ch_PP-OCRv3_det_student.yml](../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_student.yml) 配置文件进行后续实验，在开始评估之前需要对配置文件中部分字段进行设置，具体如下：
 1. 模型存储和训练相关:
   1. Global.pretrained_model: 指向PP-OCRv3文本检测预训练模型地址
@@ -787,12 +787,12 @@ python tools/infer/predict_system.py \
 - 端侧部署
-端侧部署我们采用基于 PaddleLite 的 cpp 推理。Paddle Lite是飞桨轻量化推理引擎，为手机、IOT端提供高效推理能力，并广泛整合跨平台硬件，为端侧部署及应用落地问题提供轻量化的部署方案。具体可参考 [PaddleOCR lite教程](../dygraph/deploy/lite/readme_ch.md)
+端侧部署我们采用基于 PaddleLite 的 cpp 推理。Paddle Lite是飞桨轻量化推理引擎，为手机、IOT端提供高效推理能力，并广泛整合跨平台硬件，为端侧部署及应用落地问题提供轻量化的部署方案。具体可参考 [PaddleOCR lite教程](../deploy/lite/readme_ch.md)
 ### 4.5 实验总结
-我们分别使用PP-OCRv3中英文超轻量预训练模型在车牌数据集上进行了直接评估和 fine-tune 和 fine-tune +量化3种方案的实验，并基于[PaddleOCR lite教程](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/deploy/lite/readme_ch.md)进行了速度测试，指标对比如下：
+我们分别使用PP-OCRv3中英文超轻量预训练模型在车牌数据集上进行了直接评估、fine-tune 和 fine-tune + 量化 3 种方案的实验，并基于[PaddleOCR lite教程](../deploy/lite/readme_ch.md)进行了速度测试，指标对比如下：
 - 检测

--- a/ppstructure/table/README.md
+++ b/ppstructure/table/README.md
@@ -18,7 +18,7 @@ The table recognition mainly contains three models
 The table recognition flow chart is as follows
-![tableocr_pipeline](../../doc/table/tableocr_pipeline_en.jpg)
+![tableocr_pipeline](../docs/table/tableocr_pipeline_en.jpg)
 1. The coordinates of single-line text is detected by DB model, and then sends it to the recognition model to get the recognition result.
 2. The table structure and cell coordinates is predicted by RARE model.