diff --git a/README.md b/README.md index 2af6e07dab91065664cadc4eb3e7709a8d5f4dce..fd885b06cc358118bc02362b5db5fa98eb2ff95e 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ For a new language request, please refer to [Guideline for new language_requests - [Benchmark](./doc/doc_en/benchmark_en.md) - Data Annotation and Synthesis - [Semi-automatic Annotation Tool: PPOCRLabel](./PPOCRLabel/README.md) - - [Data Synthesis Tool: Style_Edit](./StyleTextRec/README.md) + - [Data Synthesis Tool: Style-Text](./StyleText/README.md) - [Other Data Annotation Tools](./doc/doc_en/data_annotation_en.md) - [Other Data Synthesis Tools](./doc/doc_en/data_synthesis_en.md) - Datasets diff --git a/StyleText/README_ch.md b/StyleText/README_ch.md index c2b6add7918896edddcfa55bb5e83ba2e9ca1588..def2f120a2f798040c094a43bcab8bc20ec7d46f 100644 --- a/StyleText/README_ch.md +++ b/StyleText/README_ch.md @@ -1,4 +1,4 @@ -## Style Text +## Style-Text ### 目录 - [一、工具简介](#工具简介) @@ -85,7 +85,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_ #### 批量合成 -在实际应用场景中,经常需要批量合成图片,补充到训练集中。StyleText可以使用一批风格图片和语料,批量合成数据。合成过程如下: +在实际应用场景中,经常需要批量合成图片,补充到训练集中。Style-Text可以使用一批风格图片和语料,批量合成数据。合成过程如下: 1. 在`configs/dataset_config.yml`中配置目标场景风格图像和语料的路径,具体如下: @@ -100,7 +100,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_ * `language`:语料的语种; * `corpus_file`: 语料文件路径。 - StyleText也提供了一批中英韩5万张通用场景数据用作文本风格图像,便于合成场景丰富的文本图像,下图给出了一些示例。 + Style-Text也提供了一批中英韩5万张通用场景数据用作文本风格图像,便于合成场景丰富的文本图像,下图给出了一些示例。 中英韩5万张通用场景数据: [下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/style_text/chkoen_5w.tar) @@ -116,7 +116,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_ ### 四、应用案例 -下面以金属表面英文数字识别和通用韩语识别两个场景为例,说明使用StyleText合成数据,来提升文本识别效果的实际案例。下图给出了一些真实场景图像和合成图像的示例: +下面以金属表面英文数字识别和通用韩语识别两个场景为例,说明使用Style-Text合成数据,来提升文本识别效果的实际案例。下图给出了一些真实场景图像和合成图像的示例:
@@ -134,38 +134,38 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_ ### 五、代码结构 ``` style_text_rec -|-- arch +|-- arch // 网络结构定义文件 | |-- base_module.py | |-- decoder.py | |-- encoder.py | |-- spectral_norm.py | `-- style_text_rec.py -|-- configs +|-- configs // 配置文件 | |-- config.yml | `-- dataset_config.yml -|-- engine -| |-- corpus_generators.py -| |-- predictors.py -| |-- style_samplers.py -| |-- synthesisers.py -| |-- text_drawers.py -| `-- writers.py -|-- examples +|-- engine // 数据合成引擎 +| |-- corpus_generators.py // 从文本采样或随机生成语料 +| |-- predictors.py // 调用网络生成数据 +| |-- style_samplers.py // 采样风格图片 +| |-- synthesisers.py // 调度各个模块,合成数据 +| |-- text_drawers.py // 生成标准文字图片,用作输入 +| `-- writers.py // 将合成的图片和标签写入本地目录 +|-- examples // 示例文件 | |-- corpus | | `-- example.txt | |-- image_list.txt | `-- style_images | |-- 1.jpg | `-- 2.jpg -|-- fonts +|-- fonts // 字体文件 | |-- ch_standard.ttf | |-- en_standard.ttf | `-- ko_standard.ttf -|-- tools +|-- tools // 程序入口 | |-- __init__.py -| |-- synth_dataset.py -| `-- synth_image.py -`-- utils +| |-- synth_dataset.py // 批量合成数据 +| `-- synth_image.py // 合成单张图片 +`-- utils // 其他基础功能模块 |-- config.py |-- load_params.py |-- logging.py diff --git a/StyleText/doc/images/10.png b/StyleText/doc/images/10.png new file mode 100644 index 0000000000000000000000000000000000000000..6123cff27c6b7a89abc5cd318e4bf30a1aec767c Binary files /dev/null and b/StyleText/doc/images/10.png differ diff --git a/StyleText/doc/images/11.png b/StyleText/doc/images/11.png new file mode 100644 index 0000000000000000000000000000000000000000..eb8bd73c074910cad95e5a02c0c6da6cb9674f09 Binary files /dev/null and b/StyleText/doc/images/11.png differ diff --git a/StyleText/doc/images/12.png b/StyleText/doc/images/12.png new file mode 100644 index 0000000000000000000000000000000000000000..37f1cd698ff6e30136cf77a833b5d18ac352f09d Binary files /dev/null and b/StyleText/doc/images/12.png differ diff --git a/StyleText/doc/images/4.jpg 
b/StyleText/doc/images/4.jpg index 5fda9548632b63e55b42315dca4a5b9cec2a353c..d881074a13a5320035e739b91ce4b98f78191301 100644 Binary files a/StyleText/doc/images/4.jpg and b/StyleText/doc/images/4.jpg differ diff --git a/StyleText/doc/images/7.jpg b/StyleText/doc/images/7.jpg index 60a4e0ee6ae3d42cc43c43747d72a837bc170f9d..887094fb3a005e4649bf355fe9e61acf628fceca 100644 Binary files a/StyleText/doc/images/7.jpg and b/StyleText/doc/images/7.jpg differ diff --git a/StyleText/doc/images/8.jpg b/StyleText/doc/images/8.jpg index fbed5a7bb5368090e612933bba8f57ec1a74a4c4..234d7f33e7a3a29201fda2f8b844128c8e730e06 100644 Binary files a/StyleText/doc/images/8.jpg and b/StyleText/doc/images/8.jpg differ diff --git a/StyleText/doc/images/9.png b/StyleText/doc/images/9.png new file mode 100644 index 0000000000000000000000000000000000000000..179780250a563537188b336069b91c2472291a16 Binary files /dev/null and b/StyleText/doc/images/9.png differ diff --git a/StyleText/examples/style_images/2.jpg b/StyleText/examples/style_images/2.jpg index f68ce49aa5558124d36ae6eaa801be5b0e79e152..0ab932b1d9348ab41ad8ea153740e86e6477fdeb 100644 Binary files a/StyleText/examples/style_images/2.jpg and b/StyleText/examples/style_images/2.jpg differ diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md index dc760fd7cba0d891e3b6844ef0530f8dbbd384cc..beec3c68f3b76e639802106c07a63fe3d7f8f3e7 100644 --- a/doc/doc_en/models_list_en.md +++ b/doc/doc_en/models_list_en.md @@ -20,7 +20,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine |model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|ch_ppocr_mobile_slim_v2.0_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |[inference model (coming soon)](link) / [slim model (coming soon)](link)| +|ch_ppocr_mobile_slim_v2.0_det|Slim pruned lightweight model, 
supporting Chinese, English, multilingual text detection|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| |inference model (coming soon) / slim model (coming soon)| |ch_ppocr_mobile_v2.0_det|Original lightweight model, supporting Chinese, English, multilingual text detection|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)| |ch_ppocr_server_v2.0_det|General model, which is larger than the lightweight model, but achieved better performance|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)| @@ -32,7 +32,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine |model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |[inference model (coming soon)](link) / [slim model (coming soon)](link) | +|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)| |inference model (coming soon) / slim model (coming soon) | |ch_ppocr_mobile_v2.0_rec|Original lightweight model, supporting Chinese, English and number 
recognition|[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)|3.71M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) | |ch_ppocr_server_v2.0_rec|General model, supporting Chinese, English and number recognition|[rec_chinese_common_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml)|94.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) | @@ -44,7 +44,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine |model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| |[inference model (coming soon )](link) / [slim model (coming soon)](link) | +|en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| |inference model (coming soon) / slim model (coming soon) | |en_number_mobile_v2.0_rec|Original lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.56M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) 
/ [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) | @@ -62,6 +62,6 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine |model name|description|config|model size|download| | --- | --- | --- | --- | --- | -|ch_ppocr_mobile_slim_v2.0_cls|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |[inference model (coming soon)](link) / [trained model](link) / [slim model](link) | +|ch_ppocr_mobile_slim_v2.0_cls|Slim quantized model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)| |inference model (coming soon) / trained model / slim model| |ch_ppocr_mobile_v2.0_cls|Original model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |