diff --git a/StyleText/README.md b/StyleText/README.md
index 648b12674d23a9f413317644cc198fd7fda24bc8..60a9ee99a2d7273db2b07fc0dadc5cf4b8b84d75 100644
--- a/StyleText/README.md
+++ b/StyleText/README.md
@@ -69,12 +69,14 @@ fusion_generator:
 1. You can run `tools/synth_image` and generate the demo image, which is saved in the current folder.
 
 ```python
-python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
+python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
 ```
 
 * Note 1: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
-* Note 2: Synth-Text is mainly used to generate images for OCR recognition models.
+* Note 2: Synth-Text is mainly used to generate images for OCR recognition models. So the height of style images should be around 32 pixels. Images in other sizes may behave poorly.
+* Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction.
+
 
 For example, enter the following image and corpus `PaddleOCR`.
 
@@ -139,9 +141,10 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag
 2. You can run the following command to start synthesis task:
 
    ``` bash
-   python -m tools.synth_dataset.py -c configs/dataset_config.yml
+   python3 tools/synth_dataset.py -c configs/dataset_config.yml
    ```
-We also provide example corpus and images in `examples` folder.
+
+We also provide example corpus and images in `examples` folder.
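Note 3 above refers to the `use_gpu` switch in the `Global` section of `configs/config.yml`; the StyleText predictor reads it as `config["Global"]['use_gpu']` (see the `StyleText/engine/predictors.py` hunk below). A minimal sketch of flipping the switch from Python instead of editing the file by hand, assuming PyYAML is available and that the file keeps its top-level `Global` key:

```python
import yaml  # PyYAML; assumed available in the StyleText environment

# Load configs/config.yml, force CPU prediction, and write the file back.
with open("configs/config.yml") as f:
    cfg = yaml.safe_load(f)

cfg["Global"]["use_gpu"] = False  # set to True to predict on GPU
with open("configs/config.yml", "w") as f:
    yaml.safe_dump(cfg, f)
```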
diff --git a/StyleText/README_ch.md b/StyleText/README_ch.md
index 0dd5822b1eac488099477d289dff83a99577b8c9..5b8a3ee0fef321ed9ccee7733a74645234c44a12 100644
--- a/StyleText/README_ch.md
+++ b/StyleText/README_ch.md
@@ -61,11 +61,12 @@ fusion_generator:
 输入一张风格图和一段文字语料，运行tools/synth_image，合成单张图片，结果图像保存在当前目录下：
 
 ```python
-python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
+python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
 ```
 
 * 注1：语言选项和语料相对应，目前该工具只支持英文、简体中文和韩语。
 * 注2：Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计，我们主要支持高度在32左右的风格图像。
   如果输入图像尺寸相差过多，效果可能不佳。
+* 注3：可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
 
 例如，输入如下图片和语料"PaddleOCR":
 
@@ -127,7 +128,7 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_
 2. 运行`tools/synth_dataset`合成数据：
 
    ``` bash
-   python -m tools.synth_dataset -c configs/dataset_config.yml
+   python3 tools/synth_dataset.py -c configs/dataset_config.yml
    ```
 我们在examples目录下提供了样例图片和语料。
diff --git a/StyleText/engine/predictors.py b/StyleText/engine/predictors.py
index d9f4afe4a18bd1e0a96ac37aa0359f26434ddb3d..a1ba21f1b6cd084f9f95140d4227d600d4631715 100644
--- a/StyleText/engine/predictors.py
+++ b/StyleText/engine/predictors.py
@@ -28,6 +28,7 @@ class StyleTextRecPredictor(object):
         ], "Generator {} not supported.".format(algorithm)
         use_gpu = config["Global"]['use_gpu']
         check_gpu(use_gpu)
+        paddle.set_device('gpu' if use_gpu else 'cpu')
         self.logger = get_logger()
         self.generator = getattr(style_text_rec, algorithm)(config)
         self.height = config["Global"]["image_height"]
diff --git a/StyleText/tools/synth_dataset.py b/StyleText/tools/synth_dataset.py
index 4a0e6d5e1f701c49558cfe1ea1df61e9b4180a89..a75f7f393b6a0825bc9735e00a50c468f9b4a1ae 100644
--- a/StyleText/tools/synth_dataset.py
+++ b/StyleText/tools/synth_dataset.py
@@ -11,6 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+
 from engine.synthesisers import DatasetSynthesiser
diff --git a/StyleText/tools/synth_image.py b/StyleText/tools/synth_image.py
index 7b4827b825e4a28dd1fb2eba722d23e64e8ce0be..cbc3118675fd1d628bdfefee295acb4248128bd6 100644
--- a/StyleText/tools/synth_image.py
+++ b/StyleText/tools/synth_image.py
@@ -16,13 +16,13 @@ import cv2
 import sys
 import glob
 
-from utils.config import ArgsParser
-from engine.synthesisers import ImageSynthesiser
-
 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
 
+from utils.config import ArgsParser
+from engine.synthesisers import ImageSynthesiser
+
 
 def synth_image():
     args = ArgsParser().parse_args()
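The three StyleText hunks above work together: `paddle.set_device` makes the predictor actually run on the device chosen by `use_gpu`, and the `sys.path` bootstrap lets `tools/synth_image.py` and `tools/synth_dataset.py` be launched directly as scripts, which is what the updated README commands do. The ordering is the point of the `synth_image.py` hunk: the `from utils...` and `from engine...` imports can only resolve once the StyleText root is on the import path, so they have to follow the `sys.path.append` calls. A condensed sketch of the pattern (package names taken from the hunks):

```python
import os
import sys

# Make the script runnable as `python3 tools/synth_image.py` from any directory:
# put tools/ and the StyleText root on the import path before importing the
# project's own packages.
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

from utils.config import ArgsParser              # resolves only after the appends
from engine.synthesisers import ImageSynthesiser
```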
diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md
index 30a6912eb919101b59dacb46127577988ca89388..8bd76c045b82513ea82a53af58b5805e1b34fc8d 100644
--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
@@ -107,10 +107,10 @@ make inference_lib_dist
 
 For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/advanced_guide/inference_deployment/inference/build_and_install_lib_en.html).
 
-* After the compilation process, you can see the following files in the folder of `build/fluid_inference_install_dir/`.
+* After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`.
 
 ```
-build/fluid_inference_install_dir/
+build/paddle_inference_install_dir/
 |-- CMakeCache.txt
 |-- paddle
 |-- third_party
diff --git a/deploy/cpp_infer/src/preprocess_op.cpp b/deploy/cpp_infer/src/preprocess_op.cpp
index 494b774ad0b2ecf73e555b2c31c250b7b0730c65..cdb20c31973dac50322016ed0124e8c3b1d8d4e7 100644
--- a/deploy/cpp_infer/src/preprocess_op.cpp
+++ b/deploy/cpp_infer/src/preprocess_op.cpp
@@ -81,14 +81,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
   else if (resize_h / 32 < 1 + 1e-5)
     resize_h = 32;
   else
-    resize_h = (resize_h / 32 - 1) * 32;
+    resize_h = (resize_h / 32) * 32;
 
   if (resize_w % 32 == 0)
     resize_w = resize_w;
   else if (resize_w / 32 < 1 + 1e-5)
     resize_w = 32;
   else
-    resize_w = (resize_w / 32 - 1) * 32;
+    resize_w = (resize_w / 32) * 32;
 
   cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt
index 95d7989bfc06a9e061874a824d070ca60bc3848d..f1ab0b1131ef5d55b098667612c019e0fc01c9dc 100644
--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
@@ -11,7 +11,7 @@ max_side_len 960
 det_db_thresh 0.3
 det_db_box_thresh 0.5
 det_db_unclip_ratio 2.0
-det_model_dir ./inference/ch__ppocr_mobile_v2.0_det_infer/
+det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
 
 # cls config
 use_angle_cls 0
diff --git a/paddleocr.py b/paddleocr.py
index 1d8cd254644af77ea965d3fb5905f87a9b141e52..3c3c47ab83ba30df798a2f65e0cb0ee80895e363 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -262,8 +262,8 @@ class PaddleOCR(predict_system.TextSystem):
             logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
             sys.exit(0)
 
-        postprocess_params.rec_char_dict_path = Path(
-            __file__).parent / postprocess_params.rec_char_dict_path
+        postprocess_params.rec_char_dict_path = str(
+            Path(__file__).parent / postprocess_params.rec_char_dict_path)
 
         # init det_model and rec_model
         super().__init__(postprocess_params)
diff --git a/setup.py b/setup.py
index f92074be1274bb44b3f2b8fdc621554df88d054f..58f6de48548d494a7fde8528130b8e881bc7620d 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(
     package_dir={'paddleocr': ''},
     include_package_data=True,
     entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
-    version='2.0.1',
+    version='2.0.2',
     install_requires=requirements,
     license='Apache License 2.0',
     description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index ba0adaee258096ea9970425cc05ca7a8f1cf08c4..fe772991b7db06b192f4e3c4b99cef703c64b0df 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -35,6 +35,7 @@ logger = get_logger()
 
 class TextDetector(object):
     def __init__(self, args):
+        self.args = args
         self.det_algorithm = args.det_algorithm
         self.use_zero_copy_run = args.use_zero_copy_run
         pre_process_list = [{
@@ -70,6 +71,9 @@ class TextDetector(object):
             postprocess_params["cover_thresh"] = args.det_east_cover_thresh
             postprocess_params["nms_thresh"] = args.det_east_nms_thresh
         elif self.det_algorithm == "SAST":
+            pre_process_list[0] = {
+                'DetResizeForTest': {'resize_long': args.det_limit_side_len}
+            }
             postprocess_params['name'] = 'SASTPostProcess'
             postprocess_params["score_thresh"] = args.det_sast_score_thresh
             postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
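The `preprocess_op.cpp` hunks fix an over-aggressive rounding step: when a side is not already divisible by 32, the old expression rounded down one multiple of 32 too far, shrinking the image more than necessary before detection. A small Python illustration of the two formulas (`//` mirrors the C++ `int` division for these positive values):

```python
def old_round(x):
    return (x // 32 - 1) * 32  # previous logic: drops an extra multiple of 32

def new_round(x):
    return (x // 32) * 32      # patched logic: nearest lower multiple of 32

print(old_round(100), new_round(100))  # 64 96 -> the fix keeps more resolution
```

The `predict_det.py` hunk is related: for SAST, the default resize step is replaced with a `DetResizeForTest` configured by `resize_long`, driven by the existing `--det_limit_side_len` argument.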
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 4b06b60b9e25954be7375882b5fb67343312b222..b793254da688079c5a6782f2c071f1c3d8f992d4 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -33,6 +33,7 @@ def parse_args():
     parser.add_argument("--use_gpu", type=str2bool, default=True)
     parser.add_argument("--ir_optim", type=str2bool, default=True)
     parser.add_argument("--use_tensorrt", type=str2bool, default=False)
+    parser.add_argument("--use_fp16", type=str2bool, default=False)
     parser.add_argument("--gpu_mem", type=int, default=8000)
 
     # params for text detector
@@ -46,7 +47,7 @@ def parse_args():
     parser.add_argument("--det_db_thresh", type=float, default=0.3)
     parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
     parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
-
+    parser.add_argument("--max_batch_size", type=int, default=10)
     # EAST parmas
     parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
     parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
@@ -62,7 +63,7 @@ def parse_args():
     parser.add_argument("--rec_model_dir", type=str)
     parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
     parser.add_argument("--rec_char_type", type=str, default='ch')
-    parser.add_argument("--rec_batch_num", type=int, default=6)
+    parser.add_argument("--rec_batch_num", type=int, default=1)
     parser.add_argument("--max_text_length", type=int, default=25)
     parser.add_argument(
         "--rec_char_dict_path",
@@ -78,7 +79,7 @@ def parse_args():
     parser.add_argument("--cls_model_dir", type=str)
     parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
     parser.add_argument("--label_list", type=list, default=['0', '180'])
-    parser.add_argument("--cls_batch_num", type=int, default=30)
+    parser.add_argument("--cls_batch_num", type=int, default=6)
     parser.add_argument("--cls_thresh", type=float, default=0.9)
 
     parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
@@ -113,6 +114,11 @@ def create_predictor(args, mode, logger):
 
     if args.use_gpu:
         config.enable_use_gpu(args.gpu_mem, 0)
+        if args.use_tensorrt:
+            config.enable_tensorrt_engine(
+                precision_mode=AnalysisConfig.Precision.Half
+                if args.use_fp16 else AnalysisConfig.Precision.Float32,
+                max_batch_size=args.max_batch_size)
     else:
         config.disable_gpu()
         config.set_cpu_math_library_num_threads(6)
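With these changes, TensorRT is engaged only on the GPU path, `--use_fp16` switches `enable_tensorrt_engine` to half precision, and `--max_batch_size` bounds the engine's batch size. A minimal sketch of the resulting configuration outside PaddleOCR follows; the import location and model file names are assumptions for illustration, not taken from the patch:

```python
# Sketch only: build a Paddle inference AnalysisConfig with TensorRT enabled,
# mirroring the flags added above. Import path and model paths are assumptions.
from paddle.fluid.core import AnalysisConfig

config = AnalysisConfig(
    "./inference/ch_ppocr_mobile_v2.0_det_infer/inference.pdmodel",
    "./inference/ch_ppocr_mobile_v2.0_det_infer/inference.pdiparams")
config.enable_use_gpu(8000, 0)  # initial GPU memory pool in MB (--gpu_mem), device 0
config.enable_tensorrt_engine(
    precision_mode=AnalysisConfig.Precision.Half,  # Float32 when --use_fp16 is off
    max_batch_size=10)                             # --max_batch_size
```

From the command line the same behaviour comes from the new flags, e.g. `python3 tools/infer/predict_det.py --image_dir ./doc/imgs/ --det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/ --use_tensorrt True --use_fp16 True`.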