diff --git a/README.md b/README.md index 9b94466bfc17626272b32f692d0400782645fe69..56e3f9930c692794cf9af867141849ae747bbe3b 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 #### 2.inference模型下载 +*windows 环境下如果没有安装wget,下载模型时可将链接复制到浏览器中下载,并解压放置在相应目录下* + #### (1)超轻量级中文OCR模型下载 ``` mkdir inference && cd inference @@ -63,6 +65,9 @@ cd .. # 设置PYTHONPATH环境变量 export PYTHONPATH=. +# windows下设置环境变量 +SET PYTHONPATH=. + # 预测image_dir指定的单张图像 python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_det_mv3_db/" --rec_model_dir="./inference/ch_rec_mv3_crnn/" diff --git a/configs/rec/rec_chinese_lite_train.yml b/configs/rec/rec_chinese_lite_train.yml index 4517f3e26a6440b7ca2ad2d006e8083167ad6eae..b64313a1b8f24cf4bcb1c20c9491ae8b00250fdb 100755 --- a/configs/rec/rec_chinese_lite_train.yml +++ b/configs/rec/rec_chinese_lite_train.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 320] max_text_length: 25 character_type: ch diff --git a/configs/rec/rec_icdar15_train.yml b/configs/rec/rec_icdar15_train.yml index b783cc2fb27dce2bd635a279b71b4a4cd79a94cf..934a94109cb304c5dd5e8db281f1fbf00d928e39 100755 --- a/configs/rec/rec_icdar15_train.yml +++ b/configs/rec/rec_icdar15_train.yml @@ -9,13 +9,12 @@ Global: eval_batch_step: 500 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en loss_type: ctc reader_yml: ./configs/rec/rec_icdar15_reader.yml - pretrain_weights: + pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy checkpoints: save_inference_dir: infer_img: diff --git a/configs/rec/rec_mv3_none_bilstm_ctc.yml b/configs/rec/rec_mv3_none_bilstm_ctc.yml index 35b5206c90e35c544c42167dc3758b0ec5c0377e..d2e096fb1c51588a6bd2c7ca8321cf817d435f23 100755 --- 
a/configs/rec/rec_mv3_none_bilstm_ctc.yml +++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_mv3_none_none_ctc.yml b/configs/rec/rec_mv3_none_none_ctc.yml index 7219134983d930a78989fc5f7c1df3a996edb6cc..ceec09ce6f3b6cb2238d6fb2e15f510cb31e0fd8 100755 --- a/configs/rec/rec_mv3_none_none_ctc.yml +++ b/configs/rec/rec_mv3_none_none_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_mv3_tps_bilstm_attn.yml b/configs/rec/rec_mv3_tps_bilstm_attn.yml index c4f64f0ae7a0a98a08efc4d80eddf211775e468c..7fc4f6799459bf9fbcd25e1609aeca5e3fd12a74 100755 --- a/configs/rec/rec_mv3_tps_bilstm_attn.yml +++ b/configs/rec/rec_mv3_tps_bilstm_attn.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_mv3_tps_bilstm_ctc.yml b/configs/rec/rec_mv3_tps_bilstm_ctc.yml index 8236dd8ab8bf5bedc8797c89700a0d6097dcd6d6..4b9660bcdec60989a6d9b9926c40814a83db6f39 100755 --- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml +++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml index c234cbb7adc1b392baa85ad05a6aeef74e1896aa..b71e8feae7ac8f235bf471101efd4383c61bfab2 100755 --- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml +++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml @@ -9,7 +9,6 @@ 
Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_r34_vd_none_none_ctc.yml b/configs/rec/rec_r34_vd_none_none_ctc.yml index 457c79adc41abae0a41a9d2a5f30345162ea4734..d9c9458d6d8fcdb9df590b0093d54b71e3e53fcc 100755 --- a/configs/rec/rec_r34_vd_none_none_ctc.yml +++ b/configs/rec/rec_r34_vd_none_none_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml index 2f351ae4324be2c208cb5c56f711c0a9bfaaef85..405082bdbec0f4b9ac0c885801963e9261c43e6e 100755 --- a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml +++ b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml index 9302170921c9f797d3c981cbd8773b56185f98f6..517322c374a7faf80d6e2b69b7f3e8b2dbb5b5af 100755 --- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml +++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml @@ -9,7 +9,6 @@ Global: eval_batch_step: 2000 train_batch_size_per_card: 256 test_batch_size_per_card: 256 - drop_last: false image_shape: [3, 32, 100] max_text_length: 25 character_type: en diff --git a/doc/inference.md b/doc/inference.md index b16b89a963be1cd5a6b87d13b5ae696681036cf2..0d5f45fd57036aa48b217dd0382d80ed52e3cb1f 100644 --- a/doc/inference.md +++ b/doc/inference.md @@ -166,6 +166,10 @@ STAR-Net文本识别模型推理,可以执行如下命令: python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" 
--rec_image_shape="3, 32, 100" --rec_char_type="en" ``` +### 3.基于Attention损失的识别模型推理 + +基于Attention损失的识别模型与ctc不同,需要额外设置识别算法参数 --rec_algorithm="RARE" + RARE 文本识别模型推理,可以执行如下命令: ``` python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/sare/" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_algorithm="RARE" diff --git a/doc/installation.md b/doc/installation.md index 9bdd5379882f136d5609acd3c213e9554ddb34a0..f1edbf4a54d88e0fa5901e9d2d41bcd888a707ad 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -8,6 +8,8 @@ PaddleOCR 工作环境 建议使用我们提供的docker运行PaddleOCR,有关docker使用请参考[链接](https://docs.docker.com/get-started/)。 +*如您希望使用 mac 或 windows直接运行预测代码,可以从第2步开始执行。* + 1. (建议)准备docker环境。第一次使用这个镜像,会自动下载该镜像,请耐心等待。 ``` # 切换到工作目录下 @@ -54,6 +56,10 @@ python3 -m pip install paddlepaddle-gpu==1.7.2.post97 -i https://pypi.tuna.tsing 如果您的机器安装的是CUDA10,请运行以下命令安装 python3 -m pip install paddlepaddle-gpu==1.7.2.post107 -i https://pypi.tuna.tsinghua.edu.cn/simple +如果您的机器是CPU,请运行以下命令安装 + +python3 -m pip install paddlepaddle==1.7.2 -i https://pypi.tuna.tsinghua.edu.cn/simple + 更多的版本需求,请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。 ``` diff --git a/doc/recognition.md b/doc/recognition.md index ea38c0f3a26f2bc22dc9f66dba3a7e7d825577da..7dd9ca7ecd56267c00db077eb290096dffa65de3 100644 --- a/doc/recognition.md +++ b/doc/recognition.md @@ -41,6 +41,8 @@ PaddleOCR 提供了一份用于训练 icdar2015 数据集的标签文件,通 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_train.txt # 测试集标签 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt + + ``` 最终训练集应有如下文件结构: @@ -168,10 +170,11 @@ Global: 评估数据集可以通过 `configs/rec/rec_icdar15_reader.yml` 修改EvalReader中的 `label_file_path` 设置。 +*注意* 评估时必须确保配置文件中 infer_img 字段为空 ``` export CUDA_VISIBLE_DEVICES=0 # GPU 评估, Global.checkpoints 为待测权重 -python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o 
Global.checkpoints={path/to/weights}/best_accuracy +python3 tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy ``` ### 预测 @@ -184,7 +187,7 @@ python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkp ``` # 预测英文结果 -python3 tools/infer_rec.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png +python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png ``` 预测图片: diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py index 6c2b7a1aeeb26a70f8b632afb299837e17b96d58..71b488f4eca6a3ba45354a62c7af1b65d944df25 100755 --- a/ppocr/data/rec/dataset_traversal.py +++ b/ppocr/data/rec/dataset_traversal.py @@ -42,14 +42,15 @@ class LMDBReader(object): self.max_text_length = params['max_text_length'] self.mode = params['mode'] self.drop_last = False - self.tps = False + self.use_tps = False if "tps" in params: - self.tps = True + self.ues_tps = True if params['mode'] == 'train': self.batch_size = params['train_batch_size_per_card'] - self.drop_last = params['drop_last'] + self.drop_last = True else: self.batch_size = params['test_batch_size_per_card'] + self.drop_last = False self.infer_img = params['infer_img'] def load_hierarchical_lmdb_dataset(self): @@ -114,7 +115,7 @@ class LMDBReader(object): img=img, image_shape=self.image_shape, char_ops=self.char_ops, - tps=self.tps, + tps=self.use_tps, infer_mode=True) yield norm_img else: @@ -181,15 +182,15 @@ class SimpleReader(object): self.max_text_length = params['max_text_length'] self.mode = params['mode'] self.infer_img = params['infer_img'] - self.tps = False + self.use_tps = False if "tps" in params: - self.tps = True - self.drop_last = False + self.ues_tps = True if params['mode'] == 'train': self.batch_size = 
params['train_batch_size_per_card'] - self.drop_last = params['drop_last'] + self.drop_last = True else: self.batch_size = params['test_batch_size_per_card'] + self.drop_last = False def __call__(self, process_id): if self.mode != 'train': @@ -206,7 +207,7 @@ class SimpleReader(object): img=img, image_shape=self.image_shape, char_ops=self.char_ops, - tps=self.tps, + tps=self.use_tps, infer_mode=True) yield norm_img else: diff --git a/ppocr/data/rec/img_tools.py b/ppocr/data/rec/img_tools.py index 6d7b66e935dc5f1db29d3cd31eb0c2f459316228..575432933bf5f261550636a8e1a24fba3ec33177 100755 --- a/ppocr/data/rec/img_tools.py +++ b/ppocr/data/rec/img_tools.py @@ -95,14 +95,10 @@ def process_image(img, max_text_length=None, tps=None, infer_mode=False): - if not infer_mode or char_ops.character_type == "en": + if not infer_mode or char_ops.character_type == "en" or tps != None: norm_img = resize_norm_img(img, image_shape) else: - if tps != None and char_ops.character_type == "ch": - image_shape = [3, 32, 320] - norm_img = resize_norm_img(img, image_shape) - else: - norm_img = resize_norm_img_chinese(img, image_shape) + norm_img = resize_norm_img_chinese(img, image_shape) norm_img = norm_img[np.newaxis, :] if label is not None: char_num = char_ops.get_char_num() diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index e8b485fb000a4aa7386d4351d3ad2fa4e706bb5a..3417d50086b17b297d7b5c8b9340e8c002c77f56 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -38,8 +38,10 @@ class TextRecognizer(object): char_ops_params["character_dict_path"] = args.rec_char_dict_path if self.rec_algorithm != "RARE": char_ops_params['loss_type'] = 'ctc' + self.loss_type = 'ctc' else: char_ops_params['loss_type'] = 'attention' + self.loss_type = 'attention' self.char_ops = CharacterOps(char_ops_params) def resize_norm_img(self, img, max_wh_ratio): @@ -85,7 +87,7 @@ class TextRecognizer(object): self.input_tensor.copy_from_cpu(norm_img_batch) 
self.predictor.zero_copy_run() - if self.rec_algorithm != "RARE": + if self.loss_type == "ctc": rec_idx_batch = self.output_tensors[0].copy_to_cpu() rec_idx_lod = self.output_tensors[0].lod()[0] predict_batch = self.output_tensors[1].copy_to_cpu() @@ -139,9 +141,13 @@ if __name__ == "__main__": img_list.append(img) try: rec_res, predict_time = text_recognizer(img_list) - except: + except Exception as e: + print(e) logger.info( - "ERROR!! \nInput image shape is not equal with config. TPS does not support variable shape.\n" + "ERROR!!!! \n" + "Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n" + "If your model has tps module: " + "TPS does not support variable shape.\n" "Please set --rec_image_shape=input_shape and --rec_char_type='en' ") exit() for ino in range(len(img_list)):