diff --git a/benchmark/run_benchmark_det.sh b/benchmark/run_benchmark_det.sh index 3ab3ad2b66bd052fa49a850562cff624c5ce2c22..54263e953f3f758b318df147d34ee942a247ed18 100644 --- a/benchmark/run_benchmark_det.sh +++ b/benchmark/run_benchmark_det.sh @@ -17,7 +17,7 @@ function _set_params(){ skip_steps=2 # 解析日志,有些模型前几个step耗时长,需要跳过 (必填) keyword="ips:" # 解析日志,筛选出数据所在行的关键字 (必填) index="1" - model_name=${model_item}_${run_mode}_bs${batch_size}_${fp_item} # model_item 用于yml文件名匹配,model_name 用于数据入库前端展示 + model_name=${model_item}_bs${batch_size}_${fp_item} # model_item 用于yml文件名匹配,model_name 用于数据入库前端展示 # 以下不用修改 device=${CUDA_VISIBLE_DEVICES//,/ } arr=(${device}) diff --git a/benchmark/run_det.sh b/benchmark/run_det.sh index c7755d5d76187faebc2be37a1a5bf64a3bc1381e..be0c141f7ee168d10eebb6efb57158d18ed02f72 100644 --- a/benchmark/run_det.sh +++ b/benchmark/run_det.sh @@ -2,6 +2,7 @@ # 提供可稳定复现性能的脚本,默认在标准docker环境内py37执行: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 py=37 # 执行目录: ./PaddleOCR # 1 安装该模型需要的依赖 (如需开启优化策略请注明) +log_path=${LOG_PATH_INDEX_DIR:-$(pwd)} python -m pip install -r requirements.txt # 2 拷贝该模型需要数据、预训练模型 wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../ @@ -12,18 +13,22 @@ wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dyg model_mode_list=(det_res18_db_v2.0 det_r50_vd_east det_r50_vd_pse) fp_item_list=(fp32) -bs_list=(8 16) for model_mode in ${model_mode_list[@]}; do for fp_item in ${fp_item_list[@]}; do + if [ ${model_mode} == "det_r50_vd_east" ]; then + bs_list=(16) + else + bs_list=(8 16) + fi for bs_item in ${bs_list[@]}; do echo "index is speed, 1gpus, begin, ${model_name}" run_mode=sp - log_name=ocr_${model_mode}_${run_mode}_bs${bs_item}_${fp_item} + log_name=ocr_${model_mode}_bs${bs_item}_${fp_item}_${run_mode} CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 1 ${model_mode} | tee 
${log_path}/${log_name}_speed_1gpus 2>&1 # (5min) sleep 60 echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}" run_mode=mp - log_name=ocr_${model_mode}_${run_mode}_bs${bs_item}_${fp_item} + log_name=ocr_${model_mode}_bs${bs_item}_${fp_item}_${run_mode} CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode} | tee ${log_path}/${log_name}_speed_8gpus8p 2>&1 sleep 60 done diff --git a/deploy/slim/prune/export_prune_model.py b/deploy/slim/prune/export_prune_model.py index 2c9d0a1831c3c0de321668dfdde55aecb825ab06..f4385972009e1b5382504754dc655381f0cc7717 100644 --- a/deploy/slim/prune/export_prune_model.py +++ b/deploy/slim/prune/export_prune_model.py @@ -52,12 +52,17 @@ def main(config, device, logger, vdl_writer): config['Architecture']["Head"]['out_channels'] = char_num model = build_model(config['Architecture']) - flops = paddle.flops(model, [1, 3, 640, 640]) - logger.info(f"FLOPs before pruning: {flops}") + if config['Architecture']['model_type'] == 'det': + input_shape = [1, 3, 640, 640] + elif config['Architecture']['model_type'] == 'rec': + input_shape = [1, 3, 32, 320] + + flops = paddle.flops(model, input_shape) + logger.info("FLOPs before pruning: {}".format(flops)) from paddleslim.dygraph import FPGMFilterPruner model.train() - pruner = FPGMFilterPruner(model, [1, 3, 640, 640]) + pruner = FPGMFilterPruner(model, input_shape) # build metric eval_class = build_metric(config['Metric']) @@ -65,8 +70,13 @@ def main(config, device, logger, vdl_writer): def eval_fn(): metric = program.eval(model, valid_dataloader, post_process_class, eval_class) - logger.info(f"metric['hmean']: {metric['hmean']}") - return metric['hmean'] + if config['Architecture']['model_type'] == 'det': + main_indicator = 'hmean' + else: + main_indicator = 'acc' + logger.info("metric[{}]: {}".format(main_indicator, metric[ + main_indicator])) + return metric[main_indicator] params_sensitive = 
pruner.sensitive( eval_func=eval_fn, @@ -81,18 +91,22 @@ def main(config, device, logger, vdl_writer): # calculate pruned params's ratio params_sensitive = pruner._get_ratios_by_loss(params_sensitive, loss=0.02) for key in params_sensitive.keys(): - logger.info(f"{key}, {params_sensitive[key]}") + logger.info("{}, {}".format(key, params_sensitive[key])) plan = pruner.prune_vars(params_sensitive, [0]) - flops = paddle.flops(model, [1, 3, 640, 640]) - logger.info(f"FLOPs after pruning: {flops}") + flops = paddle.flops(model, input_shape) + logger.info("FLOPs after pruning: {}".format(flops)) # load pretrain model load_model(config, model) metric = program.eval(model, valid_dataloader, post_process_class, eval_class) - logger.info(f"metric['hmean']: {metric['hmean']}") + if config['Architecture']['model_type'] == 'det': + main_indicator = 'hmean' + else: + main_indicator = 'acc' + logger.info("metric[{}]: {}".format(main_indicator, metric[main_indicator])) # start export model from paddle.jit import to_static diff --git a/deploy/slim/prune/sensitivity_anal.py b/deploy/slim/prune/sensitivity_anal.py index c5d008779eaafef36f4264b45295ec7bc78e3d27..306f1a83ae0945614518514dcd00ca869254d5f8 100644 --- a/deploy/slim/prune/sensitivity_anal.py +++ b/deploy/slim/prune/sensitivity_anal.py @@ -73,13 +73,18 @@ def main(config, device, logger, vdl_writer): char_num = len(getattr(post_process_class, 'character')) config['Architecture']["Head"]['out_channels'] = char_num model = build_model(config['Architecture']) + if config['Architecture']['model_type'] == 'det': + input_shape = [1, 3, 640, 640] + elif config['Architecture']['model_type'] == 'rec': + input_shape = [1, 3, 32, 320] + flops = paddle.flops(model, input_shape) - flops = paddle.flops(model, [1, 3, 640, 640]) logger.info("FLOPs before pruning: {}".format(flops)) from paddleslim.dygraph import FPGMFilterPruner model.train() - pruner = FPGMFilterPruner(model, [1, 3, 640, 640]) + + pruner = FPGMFilterPruner(model, 
input_shape) # build loss loss_class = build_loss(config['Loss']) @@ -107,8 +112,14 @@ def main(config, device, logger, vdl_writer): def eval_fn(): metric = program.eval(model, valid_dataloader, post_process_class, eval_class, False) - logger.info("metric['hmean']: {}".format(metric['hmean'])) - return metric['hmean'] + if config['Architecture']['model_type'] == 'det': + main_indicator = 'hmean' + else: + main_indicator = 'acc' + + logger.info("metric[{}]: {}".format(main_indicator, metric[ + main_indicator])) + return metric[main_indicator] run_sensitive_analysis = False """ @@ -149,7 +160,7 @@ def main(config, device, logger, vdl_writer): plan = pruner.prune_vars(params_sensitive, [0]) - flops = paddle.flops(model, [1, 3, 640, 640]) + flops = paddle.flops(model, input_shape) logger.info("FLOPs after pruning: {}".format(flops)) # start train diff --git a/doc/doc_ch/code_and_doc.md b/doc/doc_ch/code_and_doc.md new file mode 100644 index 0000000000000000000000000000000000000000..b1d8b4b36bd45fc1574b5049ce9af808a00b7574 --- /dev/null +++ b/doc/doc_ch/code_and_doc.md @@ -0,0 +1,324 @@ +# 附录 + +本附录包含了Python、文档规范以及Pull Request流程,请各位开发者遵循相关内容 + +- [附录1:Python代码规范](#附录1) + +- [附录2:文档规范](#附录2) + +- [附录3:Pull Request说明](#附录3) + + + +## 附录1:Python代码规范 + +PaddleOCR的Python代码遵循 [PEP8规范](https://www.python.org/dev/peps/pep-0008/),其中一些关注的重点包括如下内容 + +- 空格 + + - 空格应该加在逗号、分号、冒号前,而非他们的后面 + + ```python + # 正确: + print(x, y) + + # 错误: + print(x , y) + ``` + + - 在函数中指定关键字参数或默认参数值时, 不要在其两侧使用空格 + + ```python + # 正确: + def complex(real, imag=0.0) + # 错误: + def complex(real, imag = 0.0) + ``` + +- 注释 + + - 行内注释:行内注释使用 `#` 号表示,在代码与 `#` 之间需要空两个空格, `#` 与注释之间应当空一个空格,例如 + + ```python + x = x + 1 # Compensate for border + ``` + + - 函数和方法:每个函数的定义后的描述应该包括以下内容: + + - 函数描述:函数的作用,输入输出的 + + - Args:每个参数的名字以及对该参数的描述 + - Returns:返回值的含义和类型 + + ```python + def fetch_bigtable_rows(big_table, keys, other_silly_variable=None): + """Fetches rows from a Bigtable. 
+ + Retrieves rows pertaining to the given keys from the Table instance + represented by big_table. Silly things may happen if + other_silly_variable is not None. + + Args: + big_table: An open Bigtable Table instance. + keys: A sequence of strings representing the key of each table row + to fetch. + other_silly_variable: Another optional variable, that has a much + longer name than the other args, and which does nothing. + + Returns: + A dict mapping keys to the corresponding table row data + fetched. Each row is represented as a tuple of strings. For + example: + + {'Serak': ('Rigel VII', 'Preparer'), + 'Zim': ('Irk', 'Invader'), + 'Lrrr': ('Omicron Persei 8', 'Emperor')} + + If a key from the keys argument is missing from the dictionary, + then that row was not found in the table. + """ + pass + ``` + + + +## 附录2:文档规范 + +### 2.1 总体说明 + +- 文档位置:如果您增加的新功能可以补充在原有的Markdown文件中,请**不要重新新建**一个文件。如果您对添加的位置不清楚,可以先PR代码,然后在commit中询问官方人员。 + +- 新增Markdown文档名称:使用英文描述文档内容,一般由小写字母与下划线组合而成,例如 `add_new_algorithm.md` + +- 新增Markdown文档格式:目录 - 正文 - FAQ + + > 目录生成方法可以使用 [此网站](https://ecotrust-canada.github.io/markdown-toc/) 将md内容复制之后自动提取目录,然后在md文件的每个标题前添加 `` + +- 中英双语:任何对文档的改动或新增都需要分别在中文和英文文档上进行。 + +### 2.2 格式规范 + +- 标题格式:文档标题格式按照:阿拉伯数字小数点组合 - 空格 - 标题的格式(例如 `2.1 XXXX` , `2. 
XXXX`) + +- 代码块:通过代码块格式展示需要运行的代码,在代码块前描述命令参数的含义。例如: + + > 检测+方向分类器+识别全流程:设置方向分类器参数 `--use_angle_cls true` 后可对竖排文本进行识别。 + > + > ``` + > paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true + > ``` + +- 变量引用:如果在行内引用到代码变量或命令参数,需要用行内代码表示,例如上方 `--use_angle_cls true` ,并在前后各空一格 + +- 补充说明:通过引用格式 `>` 补充说明,或对注意事项进行说明 + +- 图片:如果在说明文档中增加了图片,请规范图片的命名形式(描述图片内容),并将图片添加在 `doc/` 下 + + + +## 附录3:Pull Request说明 + +### 3.1 PaddleOCR分支说明 + +PaddleOCR未来将维护2种分支,分别为: + +- release/x.x系列分支:为稳定的发行版本分支,也是默认分支。PaddleOCR会根据功能更新情况发布新的release分支,同时适配Paddle的release版本。随着版本迭代,release/x.x系列分支会越来越多,默认维护最新版本的release分支。 +- dygraph分支:为开发分支,适配Paddle动态图的dygraph版本,主要用于开发新功能。如果有同学需要进行二次开发,请选择dygraph分支。为了保证dygraph分支能在需要的时候拉出release/x.x分支,dygraph分支的代码只能使用Paddle最新release分支中有效的api。也就是说,如果Paddle dygraph分支中开发了新的api,但尚未出现在release分支代码中,那么请不要在PaddleOCR中使用。除此之外,对于不涉及api的性能优化、参数调整、策略更新等,都可以正常进行开发。 + +PaddleOCR的历史分支,未来将不再维护。考虑到一些同学可能仍在使用,这些分支还会继续保留: + +- develop分支:这个分支曾用于静态图的开发与测试,目前兼容>=1.7版本的Paddle。如果有特殊需求,要适配旧版本的Paddle,那还可以使用这个分支,但除了修复bug外不再更新代码。 + +PaddleOCR欢迎大家向repo中积极贡献代码,下面给出一些贡献代码的基本流程。 + +### 3.2 PaddleOCR代码提交流程与规范 + +> 如果你熟悉Git使用,可以直接跳转到 [3.2.10 提交代码的一些约定](#提交代码的一些约定) + +#### 3.2.1 创建你的 `远程仓库` + +- 在PaddleOCR的 [GitHub首页](https://github.com/PaddlePaddle/PaddleOCR),点击左上角 `Fork` 按钮,在你的个人目录下创建 `远程仓库`,比如`https://github.com/{your_name}/PaddleOCR`。 + +![banner](/Users/zhulingfeng01/OCR/PaddleOCR/doc/banner.png) + +- 将 `远程仓库` Clone到本地 + +``` +# 拉取develop分支的代码 +git clone https://github.com/{your_name}/PaddleOCR.git -b dygraph +cd PaddleOCR +``` + +> 多数情况下clone失败是由于网络原因,请稍后重试或配置代理 + +#### 3.2.2 和 `远程仓库` 建立连接 + +首先查看当前 `远程仓库` 的信息。 + +``` +git remote -v +# origin https://github.com/{your_name}/PaddleOCR.git (fetch) +# origin https://github.com/{your_name}/PaddleOCR.git (push) +``` + +只有clone的 `远程仓库` 的信息,也就是自己用户名下的 PaddleOCR,接下来我们创建一个原始 PaddleOCR 仓库的远程主机,命名为 upstream。 + +``` +git remote add upstream https://github.com/PaddlePaddle/PaddleOCR.git +``` + +使用 `git remote -v` 查看当前 `远程仓库` 的信息,输出如下,发现包括了origin和upstream 2个 
`远程仓库` 。 + +``` +origin https://github.com/{your_name}/PaddleOCR.git (fetch) +origin https://github.com/{your_name}/PaddleOCR.git (push) +upstream https://github.com/PaddlePaddle/PaddleOCR.git (fetch) +upstream https://github.com/PaddlePaddle/PaddleOCR.git (push) +``` + +这主要是为了后续在提交pull request(PR)时,始终保持本地仓库最新。 + +#### 3.2.3 创建本地分支 + +可以基于当前分支创建新的本地分支,命令如下。 + +``` +git checkout -b new_branch +``` + +也可以基于远程或者上游的分支创建新的分支,命令如下。 + +``` +# 基于用户远程仓库(origin)的develop创建new_branch分支 +git checkout -b new_branch origin/develop +# 基于上游远程仓库(upstream)的develop创建new_branch分支 +# 如果需要从upstream创建新的分支,需要首先使用git fetch upstream获取上游代码 +git checkout -b new_branch upstream/develop +``` + +最终会显示切换到新的分支,输出信息如下 + +``` +Branch new_branch set up to track remote branch develop from upstream. +Switched to a new branch 'new_branch' +``` + +#### 3.2.4 使用pre-commit勾子 + +Paddle 开发人员使用 pre-commit 工具来管理 Git 预提交钩子。 它可以帮助我们格式化源代码(C++,Python),在提交(commit)前自动检查一些基本事宜(如每个文件只有一个 EOL,Git 中不要添加大文件等)。 + +pre-commit测试是 Travis-CI 中单元测试的一部分,不满足钩子的 PR 不能被提交到 PaddleOCR,首先安装并在当前目录运行它: + +``` +pip install pre-commit +pre-commit install +``` + + > 1. Paddle 使用 clang-format 来调整 C/C++ 源代码格式,请确保 `clang-format` 版本在 3.8 以上。 + > + > 2. 
通过pip install pre-commit和conda install -c conda-forge pre-commit安装的yapf稍有不同,PaddleOCR 开发人员使用的是 `pip install pre-commit`。 + +#### 3.2.5 修改与提交代码 + + 假设对PaddleOCR的 `README.md` 做了一些修改,可以通过 `git status` 查看改动的文件,然后使用 `git add` 添加改动文件。 + +``` +git status # 查看改动文件 +git add README.md +pre-commit +``` + +重复上述步骤,直到pre-commit格式检查不报错。如下所示。 + +[![img](https://github.com/PaddlePaddle/PaddleClas/raw/release/2.3/docs/images/quick_start/community/003_precommit_pass.png)](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/images/quick_start/community/003_precommit_pass.png) + +使用下面的命令完成提交。 + +``` +git commit -m "your commit info" +``` + +#### 3.2.6 保持本地仓库最新 + +获取 upstream 的最新代码并更新当前分支。这里的upstream来自于3.2.2节的`和远程仓库建立连接`部分。 + +``` +git fetch upstream +# 如果是希望提交到其他分支,则需要从upstream的其他分支pull代码,这里是develop +git pull upstream develop +``` + +#### 3.2.7 push到远程仓库 + +``` +git push origin new_branch +``` + +#### 3.2.7 提交Pull Request + +点击new pull request,选择本地分支和目标分支,如下图所示。在PR的描述说明中,填写该PR所完成的功能。接下来等待review,如果有需要修改的地方,参照上述步骤更新 origin 中的对应分支即可。 + +![banner](/Users/zhulingfeng01/OCR/PaddleOCR/doc/pr.png) + +#### 3.2.8 签署CLA协议和通过单元测试 + +- 签署CLA 在首次向PaddlePaddle提交Pull Request时,您需要签署一次CLA(Contributor License Agreement)协议,以保证您的代码可以被合入,具体签署方式如下: + + 1. 请您查看PR中的Check部分,找到license/cla,并点击右侧detail,进入CLA网站 + + 2. 
点击CLA网站中的“Sign in with GitHub to agree”,点击完成后将会跳转回您的Pull Request页面 + +#### 3.2.9 删除分支 + +- 删除远程分支 + + 在 PR 被 merge 进主仓库后,我们可以在 PR 的页面删除远程仓库的分支。 + + 也可以使用 `git push origin :分支名` 删除远程分支,如: + + ``` + git push origin :new_branch + ``` + +- 删除本地分支 + + ``` + # 切换到develop分支,否则无法删除当前分支 + git checkout develop + + # 删除new_branch分支 + git branch -D new_branch + ``` + + + +#### 3.2.10 提交代码的一些约定 + +为了使官方维护人员在评审代码时更好地专注于代码本身,请您每次提交代码时,遵守以下约定: + +1)请保证Travis-CI 中单元测试能顺利通过。如果没过,说明提交的代码存在问题,官方维护人员一般不做评审。 + +2)提交Pull Request前: + +- 请注意commit的数量。 + + 原因:如果仅仅修改一个文件但提交了十几个commit,每个commit只做了少量的修改,这会给评审人带来很大困扰。评审人需要逐一查看每个commit才能知道做了哪些修改,且不排除commit之间的修改存在相互覆盖的情况。 + + 建议:每次提交时,保持尽量少的commit,可以通过git commit --amend补充上次的commit。对已经Push到远程仓库的多个commit,可以参考[squash commits after push](https://stackoverflow.com/questions/5667884/how-to-squash-commits-in-git-after-they-have-been-pushed)。 + +- 请注意每个commit的名称:应能反映当前commit的内容,不能太随意。 + + +3)如果解决了某个Issue的问题,请在该Pull Request的第一个评论框中加上:fix #issue_number,这样当该Pull Request被合并后,会自动关闭对应的Issue。关键词包括:close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved,请选择合适的词汇。详细可参考[Closing issues via commit messages](https://help.github.com/articles/closing-issues-via-commit-messages)。 + +此外,在回复评审人意见时,请您遵守以下约定: + +1)官方维护人员的每一个review意见都希望得到回复,这样会更好地提升开源社区的贡献。 + +- 对评审意见同意且按其修改完的,给个简单的Done即可; +- 对评审意见不同意的,请给出您自己的反驳理由。 + +2)如果评审意见比较多: + +- 请给出总体的修改情况。 +- 请采用`start a review`进行回复,而非直接回复的方式。原因是每个回复都会发送一封邮件,会造成邮件灾难。 \ No newline at end of file diff --git a/doc/doc_ch/detection.md b/doc/doc_ch/detection.md index cfc9d52bf280400982a9fcd9941ddc4cce3f5e5c..f76ae7f842fb6b7002e084be59dc7ccb31f39771 100644 --- a/doc/doc_ch/detection.md +++ b/doc/doc_ch/detection.md @@ -247,3 +247,7 @@ Q1: 训练模型转inference 模型之后预测效果不一致? 
**A**:此类问题出现较多,问题多是trained model预测时候的预处理、后处理参数和inference model预测的时候的预处理、后处理参数不一致导致的。以det_mv3_db.yml配置文件训练的模型为例,训练模型、inference模型预测结果不一致问题解决方式如下: - 检查[trained model预处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L116),和[inference model的预测预处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/predict_det.py#L42)函数是否一致。算法在评估的时候,输入图像大小会影响精度,为了和论文保持一致,训练icdar15配置文件中将图像resize到[736, 1280],但是在inference model预测的时候只有一套默认参数,会考虑到预测速度问题,默认限制图像最长边为960做resize的。训练模型预处理和inference模型的预处理函数位于[ppocr/data/imaug/operators.py](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/ppocr/data/imaug/operators.py#L147) - 检查[trained model后处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L51),和[inference 后处理参数](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/utility.py#L50)是否一致。 + +Q1: 训练EAST模型提示找不到lanms库? + +**A**:执行pip3 install lanms-nova 即可。 diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md index 4e0f1d131e2547f0d4a8bdf35c0f4a6f8bf2e7a3..c964d23117d022531d1181455a7b1c6c1d08ccae 100755 --- a/doc/doc_ch/inference.md +++ b/doc/doc_ch/inference.md @@ -34,6 +34,8 @@ inference 模型(`paddle.jit.save`保存的模型) - [1. 超轻量中文OCR模型推理](#超轻量中文OCR模型推理) - [2. 
其他模型推理](#其他模型推理) +- [六、参数解释](#参数解释) + ## 一、训练模型转inference模型 @@ -394,3 +396,127 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d 执行命令后,识别结果图像如下: ![](../imgs_results/img_10_east_starnet.jpg) + + + + +# 六、参数解释 + +更多关于预测过程的参数解释如下所示。 + +* 全局信息 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| image_dir | str | 无,必须显式指定 | 图像或者文件夹路径 | +| vis_font_path | str | "./doc/fonts/simfang.ttf" | 用于可视化的字体路径 | +| drop_score | float | 0.5 | 识别得分小于该值的结果会被丢弃,不会作为返回结果 | +| use_pdserving | bool | False | 是否使用Paddle Serving进行预测 | +| warmup | bool | False | 是否开启warmup,在统计预测耗时的时候,可以使用这种方法 | +| draw_img_save_dir | str | "./inference_results" | 系统串联预测OCR结果的保存文件夹 | +| save_crop_res | bool | False | 是否保存OCR的识别文本图像 | +| crop_res_save_dir | str | "./output" | 保存OCR识别出来的文本图像路径 | +| use_mp | bool | False | 是否开启多进程预测 | +| total_process_num | int | 6 | 开启的进程数,`use_mp`为`True`时生效 | +| process_id | int | 0 | 当前进程的id号,无需自己修改 | +| benchmark | bool | False | 是否开启benchmark,对预测速度、显存占用等进行统计 | +| save_log_path | str | "./log_output/" | 开启`benchmark`时,日志结果的保存文件夹 | +| show_log | bool | True | 是否显示预测中的日志信息 | +| use_onnx | bool | False | 是否开启onnx预测 | + + +* 预测引擎相关 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| use_gpu | bool | True | 是否使用GPU进行预测 | +| ir_optim | bool | True | 是否对计算图进行分析与优化,开启后可以加速预测过程 | +| use_tensorrt | bool | False | 是否开启tensorrt | +| min_subgraph_size | int | 15 | tensorrt中最小子图size,当子图的size大于该值时,才会尝试对该子图使用trt engine计算 | +| precision | str | fp32 | 预测的精度,支持`fp32`, `fp16`, `int8` 3种输入 | +| enable_mkldnn | bool | True | 是否开启mkldnn | +| cpu_threads | int | 10 | 开启mkldnn时,cpu预测的线程数 | + +* 文本检测模型相关 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| det_algorithm | str | "DB" | 文本检测算法名称,目前支持`DB`, `EAST`, `SAST`, `PSE` | +| det_model_dir | str | xx | 检测inference模型路径 | +| det_limit_side_len | int | 960 | 检测的图像边长限制 | +| det_limit_type | str | "max" | 检测的边长限制类型,目前支持`min`, 
`max`,`min`表示保证图像最短边不小于`det_limit_side_len`,`max`表示保证图像最长边不大于`det_limit_side_len` | + +其中,DB算法相关参数如下 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| det_db_thresh | float | 0.3 | DB输出的概率图中,得分大于该阈值的像素点才会被认为是文字像素点 | +| det_db_box_thresh | float | 0.6 | 检测结果边框内,所有像素点的平均得分大于该阈值时,该结果会被认为是文字区域 | +| det_db_unclip_ratio | float | 1.5 | `Vatti clipping`算法的扩张系数,使用该方法对文字区域进行扩张 | +| max_batch_size | int | 10 | 预测的batch size | +| use_dilation | bool | False | 是否对分割结果进行膨胀以获取更优检测效果 | +| det_db_score_mode | str | "fast" | DB的检测结果得分计算方法,支持`fast`和`slow`,`fast`是根据polygon的外接矩形边框内的所有像素计算平均得分,`slow`是根据原始polygon内的所有像素计算平均得分,计算速度相对较慢一些,但是更加准确一些。 | + +EAST算法相关参数如下 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| det_east_score_thresh | float | 0.8 | EAST后处理中score map的阈值 | +| det_east_cover_thresh | float | 0.1 | EAST后处理中文本框的平均得分阈值 | +| det_east_nms_thresh | float | 0.2 | EAST后处理中nms的阈值 | + +SAST算法相关参数如下 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| det_sast_score_thresh | float | 0.5 | SAST后处理中的得分阈值 | +| det_sast_nms_thresh | float | 0.5 | SAST后处理中nms的阈值 | +| det_sast_polygon | bool | False | 是否多边形检测,弯曲文本场景(如Total-Text)设置为True | + +PSE算法相关参数如下 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| det_pse_thresh | float | 0.0 | 对输出图做二值化的阈值 | +| det_pse_box_thresh | float | 0.85 | 对box进行过滤的阈值,低于此阈值的丢弃 | +| det_pse_min_area | float | 16 | box的最小面积,低于此阈值的丢弃 | +| det_pse_box_type | str | "box" | 返回框的类型,box:四点坐标,poly: 弯曲文本的所有点坐标 | +| det_pse_scale | int | 1 | 输入图像相对于进后处理的图的比例,如`640*640`的图像,网络输出为`160*160`,scale为2的情况下,进后处理的图片shape为`320*320`。这个值调大可以加快后处理速度,但是会带来精度的下降 | + +* 文本识别模型相关 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| rec_algorithm | str | "CRNN" | 文本识别算法名称,目前支持`CRNN`, `SRN`, `RARE`, `NETR`, `SAR` | +| rec_model_dir | str | 无,如果使用识别模型,该项是必填项 | 识别inference模型路径 | +| rec_image_shape | list | [3, 32, 320] | 识别时的图像尺寸, | +| rec_batch_num | int | 6 | 识别的batch size | +| max_text_length | int | 25 | 识别结果最大长度,在`SRN`中有效 | +| 
rec_char_dict_path | str | "./ppocr/utils/ppocr_keys_v1.txt" | 识别的字符字典文件 | +| use_space_char | bool | True | 是否包含空格,如果为`True`,则会在最后字符字典中补充`空格`字符 | + + +* 端到端文本检测与识别模型相关 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| e2e_algorithm | str | "PGNet" | 端到端算法名称,目前支持`PGNet` | +| e2e_model_dir | str | 无,如果使用端到端模型,该项是必填项 | 端到端模型inference模型路径 | +| e2e_limit_side_len | int | 768 | 端到端的输入图像边长限制 | +| e2e_limit_type | str | "max" | 端到端的边长限制类型,目前支持`min`, `max`,`min`表示保证图像最短边不小于`e2e_limit_side_len`,`max`表示保证图像最长边不大于`e2e_limit_side_len` | +| e2e_pgnet_score_thresh | float | xx | xx | +| e2e_char_dict_path | str | "./ppocr/utils/ic15_dict.txt" | 识别的字典文件路径 | +| e2e_pgnet_valid_set | str | "totaltext" | 验证集名称,目前支持`totaltext`, `partvgg`,不同数据集对应的后处理方式不同,与训练过程保持一致即可 | +| e2e_pgnet_mode | str | "fast" | PGNet的检测结果得分计算方法,支持`fast`和`slow`,`fast`是根据polygon的外接矩形边框内的所有像素计算平均得分,`slow`是根据原始polygon内的所有像素计算平均得分,计算速度相对较慢一些,但是更加准确一些。 | + + +* 方向分类器模型相关 + +| 参数名称 | 类型 | 默认值 | 含义 | +| :--: | :--: | :--: | :--: | +| use_angle_cls | bool | False | 是否使用方向分类器 | +| cls_model_dir | str | 无,如果需要使用,则必须显式指定路径 | 方向分类器inference模型路径 | +| cls_image_shape | list | [3, 48, 192] | 预测尺度 | +| label_list | list | ['0', '180'] | class id对应的角度值 | +| cls_batch_num | int | 6 | 方向分类器预测的batch size | +| cls_thresh | float | 0.9 | 预测阈值,模型预测结果为180度,且得分大于该阈值时,认为最终预测结果为180度,需要翻转 | diff --git a/doc/joinus.PNG b/doc/joinus.PNG index cd9de9c14beaf0be346a1f7f1d09450a0905a880..99964b62d0e8a5867d5eb7a29640f0414c7af3b2 100644 Binary files a/doc/joinus.PNG and b/doc/joinus.PNG differ diff --git a/ppocr/modeling/backbones/__init__.py b/ppocr/modeling/backbones/__init__.py index 169eb821f110d4a212068ebab4d46d636e241307..66b507fd24158ddf64d68dd7392f828a2e17c399 100755 --- a/ppocr/modeling/backbones/__init__.py +++ b/ppocr/modeling/backbones/__init__.py @@ -16,7 +16,7 @@ __all__ = ["build_backbone"] def build_backbone(config, model_type): - if model_type == "det": + if model_type == "det" or model_type == "table": from 
.det_mobilenet_v3 import MobileNetV3 from .det_resnet_vd import ResNet from .det_resnet_vd_sast import ResNet_SAST @@ -36,10 +36,6 @@ def build_backbone(config, model_type): elif model_type == "e2e": from .e2e_resnet_vd_pg import ResNet support_dict = ["ResNet"] - elif model_type == "table": - from .table_resnet_vd import ResNet - from .table_mobilenet_v3 import MobileNetV3 - support_dict = ["ResNet", "MobileNetV3"] else: raise NotImplementedError diff --git a/ppocr/modeling/backbones/rec_mobilenet_v3.py b/ppocr/modeling/backbones/rec_mobilenet_v3.py index c5dcfdd5a3ad1f2c356f488a89e0f1e660a4a832..917e000d94ea01ce0057e08c1f4839240561a368 100644 --- a/ppocr/modeling/backbones/rec_mobilenet_v3.py +++ b/ppocr/modeling/backbones/rec_mobilenet_v3.py @@ -26,8 +26,10 @@ class MobileNetV3(nn.Layer): scale=0.5, large_stride=None, small_stride=None, + disable_se=False, **kwargs): super(MobileNetV3, self).__init__() + self.disable_se = disable_se if small_stride is None: small_stride = [2, 2, 2, 2] if large_stride is None: @@ -101,6 +103,7 @@ class MobileNetV3(nn.Layer): block_list = [] inplanes = make_divisible(inplanes * scale) for (k, exp, c, se, nl, s) in cfg: + se = se and not self.disable_se block_list.append( ResidualUnit( in_channels=inplanes, diff --git a/ppocr/modeling/backbones/table_mobilenet_v3.py b/ppocr/modeling/backbones/table_mobilenet_v3.py deleted file mode 100644 index daa87f976038d8d5eeafadceb869b9232ba22cd9..0000000000000000000000000000000000000000 --- a/ppocr/modeling/backbones/table_mobilenet_v3.py +++ /dev/null @@ -1,287 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - -__all__ = ['MobileNetV3'] - - -def make_divisible(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class MobileNetV3(nn.Layer): - def __init__(self, - in_channels=3, - model_name='large', - scale=0.5, - disable_se=False, - **kwargs): - """ - the MobilenetV3 backbone network for detection module. 
- Args: - params(dict): the super parameters for build network - """ - super(MobileNetV3, self).__init__() - - self.disable_se = disable_se - - if model_name == "large": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, 'relu', 1], - [3, 64, 24, False, 'relu', 2], - [3, 72, 24, False, 'relu', 1], - [5, 72, 40, True, 'relu', 2], - [5, 120, 40, True, 'relu', 1], - [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hardswish', 2], - [3, 200, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 480, 112, True, 'hardswish', 1], - [3, 672, 112, True, 'hardswish', 1], - [5, 672, 160, True, 'hardswish', 2], - [5, 960, 160, True, 'hardswish', 1], - [5, 960, 160, True, 'hardswish', 1], - ] - cls_ch_squeeze = 960 - elif model_name == "small": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, 'relu', 2], - [3, 72, 24, False, 'relu', 2], - [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hardswish', 2], - [5, 240, 40, True, 'hardswish', 1], - [5, 240, 40, True, 'hardswish', 1], - [5, 120, 48, True, 'hardswish', 1], - [5, 144, 48, True, 'hardswish', 1], - [5, 288, 96, True, 'hardswish', 2], - [5, 576, 96, True, 'hardswish', 1], - [5, 576, 96, True, 'hardswish', 1], - ] - cls_ch_squeeze = 576 - else: - raise NotImplementedError("mode[" + model_name + - "_model] is not implemented!") - - supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] - assert scale in supported_scale, \ - "supported scale are {} but input scale is {}".format(supported_scale, scale) - inplanes = 16 - # conv1 - self.conv = ConvBNLayer( - in_channels=in_channels, - out_channels=make_divisible(inplanes * scale), - kernel_size=3, - stride=2, - padding=1, - groups=1, - if_act=True, - act='hardswish', - name='conv1') - - self.stages = [] - self.out_channels = [] - block_list = [] - i = 0 - inplanes = make_divisible(inplanes * scale) - for (k, exp, c, se, nl, s) in cfg: - se = se and not self.disable_se - start_idx = 2 if model_name == 'large' 
else 0 - if s == 2 and i > start_idx: - self.out_channels.append(inplanes) - self.stages.append(nn.Sequential(*block_list)) - block_list = [] - block_list.append( - ResidualUnit( - in_channels=inplanes, - mid_channels=make_divisible(scale * exp), - out_channels=make_divisible(scale * c), - kernel_size=k, - stride=s, - use_se=se, - act=nl, - name="conv" + str(i + 2))) - inplanes = make_divisible(scale * c) - i += 1 - block_list.append( - ConvBNLayer( - in_channels=inplanes, - out_channels=make_divisible(scale * cls_ch_squeeze), - kernel_size=1, - stride=1, - padding=0, - groups=1, - if_act=True, - act='hardswish', - name='conv_last')) - self.stages.append(nn.Sequential(*block_list)) - self.out_channels.append(make_divisible(scale * cls_ch_squeeze)) - for i, stage in enumerate(self.stages): - self.add_sublayer(sublayer=stage, name="stage{}".format(i)) - - def forward(self, x): - x = self.conv(x) - out_list = [] - for stage in self.stages: - x = stage(x) - out_list.append(x) - return out_list - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=None, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hardswish": - x = F.hardswish(x) - else: - print("The activation function({}) is selected incorrectly.". 
- format(self.act)) - exit() - return x - - -class ResidualUnit(nn.Layer): - def __init__(self, - in_channels, - mid_channels, - out_channels, - kernel_size, - stride, - use_se, - act=None, - name=''): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_channels == out_channels - self.if_se = use_se - - self.expand_conv = ConvBNLayer( - in_channels=in_channels, - out_channels=mid_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + "_expand") - self.bottleneck_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=mid_channels, - kernel_size=kernel_size, - stride=stride, - padding=int((kernel_size - 1) // 2), - groups=mid_channels, - if_act=True, - act=act, - name=name + "_depthwise") - if self.if_se: - self.mid_se = SEModule(mid_channels, name=name + "_se") - self.linear_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name=name + "_linear") - - def forward(self, inputs): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = paddle.add(inputs, x) - return x - - -class SEModule(nn.Layer): - def __init__(self, in_channels, reduction=4, name=""): - super(SEModule, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2D(1) - self.conv1 = nn.Conv2D( - in_channels=in_channels, - out_channels=in_channels // reduction, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name=name + "_1_weights"), - bias_attr=ParamAttr(name=name + "_1_offset")) - self.conv2 = nn.Conv2D( - in_channels=in_channels // reduction, - out_channels=in_channels, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name + "_2_weights"), - bias_attr=ParamAttr(name=name + "_2_offset")) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = 
F.relu(outputs) - outputs = self.conv2(outputs) - outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5) - return inputs * outputs \ No newline at end of file diff --git a/ppocr/modeling/backbones/table_resnet_vd.py b/ppocr/modeling/backbones/table_resnet_vd.py deleted file mode 100644 index 1c07c2684eec8d0c4a445cc88c543bfe1da9c864..0000000000000000000000000000000000000000 --- a/ppocr/modeling/backbones/table_resnet_vd.py +++ /dev/null @@ -1,280 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ["ResNet"] - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None, ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer( - 
in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__(self, in_channels=3, layers=50, **kwargs): - super(ResNet, self).__init__() - - self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, - 1024] if layers >= 
50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=32, - kernel_size=3, - stride=2, - act='relu', - name="conv1_1") - self.conv1_2 = ConvBNLayer( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - act='relu', - name="conv1_2") - self.conv1_3 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=1, - act='relu', - name="conv1_3") - self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.stages = [] - self.out_channels = [] - if layers >= 50: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(bottleneck_block) - self.out_channels.append(num_filters[block] * 4) - self.stages.append(nn.Sequential(*block_list)) - else: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(basic_block) - self.out_channels.append(num_filters[block]) - self.stages.append(nn.Sequential(*block_list)) - - def 
forward(self, inputs): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - out = [] - for block in self.stages: - y = block(y) - out.append(y) - return out diff --git a/ppocr/modeling/transforms/tps_spatial_transformer.py b/ppocr/modeling/transforms/tps_spatial_transformer.py index 4db34f7b4833c1c9b2901c68899bfb294b5843c4..043bb56b8a526c12b2e0799bf41e128c6499c1fc 100644 --- a/ppocr/modeling/transforms/tps_spatial_transformer.py +++ b/ppocr/modeling/transforms/tps_spatial_transformer.py @@ -53,7 +53,7 @@ def compute_partial_repr(input_points, control_points): 1] repr_matrix = 0.5 * pairwise_dist * paddle.log(pairwise_dist) # fix numerical error for 0 * log(0), substitute all nan with 0 - mask = repr_matrix != repr_matrix + mask = np.array(repr_matrix != repr_matrix) repr_matrix[mask] = 0 return repr_matrix diff --git a/ppocr/postprocess/east_postprocess.py b/ppocr/postprocess/east_postprocess.py index ec6bf663854d3391bf8c584aa749dc6d1805d344..c194c81c6911aac0f9210109c37b76b44532e9c4 100755 --- a/ppocr/postprocess/east_postprocess.py +++ b/ppocr/postprocess/east_postprocess.py @@ -20,7 +20,6 @@ import numpy as np from .locality_aware_nms import nms_locality import cv2 import paddle -import lanms import os import sys @@ -61,6 +60,7 @@ class EASTPostProcess(object): """ restore text boxes from score map and geo map """ + score_map = score_map[0] geo_map = np.swapaxes(geo_map, 1, 0) geo_map = np.swapaxes(geo_map, 1, 2) @@ -76,8 +76,15 @@ class EASTPostProcess(object): boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32) boxes[:, :8] = text_box_restored.reshape((-1, 8)) boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]] - boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh) - # boxes = nms_locality(boxes.astype(np.float64), nms_thresh) + + try: + import lanms + boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh) + except: + print( + 'you should install lanms by pip3 install lanms-nova to speed up 
nms_locality' + ) + boxes = nms_locality(boxes.astype(np.float64), nms_thresh) if boxes.shape[0] == 0: return [] # Here we filter some low score boxes by the average score map, diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py index 4b890f6fa352772e6ebe1614b798e1ce69cdd17c..f6013a406634ed110ea5af613a5f31e56ce90ead 100644 --- a/ppocr/utils/save_load.py +++ b/ppocr/utils/save_load.py @@ -67,6 +67,7 @@ def load_model(config, model, optimizer=None): if key not in params: logger.warning("{} not in loaded params {} !".format( key, params.keys())) + continue pre_value = params[key] if list(value.shape) == list(pre_value.shape): new_state_dict[key] = pre_value @@ -76,9 +77,14 @@ def load_model(config, model, optimizer=None): format(key, value.shape, pre_value.shape)) model.set_state_dict(new_state_dict) - optim_dict = paddle.load(checkpoints + '.pdopt') if optimizer is not None: - optimizer.set_state_dict(optim_dict) + if os.path.exists(checkpoints + '.pdopt'): + optim_dict = paddle.load(checkpoints + '.pdopt') + optimizer.set_state_dict(optim_dict) + else: + logger.warning( + "{}.pdopt is not exists, params of optimizer is not loaded". 
+ format(checkpoints)) if os.path.exists(checkpoints + '.states'): with open(checkpoints + '.states', 'rb') as f: diff --git a/requirements.txt b/requirements.txt index 903b8eda055573621f5d5479e85b17986b702ead..0c87c5c95069a2699f5a3a50320c883c6118ffe7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,4 @@ cython lxml premailer openpyxl -fasttext==0.9.1 -lanms-nova \ No newline at end of file +fasttext==0.9.1 \ No newline at end of file diff --git a/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt index 2cd2ba5f1e8198cacadab653d3979d5a1662f9ea..fcac6e3984cf3fd45fec9f7b736f794289278b25 100644 --- a/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -1,16 +1,16 @@ -===========================ch_ppocr_mobile_v2.0=========================== +===========================ch_PP-OCRv2=========================== model_name:ch_PP-OCRv2 python:python3.7 infer_model:./inference/ch_PP-OCRv2_det_infer/ infer_export:null -infer_quant:True +infer_quant:False inference:tools/infer/predict_system.py ---use_gpu:False ---enable_mkldnn:False +--use_gpu:False|True +--enable_mkldnn:False|True --cpu_threads:1|6 --rec_batch_num:1 ---use_tensorrt:False ---precision:int8 +--use_tensorrt:False|True +--precision:fp32 --det_model_dir: --image_dir:./inference/ch_det_data_50/all-sum-510/ --rec_model_dir:./inference/ch_PP-OCRv2_rec_infer/ diff --git a/test_tipc/configs/ch_PP-OCRv2_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt index 0ff24cbccfe282c12982714b5d079b0031703a04..1aad65b687992155133ed11533a14f642510361d 100644 --- 
a/test_tipc/configs/ch_PP-OCRv2_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ch_PP-OCRv2_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -1,15 +1,17 @@ ===========================kl_quant_params=========================== model_name:PPOCRv2_ocr_det_kl python:python3.7 +Global.pretrained_model:null +Global.save_inference_dir:null infer_model:./inference/ch_PP-OCRv2_det_infer/ infer_export:deploy/slim/quantization/quant_kl.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o infer_quant:True inference:tools/infer/predict_det.py ---use_gpu:False ---enable_mkldnn:False +--use_gpu:False|True +--enable_mkldnn:True --cpu_threads:1|6 --rec_batch_num:1 ---use_tensorrt:False +--use_tensorrt:False|True --precision:int8 --det_model_dir: --image_dir:./inference/ch_det_data_50/all-sum-510/ diff --git a/test_tipc/configs/ch_PP-OCRv2_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt index 8826bb4f078d518a79748f9cb305268c5ec2c198..083a3ae26e726e290ffde4095821cbf3c40f7178 100644 --- a/test_tipc/configs/ch_PP-OCRv2_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ch_PP-OCRv2_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -1,15 +1,17 @@ ===========================kl_quant_params=========================== model_name:PPOCRv2_ocr_rec_kl python:python3.7 +Global.pretrained_model:null +Global.save_inference_dir:null infer_model:./inference/ch_PP-OCRv2_rec_infer/ infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o infer_quant:True inference:tools/infer/predict_rec.py ---use_gpu:False ---enable_mkldnn:False +--use_gpu:False|True +--enable_mkldnn:False|True --cpu_threads:1|6 --rec_batch_num:1|6 ---use_tensorrt:False +--use_tensorrt:True --precision:int8 
--rec_model_dir: --image_dir:./inference/rec_inference diff --git a/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt index 77889729e61a4b859895ee0de52c92ed258ace31..92ac3e9d37460a7f299f5cc2929a9bcaabdc34ef 100644 --- a/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt +++ b/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt @@ -4,7 +4,7 @@ python:python3.7 gpu_list:0|0,1 Global.use_gpu:True|True Global.auto_cast:null -Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null @@ -15,7 +15,7 @@ null:null trainer:fpgm_train norm_train:null pact_train:null -fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy +fpgm_train:deploy/slim/prune/sensitivity_anal.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy distill_train:null null:null null:null @@ -29,7 +29,7 @@ Global.save_inference_dir:./output/ Global.pretrained_model: norm_export:null quant_export:null -fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o +fpgm_export:deploy/slim/prune/export_prune_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o distill_export:null export1:null export2:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt index 
eea9d789dd4919fe8112d337e48b82fabacfc57a..4a46f0cf09dcf2bb812910f0cf322dda0749b87c 100644 --- a/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -3,14 +3,14 @@ model_name:ch_ppocr_mobile_v2.0 python:python3.7 infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ infer_export:null -infer_quant:True +infer_quant:False inference:tools/infer/predict_system.py ---use_gpu:False ---enable_mkldnn:False +--use_gpu:False|True +--enable_mkldnn:False|True --cpu_threads:1|6 --rec_batch_num:1 ---use_tensorrt:False ---precision:int8 +--use_tensorrt:False|True +--precision:fp32 --det_model_dir: --image_dir:./inference/ch_det_data_50/all-sum-510/ --rec_model_dir:./inference/ch_ppocr_mobile_v2.0_rec_infer/ diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt index 46093302972aec7f0e0443b33482af06d98bbf4f..977312f2a49e76d92e4edc11f8f0d3ecf866999a 100644 --- a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt @@ -4,7 +4,7 @@ python:python3.7 gpu_list:0|0,1 Global.use_gpu:True|True Global.auto_cast:null -Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt index 4001ca18284b703b92a6998d2218df3f003c74d3..014dad5fc9d87c08a0725f57127f8bf2cb248be3 100644 --- 
a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt @@ -4,7 +4,7 @@ python:python gpu_list:-1 Global.use_gpu:False Global.auto_cast:null -Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null @@ -12,10 +12,10 @@ train_model_name:latest train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ null:null ## -trainer:norm_train|pact_train|fpgm_train -norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained -pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy +trainer:norm_train +norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:null +fpgm_train:null distill_train:null null:null null:null @@ -27,9 +27,9 @@ null:null ===========================infer_params=========================== Global.save_inference_dir:./output/ Global.pretrained_model: -norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o +norm_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +quant_export:null +fpgm_export:null distill_export:null export1:null export2:null diff --git 
a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt index 0f4faee4b32925b4d0780ece6838c176238c7000..6a63b39d976c0e9693deec097c37eb0ff212d8af 100644 --- a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt @@ -4,7 +4,7 @@ python:python gpu_list:0 Global.use_gpu:True Global.auto_cast:fp32|amp -Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null @@ -12,10 +12,10 @@ train_model_name:latest train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ null:null ## -trainer:norm_train|pact_train|fpgm_train -norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained -pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy +trainer:norm_train +norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:null +fpgm_train:null distill_train:null null:null null:null @@ -27,9 +27,9 @@ null:null ===========================infer_params=========================== Global.save_inference_dir:./output/ Global.pretrained_model: -norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o 
-quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o +norm_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +quant_export:null +fpgm_export:null distill_export:null export1:null export2:null @@ -49,63 +49,4 @@ inference:tools/infer/predict_det.py null:null --benchmark:True null:null -===========================cpp_infer_params=========================== -use_opencv:True -infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ -infer_quant:False -inference:./deploy/cpp_infer/build/ppocr det ---use_gpu:True|False ---enable_mkldnn:True|False ---cpu_threads:1|6 ---rec_batch_num:1 ---use_tensorrt:False|True ---precision:fp32|fp16 ---det_model_dir: ---image_dir:./inference/ch_det_data_50/all-sum-510/ -null:null ---benchmark:True -===========================serving_params=========================== -model_name:ocr_det -python:python3.7 -trans_model:-m paddle_serving_client.convert ---dirname:./inference/ch_ppocr_mobile_v2.0_det_infer/ ---model_filename:inference.pdmodel ---params_filename:inference.pdiparams ---serving_server:./deploy/pdserving/ppocr_det_mobile_2.0_serving/ ---serving_client:./deploy/pdserving/ppocr_det_mobile_2.0_client/ -serving_dir:./deploy/pdserving -web_service:web_service_det.py --config=config.yml --opt op.det.concurrency=1 -op.det.local_service_conf.devices:null|0 -op.det.local_service_conf.use_mkldnn:True|False -op.det.local_service_conf.thread_num:1|6 -op.det.local_service_conf.use_trt:False|True -op.det.local_service_conf.precision:fp32|fp16|int8 -pipline:pipeline_http_client.py|pipeline_rpc_client.py ---image_dir=../../doc/imgs -===========================kl_quant_params=========================== -infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ -infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o -infer_quant:True 
-inference:tools/infer/predict_det.py ---use_gpu:True|False ---enable_mkldnn:True|False ---cpu_threads:1|6 ---rec_batch_num:1 ---use_tensorrt:False|True ---precision:int8 ---det_model_dir: ---image_dir:./inference/ch_det_data_50/all-sum-510/ -null:null ---benchmark:True -null:null -null:null -===========================lite_params=========================== -inference:./ocr_db_crnn det -infer_model:./models/ch_ppocr_mobile_v2.0_det_opt.nb|./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb ---cpu_threads:1|4 ---batch_size:1 ---power_mode:LITE_POWER_HIGH|LITE_POWER_LOW ---image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/|./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg ---config_dir:./config.txt ---rec_dict_dir:./ppocr_keys_v1.txt ---benchmark:True + diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt index bd58e964033243c00e7a270d642f97ced7659114..1039dcad06d63bb1fc1a47b7cc4760cd8d75ed63 100644 --- a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -1,15 +1,17 @@ ===========================kl_quant_params=========================== -model_name:PPOCRv2_ocr_det +model_name:ch_ppocr_mobile_v2.0_det_KL python:python3.7 +Global.pretrained_model:null +Global.save_inference_dir:null infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ infer_export:deploy/slim/quantization/quant_kl.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o infer_quant:True inference:tools/infer/predict_det.py ---use_gpu:False ---enable_mkldnn:False +--use_gpu:False|True +--enable_mkldnn:True --cpu_threads:1|6 --rec_batch_num:1 ---use_tensorrt:False +--use_tensorrt:False|True --precision:int8 
--det_model_dir: --image_dir:./inference/ch_det_data_50/all-sum-510/ diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt index 7328be25ffd0ffa0abac83ec80e46be42ff93185..8a6c6568584250d269acfe63aef43ef66410fd99 100644 --- a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt @@ -4,7 +4,7 @@ python:python3.7 gpu_list:0|0,1 Global.use_gpu:True|True Global.auto_cast:null -Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null @@ -14,7 +14,7 @@ null:null ## trainer:pact_train norm_train:null -pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o +pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o fpgm_train:null distill_train:null null:null @@ -28,7 +28,7 @@ null:null Global.save_inference_dir:./output/ Global.pretrained_model: norm_export:null -quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o +quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o fpgm_export:null distill_export:null export1:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml new file mode 100644 index 0000000000000000000000000000000000000000..ee42dbfd0c80667ae5c3da4ee6df6416e1908388 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml @@ -0,0 +1,102 @@ +Global: + use_gpu: true + epoch_num: 500 + 
log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_chinese_lite_v2.0 + save_epoch_step: 3 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + max_text_length: 25 + infer_mode: False + use_space_char: True + save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: [1, 2, 2, 2] + disable_se: True + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - RecAug: + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 
'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..a968573d4410f3d474788cb5f6ab414c5d02aae3 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec_FPGM +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/ic15_data/test/word_1.png +null:null +## +trainer:fpgm_train +norm_train:null +pact_train:null +fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./pretrain_models/ch_ppocr_mobile_v2.0_rec_train/best_accuracy +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:null +quant_export:null +fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml -o +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:null +infer_export:null +infer_quant:False +inference:tools/infer/predict_rec.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 
+--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +null:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..92f33c58c9e97347e53b778bde5a21472b769f36 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,21 @@ +===========================kl_quant_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec_KL +python:python3.7 +Global.pretrained_model:null +Global.save_inference_dir:null +infer_model:./inference/ch_ppocr_mobile_v2.0_rec_infer/ +infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml -o +infer_quant:True +inference:tools/infer/predict_rec.py +--use_gpu:False|True +--enable_mkldnn:True +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:int8 +--det_model_dir: +--image_dir:./inference/rec_inference +null:null +--benchmark:True +null:null +null:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml new file mode 100644 index 0000000000000000000000000000000000000000..b06dafe7fdc01eadeee51e70dfa4e8c675bda531 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml @@ -0,0 +1,101 @@ +Global: + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_chinese_lite_v2.0 + save_epoch_step: 3 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: 
[0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + max_text_length: 25 + infer_mode: False + use_space_char: True + save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: [1, 2, 2, 2] + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - RecAug: + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git 
a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml new file mode 100644 index 0000000000000000000000000000000000000000..b06dafe7fdc01eadeee51e70dfa4e8c675bda531 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml @@ -0,0 +1,101 @@ +Global: + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_chinese_lite_v2.0 + save_epoch_step: 3 + # evaluation is run every 2000 iterations after the 0th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + max_text_length: 25 + infer_mode: False + use_space_char: True + save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: [1, 2, 2, 2] + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - RecAug: + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True +
batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..7bbdd58ae13eca00623123cf2ca39d3b76daa72a --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec_PACT +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.checkpoints:null +train_model_name:latest +train_infer_img_dir:./train_data/ic15_data/test/word_1.png +null:null +## +trainer:pact_train +norm_train:null +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.checkpoints: +norm_export:null +quant_export:deploy/slim/quantization/export_model.py 
-c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:null +infer_export:null +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/ch_ppocr_server_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt index 5a93571a76366de191d2fb1736aa3ff4c71b1737..92d7031e884d10df3a5c98bf675d64d63b3cb335 100644 --- a/test_tipc/configs/ch_ppocr_server_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ch_ppocr_server_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -1,16 +1,16 @@ -===========================ch_ppocr_mobile_v2.0=========================== +===========================ch_ppocr_server_v2.0=========================== model_name:ch_ppocr_server_v2.0 python:python3.7 infer_model:./inference/ch_ppocr_server_v2.0_det_infer/ infer_export:null infer_quant:True inference:tools/infer/predict_system.py ---use_gpu:False ---enable_mkldnn:False +--use_gpu:False|True +--enable_mkldnn:False|True --cpu_threads:1|6 --rec_batch_num:1 --use_tensorrt:False ---precision:int8 +--precision:fp32 --det_model_dir: --image_dir:./inference/ch_det_data_50/all-sum-510/ --rec_model_dir:./inference/ch_ppocr_server_v2.0_rec_infer/ diff --git a/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt b/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt index
f35bb3876737606f58949968d5b43f3bd796be30..f10985a91902716968660af5188473e4f1a7ae3d 100644 --- a/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt +++ b/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt @@ -37,13 +37,13 @@ export2:null infer_model:null infer_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o infer_quant:False -inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" +inference:tools/infer/predict_rec.py --use_gpu:True|False --enable_mkldnn:True|False --cpu_threads:1|6 --rec_batch_num:1|6 --use_tensorrt:True|False ---precision:fp32|fp16|int8 +--precision:fp32|int8 --rec_model_dir: --image_dir:./inference/rec_inference --save_log_path:./test/output/ diff --git a/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml b/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml new file mode 100644 index 0000000000000000000000000000000000000000..d37fdcfbb5b27404403674d99c1b8abe8cd65e85 --- /dev/null +++ b/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml @@ -0,0 +1,135 @@ +Global: + use_gpu: true + epoch_num: 600 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/det_mv3_pse/ + save_epoch_step: 600 + # evaluation is run every 1000 iterations + eval_batch_step: [ 0,1000 ] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_pse/predicts_pse.txt + +Architecture: + model_type: det + algorithm: PSE + Transform: null + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: FPN + out_channels: 96 + Head: + name: PSEHead + hidden_dim: 96 + out_channels: 7 + +Loss: + name: PSELoss + alpha: 0.7 + ohem_ratio: 3 + kernel_sample_mask: pred + reduction: none
+Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Step + learning_rate: 0.001 + step_size: 200 + gamma: 0.1 + regularizer: + name: 'L2' + factor: 0.0005 + +PostProcess: + name: PSEPostProcess + thresh: 0 + box_thresh: 0.85 + min_area: 16 + box_type: box # 'box' or 'poly' + scale: 1 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - ColorJitter: + brightness: 0.12549019607843137 + saturation: 0.5 + - IaaAugment: + augmenter_args: + - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } } + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } } + - MakePseGt: + kernel_num: 7 + min_shrink_ratio: 0.4 + size: 640 + - RandomCropImgMask: + size: [ 640,640 ] + main_key: gt_text + crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] + - NormalizeImage: + scale: 1./255. + mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 16 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 736 + limit_type: min + - NormalizeImage: + scale: 1./255. 
+ mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 8 \ No newline at end of file diff --git a/test_tipc/configs/det_mv3_pse_v2.0/train_infer_python.txt b/test_tipc/configs/det_mv3_pse_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..f9909027f10d9e9f96d65f9f5a1c5f3fd5c9e1c6 --- /dev/null +++ b/test_tipc/configs/det_mv3_pse_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_mv3_pse_v2.0 +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/det_mv3_pse/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False
+--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +--det_algorithm:PSE diff --git a/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt index e9eaa779520f78622509153482fd6a84322c9cc5..dfb376237ee35c277fcd86a88328c562d5c0429a 100644 --- a/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt +++ b/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt @@ -34,7 +34,7 @@ distill_export:null export1:null export2:null ## -train_model:./inference/det_mv3_east/best_accuracy +train_model:./inference/det_r50_vd_east/best_accuracy infer_export:tools/export_model.py -c test_tipc/cconfigs/det_r50_vd_east_v2.0/det_r50_vd_east.yml -o infer_quant:False inference:tools/infer/predict_det.py diff --git a/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml b/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ebc4252718d5572837eac58061bf6f9eb35bf73 --- /dev/null +++ b/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml @@ -0,0 +1,134 @@ +Global: + use_gpu: true + epoch_num: 600 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/det_r50_vd_pse/ + save_epoch_step: 600 + # evaluation is run every 1000 iterations + eval_batch_step: [ 0,1000 ] + cal_metric_during_train: False + pretrained_model: + checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_pse/predicts_pse.txt + +Architecture: + model_type: det + algorithm: PSE + Transform: + Backbone: + name: ResNet + layers: 50 + Neck: + name: FPN + out_channels: 256 + Head: + name: PSEHead + hidden_dim: 256 + out_channels: 7 + +Loss: + name: PSELoss + alpha: 0.7 +
ohem_ratio: 3 + kernel_sample_mask: pred + reduction: none + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Step + learning_rate: 0.0001 + step_size: 200 + gamma: 0.1 + regularizer: + name: 'L2' + factor: 0.0005 + +PostProcess: + name: PSEPostProcess + thresh: 0 + box_thresh: 0.85 + min_area: 16 + box_type: box # 'box' or 'poly' + scale: 1 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - ColorJitter: + brightness: 0.12549019607843137 + saturation: 0.5 + - IaaAugment: + augmenter_args: + - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } } + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } } + - MakePseGt: + kernel_num: 7 + min_shrink_ratio: 0.4 + size: 640 + - RandomCropImgMask: + size: [ 640,640 ] + main_key: gt_text + crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] + - NormalizeImage: + scale: 1./255. + mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 736 + limit_type: min + - NormalizeImage: + scale: 1./255. 
+ mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 8 \ No newline at end of file diff --git a/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ab6d45d7c1eb5e3c17fd53a8c8c504812c1012c --- /dev/null +++ b/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_r50_vd_pse_v2.0 +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/det_r50_vd_pse/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False
+--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +--det_algorithm:PSE diff --git a/test_tipc/configs/en_server_pgnetA/train_infer_python.txt b/test_tipc/configs/en_server_pgnetA/train_infer_python.txt index c7b2d1b0a712693b666cd0b40cff4a8871084aa6..d70776998c4e326905920586e90f2833fe42e89b 100644 --- a/test_tipc/configs/en_server_pgnetA/train_infer_python.txt +++ b/test_tipc/configs/en_server_pgnetA/train_infer_python.txt @@ -44,7 +44,7 @@ inference:tools/infer/predict_e2e.py --rec_batch_num:1 --use_tensorrt:False|True --precision:fp32|fp16|int8 ---det_model_dir: +--e2e_model_dir: --image_dir:./inference/ch_det_data_50/all-sum-510/ null:null --benchmark:True diff --git a/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml b/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml new file mode 100644 index 0000000000000000000000000000000000000000..15119bb2a9de02c19684d21ad5a1859db94895ce --- /dev/null +++ b/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml @@ -0,0 +1,103 @@ +Global: + use_gpu: True + epoch_num: 21 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/nrtr/ + save_epoch_step: 1 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: ppocr/utils/EN_symbol_dict.txt + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_nrtr.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.99 + clip_norm: 5.0 + lr: + name: Cosine + learning_rate: 0.0005 + warmup_epoch: 2 + regularizer: + name: 'L2' + factor: 0. 
+ +Architecture: + model_type: rec + algorithm: NRTR + in_channels: 1 + Transform: + Backbone: + name: MTB + cnn_num: 2 + Head: + name: Transformer + d_model: 512 + num_encoder_layers: 6 + beam_size: -1 # When Beam size is greater than 0, it means to use beam search when evaluation. + + +Loss: + name: NRTRLoss + smoothing: True + +PostProcess: + name: NRTRLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - NRTRLabelEncode: # Class handling label + - NRTRRecResizeImg: + image_shape: [100, 32] + resize_type: PIL # PIL or OpenCV + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 512 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - NRTRLabelEncode: # Class handling label + - NRTRRecResizeImg: + image_shape: [100, 32] + resize_type: PIL # PIL or OpenCV + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 1 + use_shared_memory: False diff --git a/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt b/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..67630d858c7633daf8e1800b1ab10adb86e6c3bc --- /dev/null +++ b/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:rec_mtb_nrtr +python:python3.7 +gpu_list:0|0,1 
+Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/EN_symbol_dict.txt --rec_image_shape="1,32,100" --rec_algorithm="NRTR" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt index 698c14ee66914bea7a6926650234ad7a979d01b4..fdc39f1b851a4e05735744f878917a3dfcc1d405 100644 --- a/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt +++ b/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt @@ -43,7 +43,7 @@ inference:tools/infer/predict_rec.py 
--rec_char_dict_path=./ppocr/utils/ic15_dic --cpu_threads:1|6 --rec_batch_num:1|6 --use_tensorrt:True|False ---precision:fp32|fp16|int8 +--precision:fp32|int8 --rec_model_dir: --image_dir:./inference/rec_inference --save_log_path:./test/output/ diff --git a/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/train_infer_python.txt index 53250c57e49967b9816b62dae07c64c0d22d58af..9810689679903b4cedff253834a1a999c4e8a5f8 100644 --- a/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/train_infer_python.txt +++ b/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/train_infer_python.txt @@ -43,7 +43,7 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic --cpu_threads:1|6 --rec_batch_num:1|6 --use_tensorrt:True|False ---precision:fp32|fp16|int8 +--precision:fp32|int8 --rec_model_dir: --image_dir:./inference/rec_inference --save_log_path:./test/output/ diff --git a/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml b/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml new file mode 100644 index 0000000000000000000000000000000000000000..2b14c047d4645104fb9532a1b391072dc341f3b7 --- /dev/null +++ b/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml @@ -0,0 +1,103 @@ +Global: + use_gpu: True + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/rec_mv3_tps_bilstm_att/ + save_epoch_step: 3 + # evaluation is run every 2000 iterations after the 0th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_mv3_tps_bilstm_att.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate:
0.0005 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: RARE + Transform: + name: TPS + num_fiducial: 20 + loc_lr: 0.1 + model_name: small + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 96 + Head: + name: AttentionHead + hidden_size: 96 + + +Loss: + name: AttentionLoss + +PostProcess: + name: AttnLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - AttnLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - AttnLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 1 diff --git a/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/train_infer_python.txt b/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..3791aa17b2b5a16565ab3456932e43fd77254472 --- /dev/null +++ b/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/train_infer_python.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:rec_mv3_tps_bilstm_att_v2.0 +python:python3.7 
+gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" --rec_algorithm="RARE" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/train_infer_python.txt index 5de24ae5835f91ee2b4a6d7f816197ce694002f6..33700ad696394ad9404a5424cddf93608220917a 100644 --- a/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/train_infer_python.txt +++ 
b/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/train_infer_python.txt @@ -43,7 +43,7 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic --cpu_threads:1|6 --rec_batch_num:1|6 --use_tensorrt:True|False ---precision:fp32|fp16|int8 +--precision:fp32|int8 --rec_model_dir: --image_dir:./inference/rec_inference --save_log_path:./test/output/ diff --git a/test_tipc/configs/rec_r31_sar/rec_r31_sar.yml b/test_tipc/configs/rec_r31_sar/rec_r31_sar.yml new file mode 100644 index 0000000000000000000000000000000000000000..36bc3c5d12c55de574507cd613da772bbe0d2ced --- /dev/null +++ b/test_tipc/configs/rec_r31_sar/rec_r31_sar.yml @@ -0,0 +1,98 @@ +Global: + use_gpu: true + epoch_num: 5 + log_smooth_window: 20 + print_batch_step: 20 + save_model_dir: ./sar_rec + save_epoch_step: 1 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: + # for data or label process + character_dict_path: ppocr/utils/dict90.txt + max_text_length: 30 + infer_mode: False + use_space_char: False + rm_symbol: True + save_res_path: ./output/rec/predicts_sar.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Piecewise + decay_epochs: [3, 4] + values: [0.001, 0.0001, 0.00001] + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: SAR + Transform: + Backbone: + name: ResNet31 + Head: + name: SARHead + +Loss: + name: SARLoss + +PostProcess: + name: SARLabelDecode + +Metric: + name: RecMetric + + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - SARLabelEncode: # Class handling label + - SARRecResizeImg: + image_shape: [3, 48, 48, 160] # h:48 w:[48,160] + width_downsample_ratio: 0.25 + - KeepKeys: + keep_keys: 
['image', 'label', 'valid_ratio'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 64 + drop_last: True + num_workers: 8 + use_shared_memory: False + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - SARLabelEncode: # Class handling label + - SARRecResizeImg: + image_shape: [3, 48, 48, 160] + width_downsample_ratio: 0.25 + - KeepKeys: + keep_keys: ['image', 'label', 'valid_ratio'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 64 + num_workers: 4 + use_shared_memory: False + diff --git a/test_tipc/configs/rec_r31_sar/train_infer_python.txt b/test_tipc/configs/rec_r31_sar/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..5cc31b7b8b793e7c82f6676f1fec9a5e8b2393f4 --- /dev/null +++ b/test_tipc/configs/rec_r31_sar/train_infer_python.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:rec_r31_sar +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_r31_sar/rec_r31_sar.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_r31_sar/rec_r31_sar.yml -o +null:null +## +===========================infer_params=========================== 
+Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_r31_sar/rec_r31_sar.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_r31_sar/rec_r31_sar.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict90.txt --rec_image_shape="3,48,48,160" --rec_algorithm="SAR" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|fp16|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/train_infer_python.txt index baf306e1897c9f0f65ebe45747738b18173dd286..857a212fe6f5e0bd9612b55841e748e6b4409061 100644 --- a/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/train_infer_python.txt +++ b/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/train_infer_python.txt @@ -43,7 +43,7 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic --cpu_threads:1|6 --rec_batch_num:1|6 --use_tensorrt:True|False ---precision:fp32|fp16|int8 +--precision:fp32|int8 --rec_model_dir: --image_dir:./inference/rec_inference --save_log_path:./test/output/ diff --git a/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/train_infer_python.txt index 0d54bd5647d9cd4d8b1ffec5b2baa99874cff7f6..85804b7453729dad6d2e87d0efd1a053dd9a0aac 100644 --- a/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/train_infer_python.txt +++ b/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/train_infer_python.txt @@ -43,7 +43,7 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic 
--cpu_threads:1|6 --rec_batch_num:1|6 --use_tensorrt:True|False ---precision:fp32|fp16|int8 +--precision:fp32|int8 --rec_model_dir: --image_dir:./inference/rec_inference --save_log_path:./test/output/ diff --git a/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml b/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml new file mode 100644 index 0000000000000000000000000000000000000000..5dd797b0ec742932ca7f85353b9ea4c5eb637edd --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml @@ -0,0 +1,102 @@ +Global: + use_gpu: True + epoch_num: 400 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/b3_rare_r34_none_gru/ + save_epoch_step: 3 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_b3_rare_r34_none_gru.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0.00000 + +Architecture: + model_type: rec + algorithm: RARE + Transform: + name: TPS + num_fiducial: 20 + loc_lr: 0.1 + model_name: large + Backbone: + name: ResNet + layers: 34 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 256 #96 + Head: + name: AttentionHead # AttentionHead + hidden_size: 256 # + l2_decay: 0.00001 + +Loss: + name: AttentionLoss + +PostProcess: + name: AttnLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + 
channel_first: False + - AttnLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - AttnLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/train_infer_python.txt b/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..e816868f33de7ca8794068e8498f6f7845df0324 --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/train_infer_python.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:rec_r34_vd_tps_bilstm_att_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c 
test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" --rec_algorithm="RARE" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/train_infer_python.txt index 666c7d159b4cecb262197b3373ba3df125037d67..bb49ae5977208b2921f4a825b62afa7935f572f1 100644 --- a/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/train_infer_python.txt +++ b/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/train_infer_python.txt @@ -43,7 +43,7 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic --cpu_threads:1|6 --rec_batch_num:1|6 --use_tensorrt:True|False ---precision:fp32|fp16|int8 +--precision:fp32|int8 --rec_model_dir: --image_dir:./inference/rec_inference --save_log_path:./test/output/ diff --git a/test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml b/test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml new file mode 100644 index 0000000000000000000000000000000000000000..41e525205d2b047934a69a8b41a5e7d776990097 --- /dev/null +++ 
b/test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml @@ -0,0 +1,108 @@ +Global: + use_gpu: True + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 5 + save_model_dir: ./output/rec/srn_new + save_epoch_step: 3 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [0, 5000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: + max_text_length: 25 + num_heads: 8 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_srn.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + clip_norm: 10.0 + lr: + learning_rate: 0.0001 + +Architecture: + model_type: rec + algorithm: SRN + in_channels: 1 + Transform: + Backbone: + name: ResNetFPN + Head: + name: SRNHead + max_text_length: 25 + num_heads: 8 + num_encoder_TUs: 2 + num_decoder_TUs: 4 + hidden_dims: 512 + +Loss: + name: SRNLoss + +PostProcess: + name: SRNLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - SRNLabelEncode: # Class handling label + - SRNRecResizeImg: + image_shape: [1, 64, 256] + - KeepKeys: + keep_keys: ['image', + 'label', + 'length', + 'encoder_word_pos', + 'gsrm_word_pos', + 'gsrm_slf_attn_bias1', + 'gsrm_slf_attn_bias2'] # dataloader will return list in this order + loader: + shuffle: False + batch_size_per_card: 64 + drop_last: False + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - SRNLabelEncode: # Class handling label + - 
SRNRecResizeImg: + image_shape: [1, 64, 256] + - KeepKeys: + keep_keys: ['image', + 'label', + 'length', + 'encoder_word_pos', + 'gsrm_word_pos', + 'gsrm_slf_attn_bias1', + 'gsrm_slf_attn_bias2'] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 32 + num_workers: 4 diff --git a/test_tipc/configs/rec_r50_fpn_vd_none_srn/train_infer_python.txt b/test_tipc/configs/rec_r50_fpn_vd_none_srn/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..b3549c635f267cdb0b494341e9f250669cd74bfe --- /dev/null +++ b/test_tipc/configs/rec_r50_fpn_vd_none_srn/train_infer_python.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:rec_r50_fpn_vd_none_srn +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml -o +infer_quant:False 
+inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="1,64,256" --rec_algorithm="SRN" --use_space_char=False +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/docs/jeston_test_train_inference_python.md b/test_tipc/docs/jeston_test_train_inference_python.md index e23aa7651da8b57c9f5e92338bb21dbde2ccda05..d96505985ea8a291b3579acb2aaee1b3d66c1baa 100644 --- a/test_tipc/docs/jeston_test_train_inference_python.md +++ b/test_tipc/docs/jeston_test_train_inference_python.md @@ -1,6 +1,6 @@ # Jeston端基础训练预测功能测试 -Jeston端基础训练预测功能测试的主程序为`test_train_inference_python.sh`,由于Jeston端CPU较差,Jeston只需要测试TIPC关于GPU和TensorRT预测推理的部分即可。 +Jeston端基础训练预测功能测试的主程序为`test_inference_inference.sh`,由于Jeston端CPU较差,Jeston只需要测试TIPC关于GPU和TensorRT预测推理的部分即可。 ## 1. 
测试结论汇总 @@ -40,21 +40,21 @@ Jeston端基础训练预测功能测试的主程序为`test_train_inference_pyth ### 2.2 功能测试 -先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`python_infer_*.log`格式的日志文件。 +先运行`prepare.sh`准备数据和模型,然后运行`test_inference_inference.sh`进行测试,最终在```test_tipc/output```目录下生成`python_infer_*.log`格式的日志文件。 -`test_train_inference_python.sh`包含5种[运行模式](./test_train_inference_python.md),在Jeston端,仅需要测试预测推理的模式即可: +`test_inference_inference.sh`仅有一个模式`whole_infer`,在Jeston端,仅需要测试预测推理的模式即可: ``` - 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; ```shell bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer' # 用法1: -bash test_tipc/test_inference_jeston.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer' +bash test_tipc/test_inference_inference.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer' # 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 bash test_tipc/test_inference_jeston.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer' '1' ``` -运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如`lite_train_lite_infer`模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件: +运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如`whole_infer`模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件: ``` test_tipc/output/ |- results_python.log # 运行指令状态的日志 diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 9854da77170e5c0a6a9ed5b8dee7c267b39ce833..d152ef29d0a2983e656f9868147158a3b7e66aa5 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -45,7 +45,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/total_text_lite.tar --no-check-certificate wget -nc -P ./pretrain_models/ 
https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar --no-check-certificate cd ./pretrain_models/ && tar xf en_server_pgnetA.tar && cd ../ - cd ./train_data && tar xf total_text_lite.tar && ln -s total_text && cd ../ + cd ./train_data && tar xf total_text_lite.tar && ln -s total_text_lite total_text && cd ../ fi if [ ${model_name} == "det_r50_vd_sast_icdar15_v2.0" ] || [ ${model_name} == "det_r50_vd_sast_totaltext_v2.0" ]; then wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams --no-check-certificate @@ -61,6 +61,10 @@ if [ ${MODE} = "lite_train_lite_infer" ];then wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar --no-check-certificate cd ./inference/ && tar xf det_r50_vd_db_v2.0_train.tar && cd ../ fi + if [ ${model_name} == "ch_ppocr_mobile_v2.0_rec_FPGM" ]; then + wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar --no-check-certificate + cd ./pretrain_models/ && tar xf ch_ppocr_mobile_v2.0_rec_train.tar && cd ../ + fi elif [ ${MODE} = "whole_train_whole_infer" ];then wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate @@ -137,11 +141,6 @@ elif [ ${MODE} = "whole_infer" ];then wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar --no-check-certificate cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../ fi - if [ ${model_name} = "ch_PPOCRv2_det" ]; then - wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate - wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/e2e_server_pgnetA_infer.tar --no-check-certificate - cd ./inference && tar xf e2e_server_pgnetA_infer.tar && tar xf 
ch_det_data_50.tar && cd ../ - fi if [ ${model_name} == "en_server_pgnetA" ]; then wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar --no-check-certificate cd ./inference && tar xf en_server_pgnetA.tar && cd ../ @@ -160,7 +159,10 @@ elif [ ${MODE} = "whole_infer" ];then fi fi if [ ${MODE} = "klquant_whole_infer" ]; then - if [ ${model_name} = "ch_ppocr_mobile_v2.0_det" ]; then + wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar --no-check-certificate + cd ./train_data/ && tar xf icdar2015_lite.tar + ln -s ./icdar2015_lite ./icdar2015 && cd ../ + if [ ${model_name} = "ch_ppocr_mobile_v2.0_det_KL" ]; then wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && cd ../ @@ -171,6 +173,13 @@ if [ ${MODE} = "klquant_whole_infer" ]; then wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar --no-check-certificate cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../ fi + if [ ${model_name} = "ch_ppocr_mobile_v2.0_rec_KL" ]; then + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar --no-check-certificate + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate + wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar --no-check-certificate + cd ./train_data/ && tar xf ic15_data.tar && cd ../ + cd ./inference && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf rec_inference.tar && cd ../ + fi fi if [ ${MODE} = "cpp_infer" ];then diff --git a/test_tipc/test_inference_jeston.sh 
b/test_tipc/test_inference_jeston.sh deleted file mode 100644 index 2fd76e1e9e7e8c7b52d0b6838cd15840a59fe5c4..0000000000000000000000000000000000000000 --- a/test_tipc/test_inference_jeston.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash -source test_tipc/common_func.sh -source test_tipc/test_train_inference_python.sh - -FILENAME=$1 -# MODE be one of ['whole_infer'] -MODE=$2 - -dataline=$(awk 'NR==1, NR==17{print}' $FILENAME) - -# parser params -IFS=$'\n' -lines=(${dataline}) - -model_name=$(func_parser_value "${lines[1]}") -python=$(func_parser_value "${lines[2]}") - -infer_model_dir_list=$(func_parser_value "${lines[3]}") -infer_export_list=$(func_parser_value "${lines[4]}") -infer_is_quant=$(func_parser_value "${lines[5]}") -# parser inference -inference_py=$(func_parser_value "${lines[6]}") -use_gpu_key=$(func_parser_key "${lines[7]}") -use_gpu_list=$(func_parser_value "${lines[7]}") -use_mkldnn_key=$(func_parser_key "${lines[8]}") -use_mkldnn_list=$(func_parser_value "${lines[8]}") -cpu_threads_key=$(func_parser_key "${lines[9]}") -cpu_threads_list=$(func_parser_value "${lines[9]}") -batch_size_key=$(func_parser_key "${lines[10]}") -batch_size_list=$(func_parser_value "${lines[10]}") -use_trt_key=$(func_parser_key "${lines[11]}") -use_trt_list=$(func_parser_value "${lines[11]}") -precision_key=$(func_parser_key "${lines[12]}") -precision_list=$(func_parser_value "${lines[12]}") -infer_model_key=$(func_parser_key "${lines[13]}") -image_dir_key=$(func_parser_key "${lines[14]}") -infer_img_dir=$(func_parser_value "${lines[14]}") -save_log_key=$(func_parser_key "${lines[15]}") -benchmark_key=$(func_parser_key "${lines[16]}") -benchmark_value=$(func_parser_value "${lines[16]}") -infer_key1=$(func_parser_key "${lines[17]}") -infer_value1=$(func_parser_value "${lines[17]}") - - -LOG_PATH="./test_tipc/output" -mkdir -p ${LOG_PATH} -status_log="${LOG_PATH}/results_python.log" - - -if [ ${MODE} = "whole_infer" ]; then - GPUID=$3 - if [ ${#GPUID} -le 0 ];then - env=" " - 
else - env="export CUDA_VISIBLE_DEVICES=${GPUID}" - fi - # set CUDA_VISIBLE_DEVICES - eval $env - export Count=0 - IFS="|" - infer_run_exports=(${infer_export_list}) - infer_quant_flag=(${infer_is_quant}) - for infer_model in ${infer_model_dir_list[*]}; do - # run export - if [ ${infer_run_exports[Count]} != "null" ];then - save_infer_dir=$(dirname $infer_model) - set_export_weight=$(func_set_params "${export_weight}" "${infer_model}") - set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}") - export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}" - echo ${infer_run_exports[Count]} - echo $export_cmd - eval $export_cmd - status_export=$? - status_check $status_export "${export_cmd}" "${status_log}" - else - save_infer_dir=${infer_model} - fi - #run inference - is_quant=${infer_quant_flag[Count]} - if [ ${MODE} = "klquant_infer" ]; then - is_quant="True" - fi - func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant} - Count=$(($Count + 1)) - done -fi - diff --git a/test_tipc/test_inference_python.sh b/test_tipc/test_inference_python.sh new file mode 100644 index 0000000000000000000000000000000000000000..72516e044ed8a23c660a4c4f486d19f22a584fb0 --- /dev/null +++ b/test_tipc/test_inference_python.sh @@ -0,0 +1,169 @@ +#!/bin/bash +source test_tipc/common_func.sh +#source test_tipc/test_train_inference_python.sh + +FILENAME=$1 +# MODE be one of ['whole_infer'] +MODE=$2 + +dataline=$(awk 'NR==1, NR==20{print}' $FILENAME) + +# parser params +IFS=$'\n' +lines=(${dataline}) + +model_name=$(func_parser_value "${lines[1]}") +python=$(func_parser_value "${lines[2]}") + +infer_model_dir_list=$(func_parser_value "${lines[3]}") +infer_export_list=$(func_parser_value "${lines[4]}") +infer_is_quant=$(func_parser_value "${lines[5]}") +# parser inference +inference_py=$(func_parser_value "${lines[6]}") +use_gpu_key=$(func_parser_key "${lines[7]}") 
+use_gpu_list=$(func_parser_value "${lines[7]}") +use_mkldnn_key=$(func_parser_key "${lines[8]}") +use_mkldnn_list=$(func_parser_value "${lines[8]}") +cpu_threads_key=$(func_parser_key "${lines[9]}") +cpu_threads_list=$(func_parser_value "${lines[9]}") +batch_size_key=$(func_parser_key "${lines[10]}") +batch_size_list=$(func_parser_value "${lines[10]}") +use_trt_key=$(func_parser_key "${lines[11]}") +use_trt_list=$(func_parser_value "${lines[11]}") +precision_key=$(func_parser_key "${lines[12]}") +precision_list=$(func_parser_value "${lines[12]}") +infer_model_key=$(func_parser_key "${lines[13]}") +image_dir_key=$(func_parser_key "${lines[14]}") +infer_img_dir=$(func_parser_value "${lines[14]}") +rec_model_key=$(func_parser_key "${lines[15]}") +rec_model_value=$(func_parser_value "${lines[15]}") +benchmark_key=$(func_parser_key "${lines[16]}") +benchmark_value=$(func_parser_value "${lines[16]}") +infer_key1=$(func_parser_key "${lines[17]}") +infer_value1=$(func_parser_value "${lines[17]}") + + + +LOG_PATH="./test_tipc/output" +mkdir -p ${LOG_PATH} +status_log="${LOG_PATH}/results_python.log" + + +function func_inference(){ + IFS='|' + _python=$1 + _script=$2 + _model_dir=$3 + _log_path=$4 + _img_dir=$5 + _flag_quant=$6 + # inference + for use_gpu in ${use_gpu_list[*]}; do + if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then + for use_mkldnn in ${use_mkldnn_list[*]}; do + if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then + continue + fi + for threads in ${cpu_threads_list[*]}; do + for batch_size in ${batch_size_list[*]}; do + for precision in ${precision_list[*]}; do + if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then + continue + fi # skip when enable fp16 but disable mkldnn + if [ ${_flag_quant} = "True" ] && [ ${precision} != "int8" ]; then + continue + fi # skip when quant model inference but precision is not int8 + set_precision=$(func_set_params "${precision_key}" "${precision}") + + 
_save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log" + set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}") + set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}") + set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}") + set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}") + set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}") + set_infer_params0=$(func_set_params "${rec_model_key}" "${rec_model_value}") + set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}") + command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_params0} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 " + eval $command + last_status=${PIPESTATUS[0]} + eval "cat ${_save_log_path}" + status_check $last_status "${command}" "${status_log}" + done + done + done + done + elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then + for use_trt in ${use_trt_list[*]}; do + for precision in ${precision_list[*]}; do + if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then + continue + fi + if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then + continue + fi + if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then + continue + fi + for batch_size in ${batch_size_list[*]}; do + _save_log_path="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log" + set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}") + set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}") + set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}") + set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}") + 
set_precision=$(func_set_params "${precision_key}" "${precision}") + set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}") + set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}") + set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}") + command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} ${set_infer_params0} > ${_save_log_path} 2>&1 " + eval $command + last_status=${PIPESTATUS[0]} + eval "cat ${_save_log_path}" + status_check $last_status "${command}" "${status_log}" + + done + done + done + else + echo "Does not support hardware other than CPU and GPU Currently!" + fi + done +} + +if [ ${MODE} = "whole_infer" ]; then + GPUID=$3 + if [ ${#GPUID} -le 0 ];then + env=" " + else + env="export CUDA_VISIBLE_DEVICES=${GPUID}" + fi + # set CUDA_VISIBLE_DEVICES + eval $env + export Count=0 + IFS="|" + infer_run_exports=(${infer_export_list}) + infer_quant_flag=(${infer_is_quant}) + for infer_model in ${infer_model_dir_list[*]}; do + # run export + if [ ${infer_run_exports[Count]} != "null" ];then + save_infer_dir=$(dirname $infer_model) + set_export_weight=$(func_set_params "${export_weight}" "${infer_model}") + set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}") + export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}" + echo ${infer_run_exports[Count]} + eval $export_cmd + status_export=$? 
+ status_check $status_export "${export_cmd}" "${status_log}" + else + save_infer_dir=${infer_model} + fi + #run inference + is_quant=${infer_quant_flag[Count]} + if [ ${MODE} = "klquant_infer" ]; then + is_quant="True" + fi + func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant} + Count=$(($Count + 1)) + done +fi + + diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh index 7d035256527e01f31a4a1bc113caff3c744d859d..0b0a4e4a75f5e978f64404b27a5f26594dbd484e 100644 --- a/test_tipc/test_train_inference_python.sh +++ b/test_tipc/test_train_inference_python.sh @@ -90,36 +90,39 @@ infer_value1=$(func_parser_value "${lines[50]}") # parser klquant_infer if [ ${MODE} = "klquant_whole_infer" ]; then - dataline=$(awk 'NR==1 NR==17{print}' $FILENAME) + dataline=$(awk 'NR==1, NR==17{print}' $FILENAME) lines=(${dataline}) model_name=$(func_parser_value "${lines[1]}") python=$(func_parser_value "${lines[2]}") + export_weight=$(func_parser_key "${lines[3]}") + save_infer_key=$(func_parser_key "${lines[4]}") # parser inference model - infer_model_dir_list=$(func_parser_value "${lines[3]}") - infer_export_list=$(func_parser_value "${lines[4]}") - infer_is_quant=$(func_parser_value "${lines[5]}") + infer_model_dir_list=$(func_parser_value "${lines[5]}") + infer_export_list=$(func_parser_value "${lines[6]}") + infer_is_quant=$(func_parser_value "${lines[7]}") # parser inference - inference_py=$(func_parser_value "${lines[6]}") - use_gpu_key=$(func_parser_key "${lines[7]}") - use_gpu_list=$(func_parser_value "${lines[7]}") - use_mkldnn_key=$(func_parser_key "${lines[8]}") - use_mkldnn_list=$(func_parser_value "${lines[8]}") - cpu_threads_key=$(func_parser_key "${lines[9]}") - cpu_threads_list=$(func_parser_value "${lines[9]}") - batch_size_key=$(func_parser_key "${lines[10]}") - batch_size_list=$(func_parser_value "${lines[10]}") - use_trt_key=$(func_parser_key "${lines[11]}") - 
use_trt_list=$(func_parser_value "${lines[11]}") - precision_key=$(func_parser_key "${lines[12]}") - precision_list=$(func_parser_value "${lines[12]}") - infer_model_key=$(func_parser_key "${lines[13]}") - image_dir_key=$(func_parser_key "${lines[14]}") - infer_img_dir=$(func_parser_value "${lines[14]}") - save_log_key=$(func_parser_key "${lines[15]}") - benchmark_key=$(func_parser_key "${lines[16]}") - benchmark_value=$(func_parser_value "${lines[16]}") - infer_key1=$(func_parser_key "${lines[17]}") - infer_value1=$(func_parser_value "${lines[17]}") + inference_py=$(func_parser_value "${lines[8]}") + use_gpu_key=$(func_parser_key "${lines[9]}") + use_gpu_list=$(func_parser_value "${lines[9]}") + use_mkldnn_key=$(func_parser_key "${lines[10]}") + use_mkldnn_list=$(func_parser_value "${lines[10]}") + cpu_threads_key=$(func_parser_key "${lines[11]}") + cpu_threads_list=$(func_parser_value "${lines[11]}") + batch_size_key=$(func_parser_key "${lines[12]}") + batch_size_list=$(func_parser_value "${lines[12]}") + use_trt_key=$(func_parser_key "${lines[13]}") + use_trt_list=$(func_parser_value "${lines[13]}") + precision_key=$(func_parser_key "${lines[14]}") + precision_list=$(func_parser_value "${lines[14]}") + infer_model_key=$(func_parser_key "${lines[15]}") + image_dir_key=$(func_parser_key "${lines[16]}") + infer_img_dir=$(func_parser_value "${lines[16]}") + save_log_key=$(func_parser_key "${lines[17]}") + save_log_value=$(func_parser_value "${lines[17]}") + benchmark_key=$(func_parser_key "${lines[18]}") + benchmark_value=$(func_parser_value "${lines[18]}") + infer_key1=$(func_parser_key "${lines[19]}") + infer_value1=$(func_parser_value "${lines[19]}") fi LOG_PATH="./test_tipc/output" @@ -159,8 +162,9 @@ function func_inference(){ set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}") set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}") set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}") + 
set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}") set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}") - command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 " + command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_params0} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 " eval $command last_status=${PIPESTATUS[0]} eval "cat ${_save_log_path}" @@ -189,8 +193,9 @@ function func_inference(){ set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}") set_precision=$(func_set_params "${precision_key}" "${precision}") set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}") + set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}") set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}") - command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 " + command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} ${set_infer_params0} > ${_save_log_path} 2>&1 " eval $command last_status=${PIPESTATUS[0]} eval "cat ${_save_log_path}" @@ -235,7 +240,7 @@ if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then fi #run inference is_quant=${infer_quant_flag[Count]} - if [ ${MODE} = "klquant_infer" ]; then + if [ ${MODE} = "klquant_whole_infer" ]; then is_quant="True" fi func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant} 
diff --git a/tools/infer/utility.py b/tools/infer/utility.py index bd9e14a65749f4223eeb6cf79a37546909854d17..f437056ec7b10e28e626d2028b6401cebc647bb1 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -211,7 +211,7 @@ def create_predictor(args, mode, logger): "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2] } max_input_shape = { - "x": [1, 3, 1280, 1280], + "x": [1, 3, 1536, 1536], "conv2d_92.tmp_0": [1, 120, 400, 400], "conv2d_91.tmp_0": [1, 24, 200, 200], "conv2d_59.tmp_0": [1, 96, 400, 400], @@ -261,7 +261,7 @@ def create_predictor(args, mode, logger): opt_input_shape.update(opt_pact_shape) elif mode == "rec": min_input_shape = {"x": [1, 3, 32, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, 32, 1024]} + max_input_shape = {"x": [args.rec_batch_num, 3, 32, 1536]} opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]} elif mode == "cls": min_input_shape = {"x": [1, 3, 48, 10]} diff --git a/tools/infer_det.py b/tools/infer_det.py index bb2cca7362e81494018aa3471664d60bef1b852c..1c679e0faf0d3ebdb6ca7ed4c317ce3eecfa910f 100755 --- a/tools/infer_det.py +++ b/tools/infer_det.py @@ -53,6 +53,7 @@ def draw_det_res(dt_boxes, config, img, img_name, save_path): logger.info("The detected Image saved in {}".format(save_path)) +@paddle.no_grad() def main(): global_config = config['Global']