diff --git a/benchmark/PaddleOCR_DBNet/.gitattributes b/benchmark/PaddleOCR_DBNet/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..8543e0a71603c693e83841b4a29a04c54a24d2a0
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/.gitattributes
@@ -0,0 +1,2 @@
+*.html linguist-language=python
+*.ipynb linguist-language=python
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/.gitignore b/benchmark/PaddleOCR_DBNet/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..cef1c73b393daf2b192bef1aafd9c612517247a4
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/.gitignore
@@ -0,0 +1,16 @@
+.DS_Store
+*.pth
+*.pyc
+*.pyo
+*.log
+*.tmp
+*.pkl
+__pycache__/
+.idea/
+output/
+test/*.jpg
+datasets/
+index/
+train_log/
+log/
+profiling_log/
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/LICENSE.md b/benchmark/PaddleOCR_DBNet/LICENSE.md
new file mode 100644
index 0000000000000000000000000000000000000000..b09cd7856d58590578ee1a4f3ad45d1310a97f87
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/LICENSE.md
@@ -0,0 +1,201 @@
+Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/benchmark/PaddleOCR_DBNet/README.MD b/benchmark/PaddleOCR_DBNet/README.MD
new file mode 100644
index 0000000000000000000000000000000000000000..dbc07faafb1fbf519ad64bb6b6a801889fd3042e
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/README.MD
@@ -0,0 +1,132 @@
+# Real-time Scene Text Detection with Differentiable Binarization
+
+**Note**: some of the code is adapted from [WenmuZhou/DBNet.pytorch](https://github.com/WenmuZhou/DBNet.pytorch)
+
+[Chinese write-up](https://zhuanlan.zhihu.com/p/94677957)
+
+![network](imgs/paper/db.jpg)
+
+## Update
+2020-06-07: Added grayscale-image training. When training on grayscale images, remove `dataset.args.transforms.Normalize` from the config.
+
+## Install Using Conda
+```bash
+git clone https://github.com/WenmuZhou/DBNet.paddle.git
+cd DBNet.paddle/
+conda env create -f environment.yml
+```
+
+or
+## Install Manually
+```bash
+conda create -n dbnet python=3.6
+conda activate dbnet
+
+conda install ipython pip
+
+# clone repo
+git clone https://github.com/WenmuZhou/DBNet.paddle.git
+cd DBNet.paddle/
+
+# install python dependencies
+pip install -r requirement.txt
+
+```
+
+## Requirements
+* paddlepaddle 2.4+
+
+## Download
+
+TBD
+
+## Data Preparation
+
+Training data: prepare a text file `train.txt` in the following format, using `'\t'` as the separator:
+```
+./datasets/train/img/001.jpg ./datasets/train/gt/001.txt
+```
+
+Validation data: prepare a text file `test.txt` in the following format, using `'\t'` as the separator:
+```
+./datasets/test/img/001.jpg ./datasets/test/gt/001.txt
+```
+- Store images in the `img` folder
+- Store the ground truth in the `gt` folder
+
+The ground truth is stored in `.txt` files with the following format:
+```
+x1, y1, x2, y2, x3, y3, x4, y4, annotation
+```
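+
+For reference, a minimal sketch of parsing one ground-truth line (the helper `parse_gt_line` is illustrative, not part of this repo):
+```python
+import numpy as np
+
+def parse_gt_line(line: str):
+    # "x1, y1, x2, y2, x3, y3, x4, y4, annotation"
+    parts = line.strip().split(',')
+    box = np.array(list(map(float, parts[:8]))).reshape(4, 2)
+    annotation = ','.join(parts[8:]).strip()  # the text itself may contain commas
+    return box, annotation
+```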
+
+
+## Train
+1. Set `dataset['train']['dataset']['data_path']` and `dataset['validate']['dataset']['data_path']` in [config/icdar2015_resnet18_FPN_DBhead_polyLR.yaml](config/icdar2015_resnet18_FPN_DBhead_polyLR.yaml); the relevant YAML fragment is shown after the commands below.
+2. Start training:
+* Single-GPU training
+```bash
+bash single_gpu_train.sh
+```
+* Multi-GPU training
+```bash
+bash multi_gpu_train.sh
+```
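+
+For reference, the data path keys look like this in the YAML (the paths below are placeholders matching the sample configs):
+```yaml
+dataset:
+  train:
+    dataset:
+      args:
+        data_path:
+          - ./datasets/train.txt
+  validate:
+    dataset:
+      args:
+        data_path:
+          - ./datasets/test.txt
+```
+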
+## Test
+
+[eval.py](tools/eval.py) is used to evaluate the model on the test dataset
+
+1. Set `model_path` in [eval.sh](eval.sh)
+2. Run the following script to evaluate:
+```bash
+bash eval.sh
+```
+
+## Predict
+[predict.py](tools/predict.py) can be used to run inference on all images in a folder
+1. Set `model_path`, `input_folder` and `output_folder` in [predict.sh](predict.sh)
+2. Run the following script to predict:
+```bash
+bash predict.sh
+```
+You can change `model_path` in `predict.sh` to point at your own model.
+
+Tip: if the results are poor, try adjusting the threshold `thre` in [predict.sh](predict.sh)
+
+## Export Model
+
+[export_model.py](tools/export_model.py) exports a trained checkpoint as an inference model
+
+Use the following script to export the inference model:
+```bash
+python tools/export_model.py --config_file config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml -o trainer.resume_checkpoint=model_best.pth trainer.output_dir=output/infer
+```
+
+## Paddle Inference
+
+[infer.py](tools/infer.py) runs inference with the exported model via Paddle Inference
+
+Use the following script to run inference:
+```bash
+python tools/infer.py --model-dir=output/infer/ --img-path imgs/paper/db.jpg
+```
+
+
+
+### [ICDAR 2015](http://rrc.cvc.uab.es/?ch=4)
+Trained only on the ICDAR2015 dataset.
+
+| Method | Image size (short side) | Learning rate | Precision (%) | Recall (%) | F-measure (%) | FPS |
+|:------:|:-----------------------:|:-------------:|:-------------:|:----------:|:-------------:|:---:|
+| ImageNet-resnet50-FPN-DBHead(torch) | 736 | 1e-3 | 90.19 | 78.14 | 83.88 | 27 |
+| ImageNet-resnet50-FPN-DBHead(paddle) | 736 | 1e-3 | 89.47 | 79.03 | 83.92 | 27 |
+| ImageNet-resnet50-FPN-DBHead(paddle_amp) | 736 | 1e-3 | 88.62 | 79.95 | 84.06 | 27 |
+
+
+### Examples
+TBD
+
+
+### References
+1. https://arxiv.org/pdf/1911.08947.pdf
+2. https://github.com/WenmuZhou/DBNet.pytorch
+
+**If this repository helps you, please star it. Thanks!**
diff --git a/benchmark/PaddleOCR_DBNet/base/__init__.py b/benchmark/PaddleOCR_DBNet/base/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..223e9e02d7ab1fbc2819f9ff33e04a2bd0d0303c
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/base/__init__.py
@@ -0,0 +1,2 @@
+from .base_trainer import BaseTrainer
+from .base_dataset import BaseDataSet
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/base/base_dataset.py b/benchmark/PaddleOCR_DBNet/base/base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a839a8ffbc34a2f671e905f8750269eb5af0371
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/base/base_dataset.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/4 13:12
+# @Author : zhoujun
+import copy
+
+import cv2
+import numpy as np
+from paddle.io import Dataset
+
+from data_loader.modules import *
+
+
+class BaseDataSet(Dataset):
+ def __init__(self,
+ data_path: str,
+ img_mode,
+ pre_processes,
+ filter_keys,
+ ignore_tags,
+ transform=None,
+ target_transform=None):
+        assert img_mode in ['RGB', 'BGR', 'GRAY']
+ self.ignore_tags = ignore_tags
+ self.data_list = self.load_data(data_path)
+        item_keys = [
+            'img_path', 'img_name', 'text_polys', 'texts', 'ignore_tags'
+        ]
+        for item in item_keys:
+            assert item in self.data_list[
+                0], 'data_list from load_data must contain {}'.format(item)
+ self.img_mode = img_mode
+ self.filter_keys = filter_keys
+ self.transform = transform
+ self.target_transform = target_transform
+ self._init_pre_processes(pre_processes)
+
+ def _init_pre_processes(self, pre_processes):
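+        # Each preprocessing step is instantiated by class name via eval(), so the
+        # 'type' field in the config must name a class imported from data_loader.modules,
+        # e.g. {'type': 'IaaAugment', 'args': {...}} -> IaaAugment(**args)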
+ self.aug = []
+ if pre_processes is not None:
+ for aug in pre_processes:
+ if 'args' not in aug:
+ args = {}
+ else:
+ args = aug['args']
+ if isinstance(args, dict):
+ cls = eval(aug['type'])(**args)
+ else:
+ cls = eval(aug['type'])(args)
+ self.aug.append(cls)
+
+ def load_data(self, data_path: str) -> list:
+ """
+        Load the data into a list.
+        :param data_path: folder or file that stores the data
+        :return: a list of dicts, each containing 'img_path', 'img_name', 'text_polys', 'texts' and 'ignore_tags'
+ """
+ raise NotImplementedError
+
+ def apply_pre_processes(self, data):
+ for aug in self.aug:
+ data = aug(data)
+ return data
+
+ def __getitem__(self, index):
+ try:
+ data = copy.deepcopy(self.data_list[index])
+ im = cv2.imread(data['img_path'], 1
+ if self.img_mode != 'GRAY' else 0)
+ if self.img_mode == 'RGB':
+ im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+ data['img'] = im
+ data['shape'] = [im.shape[0], im.shape[1]]
+ data = self.apply_pre_processes(data)
+
+ if self.transform:
+ data['img'] = self.transform(data['img'])
+ data['text_polys'] = data['text_polys'].tolist()
+ if len(self.filter_keys):
+ data_dict = {}
+ for k, v in data.items():
+ if k not in self.filter_keys:
+ data_dict[k] = v
+ return data_dict
+ else:
+ return data
+        except Exception:
+            # this sample failed to load or augment; fall back to a random one
+            return self.__getitem__(np.random.randint(self.__len__()))
+
+ def __len__(self):
+ return len(self.data_list)
diff --git a/benchmark/PaddleOCR_DBNet/base/base_trainer.py b/benchmark/PaddleOCR_DBNet/base/base_trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..82c308d361ca0b1da274aefdef6b626a81ae3cc7
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/base/base_trainer.py
@@ -0,0 +1,250 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:50
+# @Author : zhoujun
+
+import os
+import pathlib
+import shutil
+from pprint import pformat
+
+import anyconfig
+import paddle
+import numpy as np
+import random
+from paddle.jit import to_static
+from paddle.static import InputSpec
+
+from utils import setup_logger
+
+
+class BaseTrainer:
+ def __init__(self,
+ config,
+ model,
+ criterion,
+ train_loader,
+ validate_loader,
+ metric_cls,
+ post_process=None):
+ config['trainer']['output_dir'] = os.path.join(
+ str(pathlib.Path(os.path.abspath(__name__)).parent),
+ config['trainer']['output_dir'])
+ config['name'] = config['name'] + '_' + model.name
+ self.save_dir = config['trainer']['output_dir']
+ self.checkpoint_dir = os.path.join(self.save_dir, 'checkpoint')
+
+ os.makedirs(self.checkpoint_dir, exist_ok=True)
+
+ self.global_step = 0
+ self.start_epoch = 0
+ self.config = config
+ self.criterion = criterion
+ # logger and tensorboard
+ self.visualdl_enable = self.config['trainer'].get('visual_dl', False)
+ self.epochs = self.config['trainer']['epochs']
+ self.log_iter = self.config['trainer']['log_iter']
+ if paddle.distributed.get_rank() == 0:
+ anyconfig.dump(config, os.path.join(self.save_dir, 'config.yaml'))
+ self.logger = setup_logger(os.path.join(self.save_dir, 'train.log'))
+ self.logger_info(pformat(self.config))
+
+ self.model = self.apply_to_static(model)
+
+ # device
+ if paddle.device.cuda.device_count(
+ ) > 0 and paddle.device.is_compiled_with_cuda():
+ self.with_cuda = True
+ random.seed(self.config['trainer']['seed'])
+ np.random.seed(self.config['trainer']['seed'])
+ paddle.seed(self.config['trainer']['seed'])
+ else:
+ self.with_cuda = False
+        self.logger_info('train with paddle {}'.format(paddle.__version__))
+ # metrics
+ self.metrics = {
+ 'recall': 0,
+ 'precision': 0,
+ 'hmean': 0,
+ 'train_loss': float('inf'),
+ 'best_model_epoch': 0
+ }
+
+ self.train_loader = train_loader
+ if validate_loader is not None:
+ assert post_process is not None and metric_cls is not None
+ self.validate_loader = validate_loader
+ self.post_process = post_process
+ self.metric_cls = metric_cls
+ self.train_loader_len = len(train_loader)
+
+ if self.validate_loader is not None:
+ self.logger_info(
+ 'train dataset has {} samples,{} in dataloader, validate dataset has {} samples,{} in dataloader'.
+ format(
+ len(self.train_loader.dataset), self.train_loader_len,
+ len(self.validate_loader.dataset),
+ len(self.validate_loader)))
+ else:
+ self.logger_info(
+ 'train dataset has {} samples,{} in dataloader'.format(
+ len(self.train_loader.dataset), self.train_loader_len))
+
+ self._initialize_scheduler()
+
+ self._initialize_optimizer()
+
+ # resume or finetune
+ if self.config['trainer']['resume_checkpoint'] != '':
+ self._load_checkpoint(
+ self.config['trainer']['resume_checkpoint'], resume=True)
+ elif self.config['trainer']['finetune_checkpoint'] != '':
+ self._load_checkpoint(
+ self.config['trainer']['finetune_checkpoint'], resume=False)
+
+ if self.visualdl_enable and paddle.distributed.get_rank() == 0:
+ from visualdl import LogWriter
+ self.writer = LogWriter(self.save_dir)
+
+        # mixed precision (AMP) training
+ self.amp = self.config.get('amp', None)
+ if self.amp == 'None':
+ self.amp = None
+ if self.amp:
+ self.amp['scaler'] = paddle.amp.GradScaler(
+ init_loss_scaling=self.amp.get("scale_loss", 1024),
+ use_dynamic_loss_scaling=self.amp.get(
+ 'use_dynamic_loss_scaling', True))
+ self.model, self.optimizer = paddle.amp.decorate(
+ models=self.model,
+ optimizers=self.optimizer,
+ level=self.amp.get('amp_level', 'O2'))
+
+        # distributed training
+ if paddle.device.cuda.device_count() > 1:
+ self.model = paddle.DataParallel(self.model)
+        # record the Normalize params so batches can be un-normalized for visualization
+ self.UN_Normalize = False
+ for t in self.config['dataset']['train']['dataset']['args'][
+ 'transforms']:
+ if t['type'] == 'Normalize':
+ self.normalize_mean = t['args']['mean']
+ self.normalize_std = t['args']['std']
+ self.UN_Normalize = True
+
+ def apply_to_static(self, model):
+ support_to_static = self.config['trainer'].get('to_static', False)
+ if support_to_static:
+            specs = [InputSpec([None, 3, -1, -1])]
+ model = to_static(model, input_spec=specs)
+ self.logger_info(
+ "Successfully to apply @to_static with specs: {}".format(specs))
+ return model
+
+ def train(self):
+ """
+ Full training logic
+ """
+ for epoch in range(self.start_epoch + 1, self.epochs + 1):
+ self.epoch_result = self._train_epoch(epoch)
+ self._on_epoch_finish()
+ if paddle.distributed.get_rank() == 0 and self.visualdl_enable:
+ self.writer.close()
+ self._on_train_finish()
+
+ def _train_epoch(self, epoch):
+ """
+ Training logic for an epoch
+
+ :param epoch: Current epoch number
+ """
+ raise NotImplementedError
+
+ def _eval(self, epoch):
+ """
+ eval logic for an epoch
+
+ :param epoch: Current epoch number
+ """
+ raise NotImplementedError
+
+ def _on_epoch_finish(self):
+ raise NotImplementedError
+
+ def _on_train_finish(self):
+ raise NotImplementedError
+
+ def _save_checkpoint(self, epoch, file_name):
+ """
+        Save a checkpoint
+
+        :param epoch: current epoch number
+        :param file_name: name of the checkpoint file
+ """
+ state_dict = self.model.state_dict()
+ state = {
+ 'epoch': epoch,
+ 'global_step': self.global_step,
+ 'state_dict': state_dict,
+ 'optimizer': self.optimizer.state_dict(),
+ 'config': self.config,
+ 'metrics': self.metrics
+ }
+ filename = os.path.join(self.checkpoint_dir, file_name)
+ paddle.save(state, filename)
+
+ def _load_checkpoint(self, checkpoint_path, resume):
+ """
+ Resume from saved checkpoints
+ :param checkpoint_path: Checkpoint path to be resumed
+ """
+ self.logger_info("Loading checkpoint: {} ...".format(checkpoint_path))
+ checkpoint = paddle.load(checkpoint_path)
+ self.model.set_state_dict(checkpoint['state_dict'])
+ if resume:
+ self.global_step = checkpoint['global_step']
+ self.start_epoch = checkpoint['epoch']
+ self.config['lr_scheduler']['args']['last_epoch'] = self.start_epoch
+ # self.scheduler.load_state_dict(checkpoint['scheduler'])
+ self.optimizer.set_state_dict(checkpoint['optimizer'])
+ if 'metrics' in checkpoint:
+ self.metrics = checkpoint['metrics']
+ self.logger_info("resume from checkpoint {} (epoch {})".format(
+ checkpoint_path, self.start_epoch))
+ else:
+ self.logger_info("finetune from checkpoint {}".format(
+ checkpoint_path))
+
+ def _initialize(self, name, module, *args, **kwargs):
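+        # Build an object reflectively from the config: config[name]['type'] names an
+        # attribute of `module` and config[name]['args'] supplies its kwargs, e.g.
+        # {'type': 'Adam', 'args': {'lr': 0.001}} -> paddle.optimizer.Adam(lr=0.001, ...)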
+ module_name = self.config[name]['type']
+ module_args = self.config[name].get('args', {})
+ assert all([k not in module_args for k in kwargs
+ ]), 'Overwriting kwargs given in config file is not allowed'
+ module_args.update(kwargs)
+ return getattr(module, module_name)(*args, **module_args)
+
+ def _initialize_scheduler(self):
+ self.lr_scheduler = self._initialize('lr_scheduler',
+ paddle.optimizer.lr)
+
+ def _initialize_optimizer(self):
+ self.optimizer = self._initialize(
+ 'optimizer',
+ paddle.optimizer,
+ parameters=self.model.parameters(),
+ learning_rate=self.lr_scheduler)
+
+ def inverse_normalize(self, batch_img):
+ if self.UN_Normalize:
+ batch_img[:, 0, :, :] = batch_img[:, 0, :, :] * self.normalize_std[
+ 0] + self.normalize_mean[0]
+ batch_img[:, 1, :, :] = batch_img[:, 1, :, :] * self.normalize_std[
+ 1] + self.normalize_mean[1]
+ batch_img[:, 2, :, :] = batch_img[:, 2, :, :] * self.normalize_std[
+ 2] + self.normalize_mean[2]
+
+ def logger_info(self, s):
+ if paddle.distributed.get_rank() == 0:
+ self.logger.info(s)
diff --git a/benchmark/PaddleOCR_DBNet/config/SynthText.yaml b/benchmark/PaddleOCR_DBNet/config/SynthText.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..61d5da7d3b4b8ec9da0bd79020d95c694ecef513
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/SynthText.yaml
@@ -0,0 +1,40 @@
+name: DBNet
+dataset:
+ train:
+ dataset:
+      type: SynthTextDataset # dataset type
+      args:
+        data_path: '' # root directory of the SynthText dataset
+        pre_processes: # preprocessing pipeline: augmentation and label generation
+          - type: IaaAugment # apply transforms with imgaug
+ args:
+ - {'type':Fliplr, 'args':{'p':0.5}}
+ - {'type': Affine, 'args':{'rotate':[-10,10]}}
+ - {'type':Resize,'args':{'size':[0.5,3]}}
+ - type: EastRandomCropData
+ args:
+ size: [640,640]
+ max_tries: 50
+ keep_ratio: true
+ - type: MakeBorderMap
+ args:
+ shrink_ratio: 0.4
+ - type: MakeShrinkMap
+ args:
+ shrink_ratio: 0.4
+ min_text_size: 8
+        transforms: # transforms applied to the image
+ - type: ToTensor
+ args: {}
+ - type: Normalize
+ args:
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ img_mode: RGB
+        filter_keys: ['img_path','img_name','text_polys','texts','ignore_tags','shape'] # keys removed from the data dict before it is returned
+ ignore_tags: ['*', '###']
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 0
+ collate_fn: ''
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/config/SynthText_resnet18_FPN_DBhead_polyLR.yaml b/benchmark/PaddleOCR_DBNet/config/SynthText_resnet18_FPN_DBhead_polyLR.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a665e94a7fbaa531b67a385ceeeae3be843e97b6
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/SynthText_resnet18_FPN_DBhead_polyLR.yaml
@@ -0,0 +1,65 @@
+name: DBNet
+base: ['config/SynthText.yaml']
+arch:
+ type: Model
+ backbone:
+ type: resnet18
+ pretrained: true
+ neck:
+ type: FPN
+ inner_channels: 256
+ head:
+ type: DBHead
+ out_channels: 2
+ k: 50
+post_processing:
+ type: SegDetectorRepresenter
+ args:
+ thresh: 0.3
+ box_thresh: 0.7
+ max_candidates: 1000
+ unclip_ratio: 1.5 # from paper
+metric:
+ type: QuadMetric
+ args:
+ is_output_polygon: false
+loss:
+ type: DBLoss
+ alpha: 1
+ beta: 10
+ ohem_ratio: 3
+optimizer:
+ type: Adam
+ args:
+ lr: 0.001
+ weight_decay: 0
+ amsgrad: true
+lr_scheduler:
+ type: WarmupPolyLR
+ args:
+ warmup_epoch: 3
+trainer:
+ seed: 2
+ epochs: 1200
+ log_iter: 10
+ show_images_iter: 50
+ resume_checkpoint: ''
+ finetune_checkpoint: ''
+ output_dir: output
+ visual_dl: false
+amp:
+ scale_loss: 1024
+ amp_level: O2
+ custom_white_list: []
+ custom_black_list: ['exp', 'sigmoid', 'concat']
+dataset:
+ train:
+ dataset:
+ args:
+ data_path: ./datasets/SynthText
+ img_mode: RGB
+ loader:
+ batch_size: 2
+ shuffle: true
+ num_workers: 6
+ collate_fn: ''
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/config/icdar2015.yaml b/benchmark/PaddleOCR_DBNet/config/icdar2015.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4551b14b24acecf079b677699dceed01f7a68c12
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/icdar2015.yaml
@@ -0,0 +1,69 @@
+name: DBNet
+dataset:
+ train:
+ dataset:
+      type: ICDAR2015Dataset # dataset type
+      args:
+        data_path: # a file listing 'img_path\tgt_path' pairs, one per line
+          - ''
+        pre_processes: # preprocessing pipeline: augmentation and label generation
+          - type: IaaAugment # apply transforms with imgaug
+ args:
+ - {'type':Fliplr, 'args':{'p':0.5}}
+ - {'type': Affine, 'args':{'rotate':[-10,10]}}
+ - {'type':Resize,'args':{'size':[0.5,3]}}
+ - type: EastRandomCropData
+ args:
+ size: [640,640]
+ max_tries: 50
+ keep_ratio: true
+ - type: MakeBorderMap
+ args:
+ shrink_ratio: 0.4
+ thresh_min: 0.3
+ thresh_max: 0.7
+ - type: MakeShrinkMap
+ args:
+ shrink_ratio: 0.4
+ min_text_size: 8
+        transforms: # transforms applied to the image
+ - type: ToTensor
+ args: {}
+ - type: Normalize
+ args:
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ img_mode: RGB
+        filter_keys: [img_path,img_name,text_polys,texts,ignore_tags,shape] # keys removed from the data dict before it is returned
+ ignore_tags: ['*', '###']
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 0
+ collate_fn: ''
+ validate:
+ dataset:
+ type: ICDAR2015Dataset
+ args:
+ data_path:
+ - ''
+ pre_processes:
+ - type: ResizeShortSize
+ args:
+ short_size: 736
+ resize_text_polys: false
+ transforms:
+ - type: ToTensor
+ args: {}
+ - type: Normalize
+ args:
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ img_mode: RGB
+ filter_keys: []
+ ignore_tags: ['*', '###']
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 0
+ collate_fn: ICDARCollectFN
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/config/icdar2015_dcn_resnet18_FPN_DBhead_polyLR.yaml b/benchmark/PaddleOCR_DBNet/config/icdar2015_dcn_resnet18_FPN_DBhead_polyLR.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..608ef42c1a2085a7450cf1662071f26cd7d472ae
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/icdar2015_dcn_resnet18_FPN_DBhead_polyLR.yaml
@@ -0,0 +1,82 @@
+name: DBNet
+base: ['config/icdar2015.yaml']
+arch:
+ type: Model
+ backbone:
+ type: deformable_resnet18
+ pretrained: true
+ neck:
+ type: FPN
+ inner_channels: 256
+ head:
+ type: DBHead
+ out_channels: 2
+ k: 50
+post_processing:
+ type: SegDetectorRepresenter
+ args:
+ thresh: 0.3
+ box_thresh: 0.7
+ max_candidates: 1000
+ unclip_ratio: 1.5 # from paper
+metric:
+ type: QuadMetric
+ args:
+ is_output_polygon: false
+loss:
+ type: DBLoss
+ alpha: 1
+ beta: 10
+ ohem_ratio: 3
+optimizer:
+ type: Adam
+ args:
+ lr: 0.001
+ weight_decay: 0
+ amsgrad: true
+lr_scheduler:
+ type: WarmupPolyLR
+ args:
+ warmup_epoch: 3
+trainer:
+ seed: 2
+ epochs: 1200
+ log_iter: 10
+ show_images_iter: 50
+ resume_checkpoint: ''
+ finetune_checkpoint: ''
+ output_dir: output
+ visual_dl: false
+amp:
+ scale_loss: 1024
+ amp_level: O2
+ custom_white_list: []
+ custom_black_list: ['exp', 'sigmoid', 'concat']
+dataset:
+ train:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/train.txt
+ img_mode: RGB
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ''
+ validate:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/test.txt
+ pre_processes:
+ - type: ResizeShortSize
+ args:
+ short_size: 736
+ resize_text_polys: false
+ img_mode: RGB
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ICDARCollectFN
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/config/icdar2015_resnet18_FPN_DBhead_polyLR.yaml b/benchmark/PaddleOCR_DBNet/config/icdar2015_resnet18_FPN_DBhead_polyLR.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62c392b9ce67e754a212fa219fdc51e685d52f32
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/icdar2015_resnet18_FPN_DBhead_polyLR.yaml
@@ -0,0 +1,82 @@
+name: DBNet
+base: ['config/icdar2015.yaml']
+arch:
+ type: Model
+ backbone:
+ type: resnet18
+ pretrained: true
+ neck:
+ type: FPN
+ inner_channels: 256
+ head:
+ type: DBHead
+ out_channels: 2
+ k: 50
+post_processing:
+ type: SegDetectorRepresenter
+ args:
+ thresh: 0.3
+ box_thresh: 0.7
+ max_candidates: 1000
+ unclip_ratio: 1.5 # from paper
+metric:
+ type: QuadMetric
+ args:
+ is_output_polygon: false
+loss:
+ type: DBLoss
+ alpha: 1
+ beta: 10
+ ohem_ratio: 3
+optimizer:
+ type: Adam
+ args:
+ lr: 0.001
+ weight_decay: 0
+ amsgrad: true
+lr_scheduler:
+ type: WarmupPolyLR
+ args:
+ warmup_epoch: 3
+trainer:
+ seed: 2
+ epochs: 1200
+ log_iter: 10
+ show_images_iter: 50
+ resume_checkpoint: ''
+ finetune_checkpoint: ''
+ output_dir: output
+ visual_dl: false
+amp:
+ scale_loss: 1024
+ amp_level: O2
+ custom_white_list: []
+ custom_black_list: ['exp', 'sigmoid', 'concat']
+dataset:
+ train:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/train.txt
+ img_mode: RGB
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ''
+ validate:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/test.txt
+ pre_processes:
+ - type: ResizeShortSize
+ args:
+ short_size: 736
+ resize_text_polys: false
+ img_mode: RGB
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ICDARCollectFN
diff --git a/benchmark/PaddleOCR_DBNet/config/icdar2015_resnet18_FPN_DBhead_polyLR_finetune.yaml b/benchmark/PaddleOCR_DBNet/config/icdar2015_resnet18_FPN_DBhead_polyLR_finetune.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9b018d5cdd8877055f0c0372f313ab5d1beeb881
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/icdar2015_resnet18_FPN_DBhead_polyLR_finetune.yaml
@@ -0,0 +1,83 @@
+name: DBNet
+base: ['config/icdar2015.yaml']
+arch:
+ type: Model
+ backbone:
+ type: resnet18
+ pretrained: true
+ neck:
+ type: FPN
+ inner_channels: 256
+ head:
+ type: DBHead
+ out_channels: 2
+ k: 50
+post_processing:
+ type: SegDetectorRepresenter
+ args:
+ thresh: 0.3
+ box_thresh: 0.7
+ max_candidates: 1000
+ unclip_ratio: 1.5 # from paper
+metric:
+ type: QuadMetric
+ args:
+ is_output_polygon: false
+loss:
+ type: DBLoss
+ alpha: 1
+ beta: 10
+ ohem_ratio: 3
+optimizer:
+ type: Adam
+ args:
+ lr: 0.001
+ weight_decay: 0
+ amsgrad: true
+lr_scheduler:
+ type: StepLR
+ args:
+ step_size: 10
+    gamma: 0.8
+trainer:
+ seed: 2
+ epochs: 500
+ log_iter: 10
+ show_images_iter: 50
+ resume_checkpoint: ''
+ finetune_checkpoint: ''
+ output_dir: output
+ visual_dl: false
+amp:
+ scale_loss: 1024
+ amp_level: O2
+ custom_white_list: []
+ custom_black_list: ['exp', 'sigmoid', 'concat']
+dataset:
+ train:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/train.txt
+ img_mode: RGB
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ''
+ validate:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/test.txt
+ pre_processes:
+ - type: ResizeShortSize
+ args:
+ short_size: 736
+ resize_text_polys: false
+ img_mode: RGB
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ICDARCollectFN
diff --git a/benchmark/PaddleOCR_DBNet/config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml b/benchmark/PaddleOCR_DBNet/config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2a870fd7c75de41c6f7cf20854029dfd7ed38e16
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml
@@ -0,0 +1,79 @@
+name: DBNet
+base: ['config/icdar2015.yaml']
+arch:
+ type: Model
+ backbone:
+ type: resnet50
+ pretrained: true
+ neck:
+ type: FPN
+ inner_channels: 256
+ head:
+ type: DBHead
+ out_channels: 2
+ k: 50
+post_processing:
+ type: SegDetectorRepresenter
+ args:
+ thresh: 0.3
+ box_thresh: 0.7
+ max_candidates: 1000
+ unclip_ratio: 1.5 # from paper
+metric:
+ type: QuadMetric
+ args:
+ is_output_polygon: false
+loss:
+ type: DBLoss
+ alpha: 1
+ beta: 10
+ ohem_ratio: 3
+optimizer:
+ type: Adam
+lr_scheduler:
+ type: Polynomial
+ args:
+ learning_rate: 0.001
+ warmup_epoch: 3
+trainer:
+ seed: 2
+ epochs: 1200
+ log_iter: 10
+ show_images_iter: 50
+ resume_checkpoint: ''
+ finetune_checkpoint: ''
+ output_dir: output/fp16_o2
+ visual_dl: false
+amp:
+ scale_loss: 1024
+ amp_level: O2
+ custom_white_list: []
+ custom_black_list: ['exp', 'sigmoid', 'concat']
+dataset:
+ train:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/train.txt
+ img_mode: RGB
+ loader:
+ batch_size: 16
+ shuffle: true
+ num_workers: 6
+ collate_fn: ''
+ validate:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/test.txt
+ pre_processes:
+ - type: ResizeShortSize
+ args:
+ short_size: 736
+ resize_text_polys: false
+ img_mode: RGB
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ICDARCollectFN
diff --git a/benchmark/PaddleOCR_DBNet/config/open_dataset.yaml b/benchmark/PaddleOCR_DBNet/config/open_dataset.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..97267586c28fbd0145a8b14ed39c5520d6716bee
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/open_dataset.yaml
@@ -0,0 +1,73 @@
+name: DBNet
+dataset:
+ train:
+ dataset:
+      type: DetDataset # dataset type
+      args:
+        data_path: # a file listing 'img_path\tgt_path' pairs, one per line
+          - ''
+        pre_processes: # preprocessing pipeline: augmentation and label generation
+          - type: IaaAugment # apply transforms with imgaug
+ args:
+ - {'type':Fliplr, 'args':{'p':0.5}}
+ - {'type': Affine, 'args':{'rotate':[-10,10]}}
+ - {'type':Resize,'args':{'size':[0.5,3]}}
+ - type: EastRandomCropData
+ args:
+ size: [640,640]
+ max_tries: 50
+ keep_ratio: true
+ - type: MakeBorderMap
+ args:
+ shrink_ratio: 0.4
+ thresh_min: 0.3
+ thresh_max: 0.7
+ - type: MakeShrinkMap
+ args:
+ shrink_ratio: 0.4
+ min_text_size: 8
+        transforms: # transforms applied to the image
+ - type: ToTensor
+ args: {}
+ - type: Normalize
+ args:
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ img_mode: RGB
+ load_char_annotation: false
+ expand_one_char: false
+        filter_keys: [img_path,img_name,text_polys,texts,ignore_tags,shape] # keys removed from the data dict before it is returned
+ ignore_tags: ['*', '###']
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 0
+ collate_fn: ''
+ validate:
+ dataset:
+ type: DetDataset
+ args:
+ data_path:
+ - ''
+ pre_processes:
+ - type: ResizeShortSize
+ args:
+ short_size: 736
+ resize_text_polys: false
+ transforms:
+ - type: ToTensor
+ args: {}
+ - type: Normalize
+ args:
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ img_mode: RGB
+        load_char_annotation: false # whether to load character-level annotations
+        expand_one_char: false # whether to widen single-character boxes: w = w + h
+ filter_keys: []
+ ignore_tags: ['*', '###']
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 0
+ collate_fn: ICDARCollectFN
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/config/open_dataset_dcn_resnet50_FPN_DBhead_polyLR.yaml b/benchmark/PaddleOCR_DBNet/config/open_dataset_dcn_resnet50_FPN_DBhead_polyLR.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6c81738720e4de47ff2e528653eb0069b19dffdd
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/open_dataset_dcn_resnet50_FPN_DBhead_polyLR.yaml
@@ -0,0 +1,86 @@
+name: DBNet
+base: ['config/open_dataset.yaml']
+arch:
+ type: Model
+ backbone:
+ type: deformable_resnet18
+ pretrained: true
+ neck:
+ type: FPN
+ inner_channels: 256
+ head:
+ type: DBHead
+ out_channels: 2
+ k: 50
+post_processing:
+ type: SegDetectorRepresenter
+ args:
+ thresh: 0.3
+ box_thresh: 0.7
+ max_candidates: 1000
+ unclip_ratio: 1.5 # from paper
+metric:
+ type: QuadMetric
+ args:
+ is_output_polygon: false
+loss:
+ type: DBLoss
+ alpha: 1
+ beta: 10
+ ohem_ratio: 3
+optimizer:
+ type: Adam
+ args:
+ lr: 0.001
+ weight_decay: 0
+ amsgrad: true
+lr_scheduler:
+ type: WarmupPolyLR
+ args:
+ warmup_epoch: 3
+trainer:
+ seed: 2
+ epochs: 1200
+ log_iter: 1
+ show_images_iter: 1
+ resume_checkpoint: ''
+ finetune_checkpoint: ''
+ output_dir: output
+ visual_dl: false
+amp:
+ scale_loss: 1024
+ amp_level: O2
+ custom_white_list: []
+ custom_black_list: ['exp', 'sigmoid', 'concat']
+dataset:
+ train:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/train.json
+ img_mode: RGB
+ load_char_annotation: false
+ expand_one_char: false
+ loader:
+ batch_size: 2
+ shuffle: true
+ num_workers: 6
+ collate_fn: ''
+ validate:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/test.json
+ pre_processes:
+ - type: ResizeShortSize
+ args:
+ short_size: 736
+ resize_text_polys: false
+ img_mode: RGB
+ load_char_annotation: false
+ expand_one_char: false
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ICDARCollectFN
diff --git a/benchmark/PaddleOCR_DBNet/config/open_dataset_resnest50_FPN_DBhead_polyLR.yaml b/benchmark/PaddleOCR_DBNet/config/open_dataset_resnest50_FPN_DBhead_polyLR.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d47ab06eddfebdd45376a7d60a771923215b932d
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/open_dataset_resnest50_FPN_DBhead_polyLR.yaml
@@ -0,0 +1,86 @@
+name: DBNet
+base: ['config/open_dataset.yaml']
+arch:
+ type: Model
+ backbone:
+ type: resnest50
+ pretrained: true
+ neck:
+ type: FPN
+ inner_channels: 256
+ head:
+ type: DBHead
+ out_channels: 2
+ k: 50
+post_processing:
+ type: SegDetectorRepresenter
+ args:
+ thresh: 0.3
+ box_thresh: 0.7
+ max_candidates: 1000
+ unclip_ratio: 1.5 # from paper
+metric:
+ type: QuadMetric
+ args:
+ is_output_polygon: false
+loss:
+ type: DBLoss
+ alpha: 1
+ beta: 10
+ ohem_ratio: 3
+optimizer:
+ type: Adam
+ args:
+ lr: 0.001
+ weight_decay: 0
+ amsgrad: true
+lr_scheduler:
+ type: WarmupPolyLR
+ args:
+ warmup_epoch: 3
+trainer:
+ seed: 2
+ epochs: 1200
+ log_iter: 1
+ show_images_iter: 1
+ resume_checkpoint: ''
+ finetune_checkpoint: ''
+ output_dir: output
+ visual_dl: false
+amp:
+ scale_loss: 1024
+ amp_level: O2
+ custom_white_list: []
+ custom_black_list: ['exp', 'sigmoid', 'concat']
+dataset:
+ train:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/train.json
+ img_mode: RGB
+ load_char_annotation: false
+ expand_one_char: false
+ loader:
+ batch_size: 2
+ shuffle: true
+ num_workers: 6
+ collate_fn: ''
+ validate:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/test.json
+ pre_processes:
+ - type: ResizeShortSize
+ args:
+ short_size: 736
+ resize_text_polys: false
+ img_mode: RGB
+ load_char_annotation: false
+ expand_one_char: false
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ICDARCollectFN
diff --git a/benchmark/PaddleOCR_DBNet/config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml b/benchmark/PaddleOCR_DBNet/config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ff16ddb26dc90b62dabde59630b2450de4fdf326
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/config/open_dataset_resnet18_FPN_DBhead_polyLR.yaml
@@ -0,0 +1,93 @@
+name: DBNet
+base: ['config/open_dataset.yaml']
+arch:
+ type: Model
+ backbone:
+ type: resnet18
+ pretrained: true
+ neck:
+ type: FPN
+ inner_channels: 256
+ head:
+ type: DBHead
+ out_channels: 2
+ k: 50
+post_processing:
+ type: SegDetectorRepresenter
+ args:
+ thresh: 0.3
+ box_thresh: 0.7
+ max_candidates: 1000
+ unclip_ratio: 1.5 # from paper
+metric:
+ type: QuadMetric
+ args:
+ is_output_polygon: false
+loss:
+ type: DBLoss
+ alpha: 1
+ beta: 10
+ ohem_ratio: 3
+optimizer:
+ type: Adam
+ args:
+ lr: 0.001
+ weight_decay: 0
+ amsgrad: true
+lr_scheduler:
+ type: WarmupPolyLR
+ args:
+ warmup_epoch: 3
+trainer:
+ seed: 2
+ epochs: 1200
+ log_iter: 1
+ show_images_iter: 1
+ resume_checkpoint: ''
+ finetune_checkpoint: ''
+ output_dir: output
+ visual_dl: false
+amp:
+ scale_loss: 1024
+ amp_level: O2
+ custom_white_list: []
+ custom_black_list: ['exp', 'sigmoid', 'concat']
+dataset:
+ train:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/train.json
+          transforms: # transforms applied to the image
+ - type: ToTensor
+ args: {}
+ - type: Normalize
+ args:
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ img_mode: RGB
+ load_char_annotation: false
+ expand_one_char: false
+ loader:
+ batch_size: 2
+ shuffle: true
+ num_workers: 6
+ collate_fn: ''
+ validate:
+ dataset:
+ args:
+ data_path:
+ - ./datasets/test.json
+ pre_processes:
+ - type: ResizeShortSize
+ args:
+ short_size: 736
+ resize_text_polys: false
+ img_mode: RGB
+ load_char_annotation: false
+ expand_one_char: false
+ loader:
+ batch_size: 1
+ shuffle: true
+ num_workers: 6
+ collate_fn: ICDARCollectFN
diff --git a/benchmark/PaddleOCR_DBNet/data_loader/__init__.py b/benchmark/PaddleOCR_DBNet/data_loader/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..afc6e56b51948a3ec237967b9cf360eea984d625
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/data_loader/__init__.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:52
+# @Author : zhoujun
+import copy
+
+import PIL
+import numpy as np
+import paddle
+from paddle.io import DataLoader, DistributedBatchSampler, BatchSampler
+
+from paddle.vision import transforms
+
+
+def get_dataset(data_path, module_name, transform, dataset_args):
+ """
+    Build the training dataset.
+    :param data_path: list of dataset files, each line formatted as 'path/to/img\tlabel'
+    :param module_name: name of the dataset class in data_loader.dataset to use
+    :param transform: transforms applied to this dataset
+    :param dataset_args: extra arguments for the dataset class
+    :return: the constructed dataset object
+ """
+ from . import dataset
+ s_dataset = getattr(dataset, module_name)(transform=transform,
+ data_path=data_path,
+ **dataset_args)
+ return s_dataset
+
+
+def get_transforms(transforms_config):
+ tr_list = []
+ for item in transforms_config:
+ if 'args' not in item:
+ args = {}
+ else:
+ args = item['args']
+ cls = getattr(transforms, item['type'])(**args)
+ tr_list.append(cls)
+ tr_list = transforms.Compose(tr_list)
+ return tr_list
+
+
+class ICDARCollectFN:
+ def __init__(self, *args, **kwargs):
+ pass
+
+ def __call__(self, batch):
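+        # Collate a list of sample dicts into a single dict: values that are
+        # ndarray / paddle.Tensor / PIL images are stacked along a new batch
+        # dimension, everything else is kept as a python list.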
+ data_dict = {}
+ to_tensor_keys = []
+ for sample in batch:
+ for k, v in sample.items():
+ if k not in data_dict:
+ data_dict[k] = []
+ if isinstance(v, (np.ndarray, paddle.Tensor, PIL.Image.Image)):
+ if k not in to_tensor_keys:
+ to_tensor_keys.append(k)
+ data_dict[k].append(v)
+ for k in to_tensor_keys:
+ data_dict[k] = paddle.stack(data_dict[k], 0)
+ return data_dict
+
+
+def get_dataloader(module_config, distributed=False):
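+    # module_config mirrors a `dataset.train` / `dataset.validate` section of the
+    # YAML configs: {'dataset': {'type': ..., 'args': {...}}, 'loader': {'batch_size': ..., ...}}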
+ if module_config is None:
+ return None
+ config = copy.deepcopy(module_config)
+ dataset_args = config['dataset']['args']
+    if 'transforms' in dataset_args:
+        img_transforms = get_transforms(dataset_args.pop('transforms'))
+    else:
+        img_transforms = None
+    # build the dataset
+    dataset_name = config['dataset']['type']
+    data_path = dataset_args.pop('data_path')
+    if data_path is None:
+ return None
+
+ data_path = [x for x in data_path if x is not None]
+ if len(data_path) == 0:
+ return None
+ if 'collate_fn' not in config['loader'] or config['loader'][
+ 'collate_fn'] is None or len(config['loader']['collate_fn']) == 0:
+ config['loader']['collate_fn'] = None
+ else:
+ config['loader']['collate_fn'] = eval(config['loader']['collate_fn'])()
+
+ _dataset = get_dataset(
+ data_path=data_path,
+ module_name=dataset_name,
+        transform=img_transforms,
+ dataset_args=dataset_args)
+ sampler = None
+ if distributed:
+        # use DistributedBatchSampler for distributed training
+ batch_sampler = DistributedBatchSampler(
+ dataset=_dataset,
+ batch_size=config['loader'].pop('batch_size'),
+ shuffle=config['loader'].pop('shuffle'))
+ else:
+ batch_sampler = BatchSampler(
+ dataset=_dataset,
+ batch_size=config['loader'].pop('batch_size'),
+ shuffle=config['loader'].pop('shuffle'))
+ loader = DataLoader(
+ dataset=_dataset, batch_sampler=batch_sampler, **config['loader'])
+ return loader
diff --git a/benchmark/PaddleOCR_DBNet/data_loader/dataset.py b/benchmark/PaddleOCR_DBNet/data_loader/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..29d3954fe6b89c4585435d0221fa25c28cc8adef
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/data_loader/dataset.py
@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:54
+# @Author : zhoujun
+import pathlib
+import os
+import cv2
+import numpy as np
+import scipy.io as sio
+from tqdm.auto import tqdm
+
+from base import BaseDataSet
+from utils import order_points_clockwise, get_datalist, load, expand_polygon
+
+
+class ICDAR2015Dataset(BaseDataSet):
+ def __init__(self,
+ data_path: str,
+ img_mode,
+ pre_processes,
+ filter_keys,
+ ignore_tags,
+ transform=None,
+ **kwargs):
+ super().__init__(data_path, img_mode, pre_processes, filter_keys,
+ ignore_tags, transform)
+
+ def load_data(self, data_path: str) -> list:
+ data_list = get_datalist(data_path)
+ t_data_list = []
+ for img_path, label_path in data_list:
+ data = self._get_annotation(label_path)
+ if len(data['text_polys']) > 0:
+ item = {
+ 'img_path': img_path,
+ 'img_name': pathlib.Path(img_path).stem
+ }
+ item.update(data)
+ t_data_list.append(item)
+ else:
+                print('there is no suitable bbox in {}'.format(label_path))
+ return t_data_list
+
+ def _get_annotation(self, label_path: str) -> dict:
+ boxes = []
+ texts = []
+ ignores = []
+ with open(label_path, encoding='utf-8', mode='r') as f:
+ for line in f.readlines():
+ params = line.strip().strip('\ufeff').strip(
+ '\xef\xbb\xbf').split(',')
+ try:
+ box = order_points_clockwise(
+ np.array(list(map(float, params[:8]))).reshape(-1, 2))
+ if cv2.contourArea(box) > 0:
+ boxes.append(box)
+ label = params[8]
+ texts.append(label)
+ ignores.append(label in self.ignore_tags)
+                except Exception:
+ print('load label failed on {}'.format(label_path))
+ data = {
+ 'text_polys': np.array(boxes),
+ 'texts': texts,
+ 'ignore_tags': ignores,
+ }
+ return data
+
+
+class DetDataset(BaseDataSet):
+ def __init__(self,
+ data_path: str,
+ img_mode,
+ pre_processes,
+ filter_keys,
+ ignore_tags,
+ transform=None,
+ **kwargs):
+ self.load_char_annotation = kwargs['load_char_annotation']
+ self.expand_one_char = kwargs['expand_one_char']
+ super().__init__(data_path, img_mode, pre_processes, filter_keys,
+ ignore_tags, transform)
+
+ def load_data(self, data_path: str) -> list:
+ """
+        Read text-line polygons and labels, and character polygons and labels, from json files.
+        :param data_path: list of json annotation files
+        :return: list of data dicts
+ """
+ data_list = []
+ for path in data_path:
+ content = load(path)
+ for gt in tqdm(
+ content['data_list'], desc='read file {}'.format(path)):
+ img_path = os.path.join(content['data_root'], gt['img_name'])
+ polygons = []
+ texts = []
+ illegibility_list = []
+ language_list = []
+ for annotation in gt['annotations']:
+ if len(annotation['polygon']) == 0 or len(annotation[
+ 'text']) == 0:
+ continue
+ if len(annotation['text']) > 1 and self.expand_one_char:
+ annotation['polygon'] = expand_polygon(annotation[
+ 'polygon'])
+ polygons.append(annotation['polygon'])
+ texts.append(annotation['text'])
+ illegibility_list.append(annotation['illegibility'])
+ language_list.append(annotation['language'])
+ if self.load_char_annotation:
+ for char_annotation in annotation['chars']:
+ if len(char_annotation['polygon']) == 0 or len(
+ char_annotation['char']) == 0:
+ continue
+ polygons.append(char_annotation['polygon'])
+ texts.append(char_annotation['char'])
+ illegibility_list.append(char_annotation[
+ 'illegibility'])
+ language_list.append(char_annotation['language'])
+ data_list.append({
+ 'img_path': img_path,
+ 'img_name': gt['img_name'],
+ 'text_polys': np.array(polygons),
+ 'texts': texts,
+ 'ignore_tags': illegibility_list
+ })
+ return data_list
+
+
+class SynthTextDataset(BaseDataSet):
+    def __init__(self,
+                 data_path: str,
+                 img_mode,
+                 pre_processes,
+                 filter_keys,
+                 ignore_tags,
+                 transform=None,
+                 **kwargs):
+ self.transform = transform
+ self.dataRoot = pathlib.Path(data_path)
+ if not self.dataRoot.exists():
+            raise FileNotFoundError('Dataset folder does not exist.')
+
+ self.targetFilePath = self.dataRoot / 'gt.mat'
+        if not self.targetFilePath.exists():
+            raise FileNotFoundError('Target file does not exist.')
+ targets = {}
+ sio.loadmat(
+ self.targetFilePath,
+ targets,
+ squeeze_me=True,
+ struct_as_record=False,
+ variable_names=['imnames', 'wordBB', 'txt'])
+
+ self.imageNames = targets['imnames']
+ self.wordBBoxes = targets['wordBB']
+ self.transcripts = targets['txt']
+        super().__init__(data_path, img_mode, pre_processes, filter_keys,
+                         ignore_tags, transform)
+
+ def load_data(self, data_path: str) -> list:
+ t_data_list = []
+ for imageName, wordBBoxes, texts in zip(
+ self.imageNames, self.wordBBoxes, self.transcripts):
+ item = {}
+ wordBBoxes = np.expand_dims(
+ wordBBoxes, axis=2) if (wordBBoxes.ndim == 2) else wordBBoxes
+ _, _, numOfWords = wordBBoxes.shape
+ text_polys = wordBBoxes.reshape(
+ [8, numOfWords], order='F').T # num_words * 8
+ text_polys = text_polys.reshape(numOfWords, 4,
+ 2) # num_of_words * 4 * 2
+ transcripts = [word for line in texts for word in line.split()]
+ if numOfWords != len(transcripts):
+ continue
+ item['img_path'] = str(self.dataRoot / imageName)
+ item['img_name'] = (self.dataRoot / imageName).stem
+ item['text_polys'] = text_polys
+ item['texts'] = transcripts
+ item['ignore_tags'] = [x in self.ignore_tags for x in transcripts]
+ t_data_list.append(item)
+ return t_data_list
diff --git a/benchmark/PaddleOCR_DBNet/data_loader/modules/__init__.py b/benchmark/PaddleOCR_DBNet/data_loader/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc055dae5d44876463c5eb1edfea18b96c319ca8
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/data_loader/modules/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/4 10:53
+# @Author : zhoujun
+from .iaa_augment import IaaAugment
+from .augment import *
+from .random_crop_data import EastRandomCropData, PSERandomCrop
+from .make_border_map import MakeBorderMap
+from .make_shrink_map import MakeShrinkMap
diff --git a/benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py b/benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py
new file mode 100644
index 0000000000000000000000000000000000000000..e81bc123d914a7f2eb143d3c2504fab0e0f8d63f
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/data_loader/modules/augment.py
@@ -0,0 +1,304 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:52
+# @Author : zhoujun
+
+import math
+import numbers
+import random
+
+import cv2
+import numpy as np
+from skimage.util import random_noise
+
+
+class RandomNoise:
+ def __init__(self, random_rate):
+ self.random_rate = random_rate
+
+ def __call__(self, data: dict):
+ """
+ 对图片加噪声
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ if random.random() > self.random_rate:
+ return data
+ data['img'] = (random_noise(
+ data['img'], mode='gaussian', clip=True) * 255).astype(im.dtype)
+ return data
+
+
+class RandomScale:
+ def __init__(self, scales, random_rate):
+ """
+        :param scales: list of scale factors to sample from
+        :param random_rate: probability of applying this transform
+ :return:
+ """
+ self.random_rate = random_rate
+ self.scales = scales
+
+ def __call__(self, data: dict) -> dict:
+ """
+        Randomly pick a scale from scales and resize the image and text polygons.
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ if random.random() > self.random_rate:
+ return data
+ im = data['img']
+ text_polys = data['text_polys']
+
+ tmp_text_polys = text_polys.copy()
+ rd_scale = float(np.random.choice(self.scales))
+ im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
+ tmp_text_polys *= rd_scale
+
+ data['img'] = im
+ data['text_polys'] = tmp_text_polys
+ return data
+
+
+class RandomRotateImgBox:
+ def __init__(self, degrees, random_rate, same_size=False):
+ """
+        :param degrees: rotation range, a single number or a list/tuple of two
+        :param random_rate: probability of applying this transform
+        :param same_size: whether to keep the output the same size as the input
+ :return:
+ """
+ if isinstance(degrees, numbers.Number):
+ if degrees < 0:
+ raise ValueError(
+ "If degrees is a single number, it must be positive.")
+ degrees = (-degrees, degrees)
+ elif isinstance(degrees, list) or isinstance(
+ degrees, tuple) or isinstance(degrees, np.ndarray):
+ if len(degrees) != 2:
+ raise ValueError(
+ "If degrees is a sequence, it must be of len 2.")
+ degrees = degrees
+ else:
+ raise Exception(
+                'degrees must be a Number, list, tuple or np.ndarray')
+ self.degrees = degrees
+ self.same_size = same_size
+ self.random_rate = random_rate
+
+ def __call__(self, data: dict) -> dict:
+ """
+        Randomly rotate the image and the text polygons
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ if random.random() > self.random_rate:
+ return data
+ im = data['img']
+ text_polys = data['text_polys']
+
+        # ---------------------- rotate the image ----------------------
+ w = im.shape[1]
+ h = im.shape[0]
+ angle = np.random.uniform(self.degrees[0], self.degrees[1])
+
+ if self.same_size:
+ nw = w
+ nh = h
+ else:
+            # convert degrees to radians
+ rangle = np.deg2rad(angle)
+            # compute the w and h of the rotated image
+ nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w))
+ nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w))
+        # build the affine matrix
+ rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, 1)
+        # offset from the original image center to the new image center
+ rot_move = np.dot(rot_mat,
+ np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
+        # update the affine matrix
+ rot_mat[0, 2] += rot_move[0]
+ rot_mat[1, 2] += rot_move[1]
+        # apply the affine transform
+ rot_img = cv2.warpAffine(
+ im,
+ rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
+ flags=cv2.INTER_LANCZOS4)
+
+        # ---------------------- correct the bbox coordinates ----------------------
+        # rot_mat is the final rotation matrix
+        # map the four corner points of each bbox into the rotated coordinate system
+ rot_text_polys = list()
+ for bbox in text_polys:
+ point1 = np.dot(rot_mat, np.array([bbox[0, 0], bbox[0, 1], 1]))
+ point2 = np.dot(rot_mat, np.array([bbox[1, 0], bbox[1, 1], 1]))
+ point3 = np.dot(rot_mat, np.array([bbox[2, 0], bbox[2, 1], 1]))
+ point4 = np.dot(rot_mat, np.array([bbox[3, 0], bbox[3, 1], 1]))
+ rot_text_polys.append([point1, point2, point3, point4])
+ data['img'] = rot_img
+ data['text_polys'] = np.array(rot_text_polys)
+ return data
+
+
+class RandomResize:
+ def __init__(self, size, random_rate, keep_ratio=False):
+ """
+        :param size: target size, a number or a list in [w, h] form
+        :param random_rate: probability of applying the transform
+        :param keep_ratio: whether to keep the aspect ratio
+ :return:
+ """
+ if isinstance(size, numbers.Number):
+ if size < 0:
+ raise ValueError(
+ "If input_size is a single number, it must be positive.")
+ size = (size, size)
+ elif isinstance(size, list) or isinstance(size, tuple) or isinstance(
+ size, np.ndarray):
+ if len(size) != 2:
+ raise ValueError(
+ "If input_size is a sequence, it must be of len 2.")
+ size = (size[0], size[1])
+ else:
+ raise Exception(
+                'size must be a Number, list, tuple or np.ndarray')
+ self.size = size
+ self.keep_ratio = keep_ratio
+ self.random_rate = random_rate
+
+ def __call__(self, data: dict) -> dict:
+ """
+        Randomly resize the image and the text polygons to the target size
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ if random.random() > self.random_rate:
+ return data
+ im = data['img']
+ text_polys = data['text_polys']
+
+ if self.keep_ratio:
+            # pad the image so that both sides reach at least the target size
+ h, w, c = im.shape
+ max_h = max(h, self.size[0])
+ max_w = max(w, self.size[1])
+ im_padded = np.zeros((max_h, max_w, c), dtype=np.uint8)
+ im_padded[:h, :w] = im.copy()
+ im = im_padded
+ text_polys = text_polys.astype(np.float32)
+ h, w, _ = im.shape
+ im = cv2.resize(im, self.size)
+ w_scale = self.size[0] / float(w)
+ h_scale = self.size[1] / float(h)
+ text_polys[:, :, 0] *= w_scale
+ text_polys[:, :, 1] *= h_scale
+
+ data['img'] = im
+ data['text_polys'] = text_polys
+ return data
+
+
+def resize_image(img, short_size):
+ height, width, _ = img.shape
+ if height < width:
+ new_height = short_size
+ new_width = new_height / height * width
+ else:
+ new_width = short_size
+ new_height = new_width / width * height
+ new_height = int(round(new_height / 32) * 32)
+ new_width = int(round(new_width / 32) * 32)
+ resized_img = cv2.resize(img, (new_width, new_height))
+ return resized_img, (new_width / width, new_height / height)
+
+
+class ResizeShortSize:
+ def __init__(self, short_size, resize_text_polys=True):
+ """
+        :param short_size: target length of the short side
+        :param resize_text_polys: whether to scale the text polygons as well
+ :return:
+ """
+ self.short_size = short_size
+ self.resize_text_polys = resize_text_polys
+
+ def __call__(self, data: dict) -> dict:
+ """
+        Resize the image and the text polygons
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ im = data['img']
+ text_polys = data['text_polys']
+
+ h, w, _ = im.shape
+ short_edge = min(h, w)
+ if short_edge < self.short_size:
+            # make sure the short side is >= short_size
+ scale = self.short_size / short_edge
+ im = cv2.resize(im, dsize=None, fx=scale, fy=scale)
+ scale = (scale, scale)
+ # im, scale = resize_image(im, self.short_size)
+ if self.resize_text_polys:
+ # text_polys *= scale
+                text_polys[:, :, 0] *= scale[0]
+                text_polys[:, :, 1] *= scale[1]
+
+ data['img'] = im
+ data['text_polys'] = text_polys
+ return data
+
+
+class HorizontalFlip:
+ def __init__(self, random_rate):
+ """
+
+        :param random_rate: probability of applying the flip
+ """
+ self.random_rate = random_rate
+
+ def __call__(self, data: dict) -> dict:
+ """
+        Randomly flip the image and the text polygons horizontally
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ if random.random() > self.random_rate:
+ return data
+ im = data['img']
+ text_polys = data['text_polys']
+
+ flip_text_polys = text_polys.copy()
+ flip_im = cv2.flip(im, 1)
+ h, w, _ = flip_im.shape
+ flip_text_polys[:, :, 0] = w - flip_text_polys[:, :, 0]
+
+ data['img'] = flip_im
+ data['text_polys'] = flip_text_polys
+ return data
+
+
+class VerticallFlip:
+ def __init__(self, random_rate):
+ """
+
+        :param random_rate: probability of applying the flip
+ """
+ self.random_rate = random_rate
+
+ def __call__(self, data: dict) -> dict:
+ """
+        Randomly flip the image and the text polygons vertically
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ if random.random() > self.random_rate:
+ return data
+ im = data['img']
+ text_polys = data['text_polys']
+
+ flip_text_polys = text_polys.copy()
+ flip_im = cv2.flip(im, 0)
+ h, w, _ = flip_im.shape
+ flip_text_polys[:, :, 1] = h - flip_text_polys[:, :, 1]
+ data['img'] = flip_im
+ data['text_polys'] = flip_text_polys
+ return data
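+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (not part of the pipeline): every module above
+    # shares the same data-dict interface, so they can be chained. The dummy
+    # image and polygon below are assumptions for illustration only.
+    data = {
+        'img': np.zeros((100, 200, 3), dtype=np.uint8),
+        'text_polys': np.array(
+            [[[10, 10], [50, 10], [50, 30], [10, 30]]], dtype=np.float32),
+        'texts': ['demo'],
+        'ignore_tags': [False]
+    }
+    for op in [RandomScale([0.5, 1.0, 2.0], random_rate=1.0),
+               HorizontalFlip(random_rate=1.0)]:
+        data = op(data)
+    print(data['img'].shape, data['text_polys'])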
diff --git a/benchmark/PaddleOCR_DBNet/data_loader/modules/iaa_augment.py b/benchmark/PaddleOCR_DBNet/data_loader/modules/iaa_augment.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cf891bbd6fd5763aacbd60749d4dd9b6de89681
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/data_loader/modules/iaa_augment.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/4 18:06
+# @Author : zhoujun
+import numpy as np
+import imgaug
+import imgaug.augmenters as iaa
+
+
+class AugmenterBuilder(object):
+ def __init__(self):
+ pass
+
+ def build(self, args, root=True):
+ if args is None or len(args) == 0:
+ return None
+ elif isinstance(args, list):
+ if root:
+ sequence = [self.build(value, root=False) for value in args]
+ return iaa.Sequential(sequence)
+ else:
+ return getattr(
+ iaa,
+                    args[0])(*[self.to_tuple_if_list(a) for a in args[1:]])
+ elif isinstance(args, dict):
+ cls = getattr(iaa, args['type'])
+ return cls(**{
+ k: self.to_tuple_if_list(v)
+ for k, v in args['args'].items()
+ })
+ else:
+ raise RuntimeError('unknown augmenter arg: ' + str(args))
+
+ def to_tuple_if_list(self, obj):
+ if isinstance(obj, list):
+ return tuple(obj)
+ return obj
+
+
+class IaaAugment():
+ def __init__(self, augmenter_args):
+ self.augmenter_args = augmenter_args
+ self.augmenter = AugmenterBuilder().build(self.augmenter_args)
+
+ def __call__(self, data):
+ image = data['img']
+ shape = image.shape
+
+ if self.augmenter:
+ aug = self.augmenter.to_deterministic()
+ data['img'] = aug.augment_image(image)
+ data = self.may_augment_annotation(aug, data, shape)
+ return data
+
+ def may_augment_annotation(self, aug, data, shape):
+ if aug is None:
+ return data
+
+ line_polys = []
+ for poly in data['text_polys']:
+ new_poly = self.may_augment_poly(aug, shape, poly)
+ line_polys.append(new_poly)
+ data['text_polys'] = np.array(line_polys)
+ return data
+
+ def may_augment_poly(self, aug, img_shape, poly):
+ keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
+ keypoints = aug.augment_keypoints(
+ [imgaug.KeypointsOnImage(
+ keypoints, shape=img_shape)])[0].keypoints
+ poly = [(p.x, p.y) for p in keypoints]
+ return poly
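+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (illustrative only). The augmenter_args below are
+    # an assumed config showing the two supported entry forms: a positional
+    # list and a dict with 'type'/'args'; list values become tuples.
+    augmenter_args = [
+        ['Fliplr', 0.5],
+        {'type': 'Affine', 'args': {'rotate': [-10, 10]}},
+        ['Resize', [0.5, 3.0]],
+    ]
+    aug = IaaAugment(augmenter_args)
+    data = {
+        'img': np.zeros((100, 200, 3), dtype=np.uint8),
+        'text_polys': np.array([[[10, 10], [50, 10], [50, 30], [10, 30]]])
+    }
+    data = aug(data)
+    print(data['img'].shape, data['text_polys'].shape)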
diff --git a/benchmark/PaddleOCR_DBNet/data_loader/modules/make_border_map.py b/benchmark/PaddleOCR_DBNet/data_loader/modules/make_border_map.py
new file mode 100644
index 0000000000000000000000000000000000000000..2985f3c8a01c67efb71b5279edf95dd3f9fe5680
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/data_loader/modules/make_border_map.py
@@ -0,0 +1,143 @@
+import cv2
+import numpy as np
+np.seterr(divide='ignore', invalid='ignore')
+import pyclipper
+from shapely.geometry import Polygon
+
+
+class MakeBorderMap():
+ def __init__(self, shrink_ratio=0.4, thresh_min=0.3, thresh_max=0.7):
+ self.shrink_ratio = shrink_ratio
+ self.thresh_min = thresh_min
+ self.thresh_max = thresh_max
+
+ def __call__(self, data: dict) -> dict:
+ """
+        Generate the threshold (border) map and its mask
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ im = data['img']
+ text_polys = data['text_polys']
+ ignore_tags = data['ignore_tags']
+
+ canvas = np.zeros(im.shape[:2], dtype=np.float32)
+ mask = np.zeros(im.shape[:2], dtype=np.float32)
+
+ for i in range(len(text_polys)):
+ if ignore_tags[i]:
+ continue
+ self.draw_border_map(text_polys[i], canvas, mask=mask)
+ canvas = canvas * (self.thresh_max - self.thresh_min) + self.thresh_min
+
+ data['threshold_map'] = canvas
+ data['threshold_mask'] = mask
+ return data
+
+ def draw_border_map(self, polygon, canvas, mask):
+ polygon = np.array(polygon)
+ assert polygon.ndim == 2
+ assert polygon.shape[1] == 2
+
+ polygon_shape = Polygon(polygon)
+ if polygon_shape.area <= 0:
+ return
+ distance = polygon_shape.area * (
+ 1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
+ subject = [tuple(l) for l in polygon]
+ padding = pyclipper.PyclipperOffset()
+ padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+
+ padded_polygon = np.array(padding.Execute(distance)[0])
+ cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
+
+ xmin = padded_polygon[:, 0].min()
+ xmax = padded_polygon[:, 0].max()
+ ymin = padded_polygon[:, 1].min()
+ ymax = padded_polygon[:, 1].max()
+ width = xmax - xmin + 1
+ height = ymax - ymin + 1
+
+ polygon[:, 0] = polygon[:, 0] - xmin
+ polygon[:, 1] = polygon[:, 1] - ymin
+
+ xs = np.broadcast_to(
+ np.linspace(
+ 0, width - 1, num=width).reshape(1, width), (height, width))
+ ys = np.broadcast_to(
+ np.linspace(
+ 0, height - 1, num=height).reshape(height, 1), (height, width))
+
+ distance_map = np.zeros(
+ (polygon.shape[0], height, width), dtype=np.float32)
+ for i in range(polygon.shape[0]):
+ j = (i + 1) % polygon.shape[0]
+ absolute_distance = self.distance(xs, ys, polygon[i], polygon[j])
+ distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
+ distance_map = distance_map.min(axis=0)
+
+ xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
+ xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
+ ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
+ ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
+ canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
+ 1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
+ xmin_valid - xmin:xmax_valid - xmax + width],
+ canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
+
+ def distance(self, xs, ys, point_1, point_2):
+ '''
+ compute the distance from point to a line
+ ys: coordinates in the first axis
+ xs: coordinates in the second axis
+ point_1, point_2: (x, y), the end of the line
+ '''
+ height, width = xs.shape[:2]
+ square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[
+ 1])
+ square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[
+ 1])
+ square_distance = np.square(point_1[0] - point_2[0]) + np.square(
+ point_1[1] - point_2[1])
+
+ cosin = (square_distance - square_distance_1 - square_distance_2) / (
+ 2 * np.sqrt(square_distance_1 * square_distance_2))
+ square_sin = 1 - np.square(cosin)
+ square_sin = np.nan_to_num(square_sin)
+
+ result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
+ square_distance)
+ result[cosin <
+ 0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin
+ < 0]
+ # self.extend_line(point_1, point_2, result)
+ return result
+
+ def extend_line(self, point_1, point_2, result):
+ ex_point_1 = (int(
+ round(point_1[0] + (point_1[0] - point_2[0]) * (
+ 1 + self.shrink_ratio))), int(
+ round(point_1[1] + (point_1[1] - point_2[1]) * (
+ 1 + self.shrink_ratio))))
+ cv2.line(
+ result,
+ tuple(ex_point_1),
+ tuple(point_1),
+ 4096.0,
+ 1,
+ lineType=cv2.LINE_AA,
+ shift=0)
+ ex_point_2 = (int(
+ round(point_2[0] + (point_2[0] - point_1[0]) * (
+ 1 + self.shrink_ratio))), int(
+ round(point_2[1] + (point_2[1] - point_1[1]) * (
+ 1 + self.shrink_ratio))))
+ cv2.line(
+ result,
+ tuple(ex_point_2),
+ tuple(point_2),
+ 4096.0,
+ 1,
+ lineType=cv2.LINE_AA,
+ shift=0)
+ return ex_point_1, ex_point_2
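+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (illustrative only): build the threshold map and
+    # its mask for one dummy quadrilateral. threshold_map values lie in
+    # [thresh_min, thresh_max] = [0.3, 0.7] by default.
+    data = {
+        'img': np.zeros((100, 200, 3), dtype=np.uint8),
+        'text_polys': np.array(
+            [[[20, 20], [120, 20], [120, 60], [20, 60]]], dtype=np.float32),
+        'texts': ['demo'],
+        'ignore_tags': [False]
+    }
+    data = MakeBorderMap()(data)
+    print(data['threshold_map'].shape, data['threshold_map'].max())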
diff --git a/benchmark/PaddleOCR_DBNet/data_loader/modules/make_shrink_map.py b/benchmark/PaddleOCR_DBNet/data_loader/modules/make_shrink_map.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f268b9dead349538e9f47d5b960feea27f90c51
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/data_loader/modules/make_shrink_map.py
@@ -0,0 +1,133 @@
+import numpy as np
+import cv2
+
+
+def shrink_polygon_py(polygon, shrink_ratio):
+ """
+    Shrink the polygon towards its center; shrinking again with 1 / shrink_ratio roughly restores it
+ """
+ cx = polygon[:, 0].mean()
+ cy = polygon[:, 1].mean()
+ polygon[:, 0] = cx + (polygon[:, 0] - cx) * shrink_ratio
+ polygon[:, 1] = cy + (polygon[:, 1] - cy) * shrink_ratio
+ return polygon
+
+
+def shrink_polygon_pyclipper(polygon, shrink_ratio):
+ from shapely.geometry import Polygon
+ import pyclipper
+ polygon_shape = Polygon(polygon)
+ distance = polygon_shape.area * (
+ 1 - np.power(shrink_ratio, 2)) / polygon_shape.length
+ subject = [tuple(l) for l in polygon]
+ padding = pyclipper.PyclipperOffset()
+ padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+ shrinked = padding.Execute(-distance)
+ if shrinked == []:
+ shrinked = np.array(shrinked)
+ else:
+ shrinked = np.array(shrinked[0]).reshape(-1, 2)
+ return shrinked
+
+
+class MakeShrinkMap():
+ r'''
+ Making binary mask from detection data with ICDAR format.
+ Typically following the process of class `MakeICDARData`.
+ '''
+
+ def __init__(self,
+ min_text_size=8,
+ shrink_ratio=0.4,
+ shrink_type='pyclipper'):
+ shrink_func_dict = {
+ 'py': shrink_polygon_py,
+ 'pyclipper': shrink_polygon_pyclipper
+ }
+ self.shrink_func = shrink_func_dict[shrink_type]
+ self.min_text_size = min_text_size
+ self.shrink_ratio = shrink_ratio
+
+ def __call__(self, data: dict) -> dict:
+ """
+        Generate the shrink map and its mask
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ image = data['img']
+ text_polys = data['text_polys']
+ ignore_tags = data['ignore_tags']
+
+ h, w = image.shape[:2]
+ text_polys, ignore_tags = self.validate_polygons(text_polys,
+ ignore_tags, h, w)
+ gt = np.zeros((h, w), dtype=np.float32)
+ mask = np.ones((h, w), dtype=np.float32)
+ for i in range(len(text_polys)):
+ polygon = text_polys[i]
+ height = max(polygon[:, 1]) - min(polygon[:, 1])
+ width = max(polygon[:, 0]) - min(polygon[:, 0])
+ if ignore_tags[i] or min(height, width) < self.min_text_size:
+ cv2.fillPoly(mask,
+ polygon.astype(np.int32)[np.newaxis, :, :], 0)
+ ignore_tags[i] = True
+ else:
+ shrinked = self.shrink_func(polygon, self.shrink_ratio)
+ if shrinked.size == 0:
+ cv2.fillPoly(mask,
+ polygon.astype(np.int32)[np.newaxis, :, :], 0)
+ ignore_tags[i] = True
+ continue
+ cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
+
+ data['shrink_map'] = gt
+ data['shrink_mask'] = mask
+ return data
+
+ def validate_polygons(self, polygons, ignore_tags, h, w):
+ '''
+ polygons (numpy.array, required): of shape (num_instances, num_points, 2)
+ '''
+ if len(polygons) == 0:
+ return polygons, ignore_tags
+ assert len(polygons) == len(ignore_tags)
+ for polygon in polygons:
+ polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
+ polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)
+
+ for i in range(len(polygons)):
+ area = self.polygon_area(polygons[i])
+ if abs(area) < 1:
+ ignore_tags[i] = True
+ if area > 0:
+ polygons[i] = polygons[i][::-1, :]
+ return polygons, ignore_tags
+
+ def polygon_area(self, polygon):
+ return cv2.contourArea(polygon)
+ # edge = 0
+ # for i in range(polygon.shape[0]):
+ # next_index = (i + 1) % polygon.shape[0]
+ # edge += (polygon[next_index, 0] - polygon[i, 0]) * (polygon[next_index, 1] - polygon[i, 1])
+ #
+ # return edge / 2.
+
+
+if __name__ == '__main__':
+ from shapely.geometry import Polygon
+ import pyclipper
+
+ polygon = np.array([[0, 0], [100, 10], [100, 100], [10, 90]])
+ a = shrink_polygon_py(polygon, 0.4)
+ print(a)
+ print(shrink_polygon_py(a, 1 / 0.4))
+ b = shrink_polygon_pyclipper(polygon, 0.4)
+ print(b)
+ poly = Polygon(b)
+ distance = poly.area * 1.5 / poly.length
+ offset = pyclipper.PyclipperOffset()
+ offset.AddPath(b, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+ expanded = np.array(offset.Execute(distance))
+ bounding_box = cv2.minAreaRect(expanded)
+ points = cv2.boxPoints(bounding_box)
+ print(points)
diff --git a/benchmark/PaddleOCR_DBNet/data_loader/modules/random_crop_data.py b/benchmark/PaddleOCR_DBNet/data_loader/modules/random_crop_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..fac2e4c07cfca966e6c17231f2ab4bce702f191f
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/data_loader/modules/random_crop_data.py
@@ -0,0 +1,206 @@
+import random
+
+import cv2
+import numpy as np
+
+
+# random crop algorithm similar to https://github.com/argman/EAST
+class EastRandomCropData():
+ def __init__(self,
+ size=(640, 640),
+ max_tries=50,
+ min_crop_side_ratio=0.1,
+ require_original_image=False,
+ keep_ratio=True):
+ self.size = size
+ self.max_tries = max_tries
+ self.min_crop_side_ratio = min_crop_side_ratio
+ self.require_original_image = require_original_image
+ self.keep_ratio = keep_ratio
+
+ def __call__(self, data: dict) -> dict:
+ """
+        Randomly crop the image and the text polygons, then resize to the target size
+ :param data: {'img':,'text_polys':,'texts':,'ignore_tags':}
+ :return:
+ """
+ im = data['img']
+ text_polys = data['text_polys']
+ ignore_tags = data['ignore_tags']
+ texts = data['texts']
+ all_care_polys = [
+ text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
+ ]
+        # compute the crop region
+ crop_x, crop_y, crop_w, crop_h = self.crop_area(im, all_care_polys)
+        # crop the image and pad it, keeping the aspect ratio
+ scale_w = self.size[0] / crop_w
+ scale_h = self.size[1] / crop_h
+ scale = min(scale_w, scale_h)
+ h = int(crop_h * scale)
+ w = int(crop_w * scale)
+ if self.keep_ratio:
+ if len(im.shape) == 3:
+ padimg = np.zeros((self.size[1], self.size[0], im.shape[2]),
+ im.dtype)
+ else:
+ padimg = np.zeros((self.size[1], self.size[0]), im.dtype)
+ padimg[:h, :w] = cv2.resize(
+ im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
+ img = padimg
+ else:
+ img = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
+ tuple(self.size))
+        # crop the text polygons
+ text_polys_crop = []
+ ignore_tags_crop = []
+ texts_crop = []
+ for poly, text, tag in zip(text_polys, texts, ignore_tags):
+ poly = ((poly - (crop_x, crop_y)) * scale).tolist()
+ if not self.is_poly_outside_rect(poly, 0, 0, w, h):
+ text_polys_crop.append(poly)
+ ignore_tags_crop.append(tag)
+ texts_crop.append(text)
+ data['img'] = img
+ data['text_polys'] = np.float32(text_polys_crop)
+ data['ignore_tags'] = ignore_tags_crop
+ data['texts'] = texts_crop
+ return data
+
+ def is_poly_in_rect(self, poly, x, y, w, h):
+ poly = np.array(poly)
+ if poly[:, 0].min() < x or poly[:, 0].max() > x + w:
+ return False
+ if poly[:, 1].min() < y or poly[:, 1].max() > y + h:
+ return False
+ return True
+
+ def is_poly_outside_rect(self, poly, x, y, w, h):
+ poly = np.array(poly)
+ if poly[:, 0].max() < x or poly[:, 0].min() > x + w:
+ return True
+ if poly[:, 1].max() < y or poly[:, 1].min() > y + h:
+ return True
+ return False
+
+ def split_regions(self, axis):
+ regions = []
+ min_axis = 0
+ for i in range(1, axis.shape[0]):
+ if axis[i] != axis[i - 1] + 1:
+ region = axis[min_axis:i]
+ min_axis = i
+ regions.append(region)
+ return regions
+
+ def random_select(self, axis, max_size):
+ xx = np.random.choice(axis, size=2)
+ xmin = np.min(xx)
+ xmax = np.max(xx)
+ xmin = np.clip(xmin, 0, max_size - 1)
+ xmax = np.clip(xmax, 0, max_size - 1)
+ return xmin, xmax
+
+ def region_wise_random_select(self, regions, max_size):
+ selected_index = list(np.random.choice(len(regions), 2))
+ selected_values = []
+ for index in selected_index:
+ axis = regions[index]
+ xx = int(np.random.choice(axis, size=1))
+ selected_values.append(xx)
+ xmin = min(selected_values)
+ xmax = max(selected_values)
+ return xmin, xmax
+
+ def crop_area(self, im, text_polys):
+ h, w = im.shape[:2]
+ h_array = np.zeros(h, dtype=np.int32)
+ w_array = np.zeros(w, dtype=np.int32)
+ for points in text_polys:
+ points = np.round(points, decimals=0).astype(np.int32)
+ minx = np.min(points[:, 0])
+ maxx = np.max(points[:, 0])
+ w_array[minx:maxx] = 1
+ miny = np.min(points[:, 1])
+ maxy = np.max(points[:, 1])
+ h_array[miny:maxy] = 1
+ # ensure the cropped area not across a text
+ h_axis = np.where(h_array == 0)[0]
+ w_axis = np.where(w_array == 0)[0]
+
+ if len(h_axis) == 0 or len(w_axis) == 0:
+ return 0, 0, w, h
+
+ h_regions = self.split_regions(h_axis)
+ w_regions = self.split_regions(w_axis)
+
+ for i in range(self.max_tries):
+ if len(w_regions) > 1:
+ xmin, xmax = self.region_wise_random_select(w_regions, w)
+ else:
+ xmin, xmax = self.random_select(w_axis, w)
+ if len(h_regions) > 1:
+ ymin, ymax = self.region_wise_random_select(h_regions, h)
+ else:
+ ymin, ymax = self.random_select(h_axis, h)
+
+ if xmax - xmin < self.min_crop_side_ratio * w or ymax - ymin < self.min_crop_side_ratio * h:
+ # area too small
+ continue
+ num_poly_in_rect = 0
+ for poly in text_polys:
+ if not self.is_poly_outside_rect(poly, xmin, ymin, xmax - xmin,
+ ymax - ymin):
+ num_poly_in_rect += 1
+ break
+
+ if num_poly_in_rect > 0:
+ return xmin, ymin, xmax - xmin, ymax - ymin
+
+ return 0, 0, w, h
+
+
+class PSERandomCrop():
+ def __init__(self, size):
+ self.size = size
+
+ def __call__(self, data):
+ imgs = data['imgs']
+
+ h, w = imgs[0].shape[0:2]
+ th, tw = self.size
+ if w == tw and h == th:
+ return imgs
+
+        # if the label contains text instances, crop around them with some probability, controlled by threshold_label_map
+ if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
+            # top-left corner of the text instances
+ tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
+ tl[tl < 0] = 0
+            # bottom-right corner of the text instances
+ br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
+ br[br < 0] = 0
+            # make sure there is enough room to crop when the bottom-right corner is selected
+ br[0] = min(br[0], h - th)
+ br[1] = min(br[1], w - tw)
+
+ for _ in range(50000):
+ i = random.randint(tl[0], br[0])
+ j = random.randint(tl[1], br[1])
+                # make sure the shrink_label_map contains text
+ if imgs[1][i:i + th, j:j + tw].sum() <= 0:
+ continue
+ else:
+ break
+ else:
+ i = random.randint(0, h - th)
+ j = random.randint(0, w - tw)
+
+ # return i, j, th, tw
+ for idx in range(len(imgs)):
+ if len(imgs[idx].shape) == 3:
+ imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
+ else:
+ imgs[idx] = imgs[idx][i:i + th, j:j + tw]
+ data['imgs'] = imgs
+ return data
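+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (illustrative only): crop a dummy 800x800 sample
+    # down to 640x640 while keeping the single un-ignored polygon.
+    data = {
+        'img': np.zeros((800, 800, 3), dtype=np.uint8),
+        'text_polys': np.array(
+            [[[100, 100], [300, 100], [300, 200], [100, 200]]],
+            dtype=np.float32),
+        'texts': ['demo'],
+        'ignore_tags': [False]
+    }
+    data = EastRandomCropData(size=(640, 640))(data)
+    print(data['img'].shape, len(data['text_polys']))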
diff --git a/benchmark/PaddleOCR_DBNet/environment.yml b/benchmark/PaddleOCR_DBNet/environment.yml
new file mode 100644
index 0000000000000000000000000000000000000000..571dbf2a0462a842e17420cbd2bdba2b56b62131
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/environment.yml
@@ -0,0 +1,21 @@
+name: dbnet
+channels:
+ - conda-forge
+ - defaults
+dependencies:
+ - anyconfig==0.9.10
+ - future==0.18.2
+ - imgaug==0.4.0
+ - matplotlib==3.1.2
+ - numpy==1.17.4
+ - opencv
+ - pyclipper
+ - PyYAML==5.2
+ - scikit-image==0.16.2
+ - Shapely==1.6.4
+ - tensorboard=2
+ - tqdm==4.40.1
+ - ipython
+ - pip
+ - pip:
+ - polygon3
diff --git a/benchmark/PaddleOCR_DBNet/eval.sh b/benchmark/PaddleOCR_DBNet/eval.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b3bf46818610caedca6690a95713a020b678e2bf
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/eval.sh
@@ -0,0 +1 @@
+CUDA_VISIBLE_DEVICES=0 python3 tools/eval.py --model_path ''
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/generate_lists.sh b/benchmark/PaddleOCR_DBNet/generate_lists.sh
new file mode 100644
index 0000000000000000000000000000000000000000..84f408c64330f0aefb684ed738fa2dcdbc4af6da
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/generate_lists.sh
@@ -0,0 +1,17 @@
+# Only use this if the file names of the images and the txts are identical
+rm ./datasets/train_img.txt
+rm ./datasets/train_gt.txt
+rm ./datasets/test_img.txt
+rm ./datasets/test_gt.txt
+rm ./datasets/train.txt
+rm ./datasets/test.txt
+ls ./datasets/train/img/*.jpg > ./datasets/train_img.txt
+ls ./datasets/train/gt/*.txt > ./datasets/train_gt.txt
+ls ./datasets/test/img/*.jpg > ./datasets/test_img.txt
+ls ./datasets/test/gt/*.txt > ./datasets/test_gt.txt
+paste ./datasets/train_img.txt ./datasets/train_gt.txt > ./datasets/train.txt
+paste ./datasets/test_img.txt ./datasets/test_gt.txt > ./datasets/test.txt
+rm ./datasets/train_img.txt
+rm ./datasets/train_gt.txt
+rm ./datasets/test_img.txt
+rm ./datasets/test_gt.txt
diff --git a/benchmark/PaddleOCR_DBNet/imgs/paper/db.jpg b/benchmark/PaddleOCR_DBNet/imgs/paper/db.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..aa6c7e9890551abb9aaf39fe76db67cb5588507b
Binary files /dev/null and b/benchmark/PaddleOCR_DBNet/imgs/paper/db.jpg differ
diff --git a/benchmark/PaddleOCR_DBNet/models/__init__.py b/benchmark/PaddleOCR_DBNet/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..26ff73ff690bf56c8d5cfb64a37bbffb706da7e2
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/__init__.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:55
+# @Author : zhoujun
+import copy
+from .model import Model
+from .losses import build_loss
+
+__all__ = ['build_loss', 'build_model']
+support_model = ['Model']
+
+
+def build_model(config):
+ """
+ get architecture model class
+ """
+ copy_config = copy.deepcopy(config)
+ arch_type = copy_config.pop('type')
+    assert arch_type in support_model, f'{arch_type} is not developed yet! Only {support_model} are supported now'
+ arch_model = eval(arch_type)(copy_config)
+ return arch_model
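+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (illustrative only): the nested dict mirrors the
+    # architecture section of the yaml configs; the exact values here are
+    # assumptions for demonstration.
+    config = {
+        'type': 'Model',
+        'backbone': {'type': 'resnet18', 'pretrained': False},
+        'neck': {'type': 'FPN', 'inner_channels': 256},
+        'head': {'type': 'DBHead', 'out_channels': 2, 'k': 50},
+    }
+    model = build_model(config)
+    print(model.name)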
diff --git a/benchmark/PaddleOCR_DBNet/models/backbone/__init__.py b/benchmark/PaddleOCR_DBNet/models/backbone/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..740c8d5ff09311def6ca465aae40c34261518102
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/backbone/__init__.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:54
+# @Author : zhoujun
+
+from .resnet import *
+
+__all__ = ['build_backbone']
+
+support_backbone = [
+ 'resnet18', 'deformable_resnet18', 'deformable_resnet50', 'resnet50',
+ 'resnet34', 'resnet101', 'resnet152'
+]
+
+
+def build_backbone(backbone_name, **kwargs):
+    assert backbone_name in support_backbone, f'supported backbones are {support_backbone}'
+ backbone = eval(backbone_name)(**kwargs)
+ return backbone
diff --git a/benchmark/PaddleOCR_DBNet/models/backbone/resnet.py b/benchmark/PaddleOCR_DBNet/models/backbone/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b30b382d98fec00d396dabc4f12f20ad8c77389
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/backbone/resnet.py
@@ -0,0 +1,375 @@
+import math
+import paddle
+from paddle import nn
+
+BatchNorm2d = nn.BatchNorm2D
+
+__all__ = [
+ 'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+ 'deformable_resnet18', 'deformable_resnet50', 'resnet152'
+]
+
+model_urls = {
+ 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+ 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+ 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+ 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+ 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+}
+
+
+def constant_init(module, constant, bias=0):
+ module.weight = paddle.create_parameter(
+ shape=module.weight.shape,
+ dtype='float32',
+ default_initializer=paddle.nn.initializer.Constant(constant))
+ if hasattr(module, 'bias'):
+ module.bias = paddle.create_parameter(
+ shape=module.bias.shape,
+ dtype='float32',
+ default_initializer=paddle.nn.initializer.Constant(bias))
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+ """3x3 convolution with padding"""
+ return nn.Conv2D(
+ in_planes,
+ out_planes,
+ kernel_size=3,
+ stride=stride,
+ padding=1,
+ bias_attr=False)
+
+
+class BasicBlock(nn.Layer):
+ expansion = 1
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None, dcn=None):
+ super(BasicBlock, self).__init__()
+ self.with_dcn = dcn is not None
+ self.conv1 = conv3x3(inplanes, planes, stride)
+ self.bn1 = BatchNorm2d(planes, momentum=0.1)
+ self.relu = nn.ReLU()
+ self.with_modulated_dcn = False
+ if not self.with_dcn:
+ self.conv2 = nn.Conv2D(
+ planes, planes, kernel_size=3, padding=1, bias_attr=False)
+ else:
+            from paddle.vision.ops import DeformConv2D
+ deformable_groups = dcn.get('deformable_groups', 1)
+ offset_channels = 18
+ self.conv2_offset = nn.Conv2D(
+ planes,
+ deformable_groups * offset_channels,
+ kernel_size=3,
+ padding=1)
+ self.conv2 = DeformConv2D(
+ planes, planes, kernel_size=3, padding=1, bias_attr=False)
+ self.bn2 = BatchNorm2d(planes, momentum=0.1)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ # out = self.conv2(out)
+ if not self.with_dcn:
+ out = self.conv2(out)
+ else:
+ offset = self.conv2_offset(out)
+ out = self.conv2(out, offset)
+ out = self.bn2(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class Bottleneck(nn.Layer):
+ expansion = 4
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None, dcn=None):
+ super(Bottleneck, self).__init__()
+ self.with_dcn = dcn is not None
+ self.conv1 = nn.Conv2D(inplanes, planes, kernel_size=1, bias_attr=False)
+ self.bn1 = BatchNorm2d(planes, momentum=0.1)
+ self.with_modulated_dcn = False
+ if not self.with_dcn:
+ self.conv2 = nn.Conv2D(
+ planes,
+ planes,
+ kernel_size=3,
+ stride=stride,
+ padding=1,
+ bias_attr=False)
+ else:
+ deformable_groups = dcn.get('deformable_groups', 1)
+ from paddle.vision.ops import DeformConv2D
+ offset_channels = 18
+ self.conv2_offset = nn.Conv2D(
+ planes,
+ deformable_groups * offset_channels,
+ stride=stride,
+ kernel_size=3,
+ padding=1)
+ self.conv2 = DeformConv2D(
+ planes,
+ planes,
+ kernel_size=3,
+ padding=1,
+ stride=stride,
+ bias_attr=False)
+ self.bn2 = BatchNorm2d(planes, momentum=0.1)
+ self.conv3 = nn.Conv2D(
+ planes, planes * 4, kernel_size=1, bias_attr=False)
+ self.bn3 = BatchNorm2d(planes * 4, momentum=0.1)
+ self.relu = nn.ReLU()
+ self.downsample = downsample
+ self.stride = stride
+ self.dcn = dcn
+ self.with_dcn = dcn is not None
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ # out = self.conv2(out)
+ if not self.with_dcn:
+ out = self.conv2(out)
+ else:
+ offset = self.conv2_offset(out)
+ out = self.conv2(out, offset)
+ out = self.bn2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class ResNet(nn.Layer):
+ def __init__(self, block, layers, in_channels=3, dcn=None):
+ self.dcn = dcn
+ self.inplanes = 64
+ super(ResNet, self).__init__()
+ self.out_channels = []
+ self.conv1 = nn.Conv2D(
+ in_channels,
+ 64,
+ kernel_size=7,
+ stride=2,
+ padding=3,
+ bias_attr=False)
+ self.bn1 = BatchNorm2d(64, momentum=0.1)
+ self.relu = nn.ReLU()
+ self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
+ self.layer1 = self._make_layer(block, 64, layers[0])
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dcn=dcn)
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dcn=dcn)
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dcn=dcn)
+
+ if self.dcn is not None:
+            for m in self.sublayers():
+ if isinstance(m, Bottleneck) or isinstance(m, BasicBlock):
+ if hasattr(m, 'conv2_offset'):
+ constant_init(m.conv2_offset, 0)
+
+ def _make_layer(self, block, planes, blocks, stride=1, dcn=None):
+ downsample = None
+ if stride != 1 or self.inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ nn.Conv2D(
+ self.inplanes,
+ planes * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias_attr=False),
+ BatchNorm2d(
+ planes * block.expansion, momentum=0.1), )
+
+ layers = []
+ layers.append(block(self.inplanes, planes, stride, downsample, dcn=dcn))
+ self.inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(block(self.inplanes, planes, dcn=dcn))
+ self.out_channels.append(planes * block.expansion)
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.bn1(x)
+ x = self.relu(x)
+ x = self.maxpool(x)
+
+ x2 = self.layer1(x)
+ x3 = self.layer2(x2)
+ x4 = self.layer3(x3)
+ x5 = self.layer4(x4)
+
+ return x2, x3, x4, x5
+
+
+def load_torch_params(paddle_model, torch_params):
+ paddle_params = paddle_model.state_dict()
+
+ fc_names = ['classifier']
+    for key, torch_value in torch_params.items():
+ if 'num_batches_tracked' in key:
+ continue
+ key = key.replace("running_var", "_variance").replace(
+ "running_mean", "_mean").replace("module.", "")
+ torch_value = torch_value.detach().cpu().numpy()
+ if key in paddle_params:
+ flag = [i in key for i in fc_names]
+ if any(flag) and "weight" in key: # ignore bias
+ new_shape = [1, 0] + list(range(2, torch_value.ndim))
+ print(
+ f"name: {key}, ori shape: {torch_value.shape}, new shape: {torch_value.transpose(new_shape).shape}"
+ )
+ torch_value = torch_value.transpose(new_shape)
+ paddle_params[key] = torch_value
+ else:
+ print(f'{key} not in paddle')
+ paddle_model.set_state_dict(paddle_params)
+
+
+def load_models(model, model_name):
+ import torch.utils.model_zoo as model_zoo
+    torch_params = model_zoo.load_url(model_urls[model_name])
+    load_torch_params(model, torch_params)
+
+
+def resnet18(pretrained=True, **kwargs):
+ """Constructs a ResNet-18 model.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ """
+ model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
+ if pretrained:
+ assert kwargs.get(
+ 'in_channels',
+            3) == 3, 'in_channels must be 3 when pretrained is True'
+ print('load from imagenet')
+ load_models(model, 'resnet18')
+ return model
+
+
+def deformable_resnet18(pretrained=True, **kwargs):
+ """Constructs a ResNet-18 model.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ """
+ model = ResNet(
+ BasicBlock, [2, 2, 2, 2], dcn=dict(deformable_groups=1), **kwargs)
+ if pretrained:
+ assert kwargs.get(
+ 'in_channels',
+            3) == 3, 'in_channels must be 3 when pretrained is True'
+ print('load from imagenet')
+        load_models(model, 'resnet18')
+ return model
+
+
+def resnet34(pretrained=True, **kwargs):
+ """Constructs a ResNet-34 model.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ """
+ model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
+ if pretrained:
+ assert kwargs.get(
+ 'in_channels',
+            3) == 3, 'in_channels must be 3 when pretrained is True'
+        load_models(model, 'resnet34')
+ return model
+
+
+def resnet50(pretrained=True, **kwargs):
+ """Constructs a ResNet-50 model.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ """
+ model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
+ if pretrained:
+ assert kwargs.get(
+ 'in_channels',
+            3) == 3, 'in_channels must be 3 when pretrained is True'
+ load_models(model, 'resnet50')
+ return model
+
+
+def deformable_resnet50(pretrained=True, **kwargs):
+ """Constructs a ResNet-50 model with deformable conv.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ """
+ model = ResNet(
+ Bottleneck, [3, 4, 6, 3], dcn=dict(deformable_groups=1), **kwargs)
+ if pretrained:
+ assert kwargs.get(
+ 'in_channels',
+            3) == 3, 'in_channels must be 3 when pretrained is True'
+        load_models(model, 'resnet50')
+ return model
+
+
+def resnet101(pretrained=True, **kwargs):
+ """Constructs a ResNet-101 model.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ """
+ model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
+ if pretrained:
+ assert kwargs.get(
+ 'in_channels',
+            3) == 3, 'in_channels must be 3 when pretrained is True'
+        load_models(model, 'resnet101')
+ return model
+
+
+def resnet152(pretrained=True, **kwargs):
+ """Constructs a ResNet-152 model.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ """
+ model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
+ if pretrained:
+ assert kwargs.get(
+ 'in_channels',
+            3) == 3, 'in_channels must be 3 when pretrained is True'
+        load_models(model, 'resnet152')
+ return model
+
+
+if __name__ == '__main__':
+
+ x = paddle.zeros([2, 3, 640, 640])
+ net = resnet50(pretrained=True)
+ y = net(x)
+ for u in y:
+ print(u.shape)
+
+ print(net.out_channels)
diff --git a/benchmark/PaddleOCR_DBNet/models/basic.py b/benchmark/PaddleOCR_DBNet/models/basic.py
new file mode 100644
index 0000000000000000000000000000000000000000..f661878df7e1a7a9cbadc2f58d532c3af4949589
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/basic.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/6 11:19
+# @Author : zhoujun
+from paddle import nn
+
+
+class ConvBnRelu(nn.Layer):
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ bias=True,
+ padding_mode='zeros',
+ inplace=True):
+ super().__init__()
+ self.conv = nn.Conv2D(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ groups=groups,
+ bias_attr=bias,
+ padding_mode=padding_mode)
+ self.bn = nn.BatchNorm2D(out_channels)
+ self.relu = nn.ReLU()
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ x = self.relu(x)
+ return x
diff --git a/benchmark/PaddleOCR_DBNet/models/head/DBHead.py b/benchmark/PaddleOCR_DBNet/models/head/DBHead.py
new file mode 100644
index 0000000000000000000000000000000000000000..29277cec9d3eb60929e2bda2e78d08856008f0b4
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/head/DBHead.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/4 14:54
+# @Author : zhoujun
+import paddle
+from paddle import nn, ParamAttr
+
+
+class DBHead(nn.Layer):
+ def __init__(self, in_channels, out_channels, k=50):
+ super().__init__()
+ self.k = k
+ self.binarize = nn.Sequential(
+ nn.Conv2D(
+ in_channels,
+ in_channels // 4,
+ 3,
+ padding=1,
+ weight_attr=ParamAttr(
+ initializer=nn.initializer.KaimingNormal())),
+ nn.BatchNorm2D(
+ in_channels // 4,
+ weight_attr=ParamAttr(initializer=nn.initializer.Constant(1)),
+ bias_attr=ParamAttr(initializer=nn.initializer.Constant(1e-4))),
+ nn.ReLU(),
+ nn.Conv2DTranspose(
+ in_channels // 4,
+ in_channels // 4,
+ 2,
+ 2,
+ weight_attr=ParamAttr(
+ initializer=nn.initializer.KaimingNormal())),
+ nn.BatchNorm2D(
+ in_channels // 4,
+ weight_attr=ParamAttr(initializer=nn.initializer.Constant(1)),
+ bias_attr=ParamAttr(initializer=nn.initializer.Constant(1e-4))),
+ nn.ReLU(),
+ nn.Conv2DTranspose(
+ in_channels // 4,
+ 1,
+ 2,
+ 2,
+ weight_attr=nn.initializer.KaimingNormal()),
+ nn.Sigmoid())
+
+ self.thresh = self._init_thresh(in_channels)
+
+ def forward(self, x):
+ shrink_maps = self.binarize(x)
+ threshold_maps = self.thresh(x)
+ if self.training:
+ binary_maps = self.step_function(shrink_maps, threshold_maps)
+ y = paddle.concat(
+ (shrink_maps, threshold_maps, binary_maps), axis=1)
+ else:
+ y = paddle.concat((shrink_maps, threshold_maps), axis=1)
+ return y
+
+ def _init_thresh(self,
+ inner_channels,
+ serial=False,
+ smooth=False,
+ bias=False):
+ in_channels = inner_channels
+ if serial:
+ in_channels += 1
+ self.thresh = nn.Sequential(
+ nn.Conv2D(
+ in_channels,
+ inner_channels // 4,
+ 3,
+ padding=1,
+ bias_attr=bias,
+ weight_attr=ParamAttr(
+ initializer=nn.initializer.KaimingNormal())),
+ nn.BatchNorm2D(
+ inner_channels // 4,
+ weight_attr=ParamAttr(initializer=nn.initializer.Constant(1)),
+ bias_attr=ParamAttr(initializer=nn.initializer.Constant(1e-4))),
+ nn.ReLU(),
+ self._init_upsample(
+ inner_channels // 4,
+ inner_channels // 4,
+ smooth=smooth,
+ bias=bias),
+ nn.BatchNorm2D(
+ inner_channels // 4,
+ weight_attr=ParamAttr(initializer=nn.initializer.Constant(1)),
+ bias_attr=ParamAttr(initializer=nn.initializer.Constant(1e-4))),
+ nn.ReLU(),
+ self._init_upsample(
+ inner_channels // 4, 1, smooth=smooth, bias=bias),
+ nn.Sigmoid())
+ return self.thresh
+
+ def _init_upsample(self,
+ in_channels,
+ out_channels,
+ smooth=False,
+ bias=False):
+ if smooth:
+ inter_out_channels = out_channels
+ if out_channels == 1:
+ inter_out_channels = in_channels
+ module_list = [
+ nn.Upsample(
+ scale_factor=2, mode='nearest'), nn.Conv2D(
+ in_channels,
+ inter_out_channels,
+ 3,
+ 1,
+ 1,
+ bias_attr=bias,
+ weight_attr=ParamAttr(
+ initializer=nn.initializer.KaimingNormal()))
+ ]
+ if out_channels == 1:
+ module_list.append(
+ nn.Conv2D(
+ in_channels,
+ out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=1,
+ bias_attr=True,
+ weight_attr=ParamAttr(
+ initializer=nn.initializer.KaimingNormal())))
+ return nn.Sequential(module_list)
+ else:
+ return nn.Conv2DTranspose(
+ in_channels,
+ out_channels,
+ 2,
+ 2,
+ weight_attr=ParamAttr(
+ initializer=nn.initializer.KaimingNormal()))
+
+ def step_function(self, x, y):
+ return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y)))
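+
+
+if __name__ == '__main__':
+    # Minimal shape check (illustrative only): in train mode the head stacks
+    # shrink, threshold and approximate binary maps (3 channels); in eval
+    # mode only shrink and threshold maps (2 channels) are returned. Each
+    # branch upsamples by 4x via the two stride-2 transposed convolutions.
+    head = DBHead(in_channels=256, out_channels=2)
+    head.eval()
+    y = head(paddle.rand([1, 256, 160, 160]))
+    print(y.shape)  # [1, 2, 640, 640]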
diff --git a/benchmark/PaddleOCR_DBNet/models/head/__init__.py b/benchmark/PaddleOCR_DBNet/models/head/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5610c69754ebd1d8d7aa1b69773e91672481e418
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/head/__init__.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+# @Time : 2020/6/5 11:35
+# @Author : zhoujun
+from .DBHead import DBHead
+
+__all__ = ['build_head']
+support_head = ['DBHead']
+
+
+def build_head(head_name, **kwargs):
+    assert head_name in support_head, f'supported heads are {support_head}'
+ head = eval(head_name)(**kwargs)
+ return head
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/models/losses/DB_loss.py b/benchmark/PaddleOCR_DBNet/models/losses/DB_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..74d240c17b11ae784203a3916b06925a0a2f3af6
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/losses/DB_loss.py
@@ -0,0 +1,49 @@
+import paddle
+from models.losses.basic_loss import BalanceCrossEntropyLoss, MaskL1Loss, DiceLoss
+
+
+class DBLoss(paddle.nn.Layer):
+ def __init__(self,
+ alpha=1.0,
+ beta=10,
+ ohem_ratio=3,
+ reduction='mean',
+ eps=1e-06):
+ """
+        Implement DB Loss.
+        :param alpha: weight of the shrink (binary) map loss
+        :param beta: weight of the threshold map loss
+        :param ohem_ratio: negative/positive sample ratio used by OHEM
+        :param reduction: 'mean' or 'sum', how the loss is reduced over the batch
+ """
+ super().__init__()
+        assert reduction in ['mean', 'sum'], "reduction must be in ['mean', 'sum']"
+ self.alpha = alpha
+ self.beta = beta
+ self.bce_loss = BalanceCrossEntropyLoss(negative_ratio=ohem_ratio)
+ self.dice_loss = DiceLoss(eps=eps)
+ self.l1_loss = MaskL1Loss(eps=eps)
+ self.ohem_ratio = ohem_ratio
+ self.reduction = reduction
+
+ def forward(self, pred, batch):
+ shrink_maps = pred[:, 0, :, :]
+ threshold_maps = pred[:, 1, :, :]
+ binary_maps = pred[:, 2, :, :]
+ loss_shrink_maps = self.bce_loss(shrink_maps, batch['shrink_map'],
+ batch['shrink_mask'])
+ loss_threshold_maps = self.l1_loss(
+ threshold_maps, batch['threshold_map'], batch['threshold_mask'])
+ metrics = dict(
+ loss_shrink_maps=loss_shrink_maps,
+ loss_threshold_maps=loss_threshold_maps)
+ if pred.shape[1] > 2:
+ loss_binary_maps = self.dice_loss(binary_maps, batch['shrink_map'],
+ batch['shrink_mask'])
+ metrics['loss_binary_maps'] = loss_binary_maps
+ loss_all = (self.alpha * loss_shrink_maps + self.beta *
+ loss_threshold_maps + loss_binary_maps)
+ metrics['loss'] = loss_all
+ else:
+ metrics['loss'] = loss_shrink_maps
+ return metrics
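+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (illustrative only); run from the repo root so
+    # the `models` package is importable. Predictions must lie in (0, 1)
+    # because the balanced loss uses binary cross entropy internally.
+    pred = paddle.nn.functional.sigmoid(paddle.randn([1, 3, 64, 64]))
+    batch = {
+        'shrink_map': paddle.cast(paddle.rand([1, 64, 64]) > 0.5, 'float32'),
+        'shrink_mask': paddle.ones([1, 64, 64]),
+        'threshold_map': paddle.rand([1, 64, 64]),
+        'threshold_mask': paddle.ones([1, 64, 64]),
+    }
+    metrics = DBLoss()(pred, batch)
+    print({k: float(v) for k, v in metrics.items()})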
diff --git a/benchmark/PaddleOCR_DBNet/models/losses/__init__.py b/benchmark/PaddleOCR_DBNet/models/losses/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9dc0f1033b66999bb9cc7edb3c39d80836963c56
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/losses/__init__.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+# @Time : 2020/6/5 11:36
+# @Author : zhoujun
+import copy
+from .DB_loss import DBLoss
+
+__all__ = ['build_loss']
+support_loss = ['DBLoss']
+
+
+def build_loss(config):
+ copy_config = copy.deepcopy(config)
+ loss_type = copy_config.pop('type')
+    assert loss_type in support_loss, f'supported losses are {support_loss}'
+ criterion = eval(loss_type)(**copy_config)
+ return criterion
diff --git a/benchmark/PaddleOCR_DBNet/models/losses/basic_loss.py b/benchmark/PaddleOCR_DBNet/models/losses/basic_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e68cb172aea81afc5bd6c5d75dedd6841c59456
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/losses/basic_loss.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/4 14:39
+# @Author : zhoujun
+import paddle
+import paddle.nn as nn
+
+
+class BalanceCrossEntropyLoss(nn.Layer):
+ '''
+ Balanced cross entropy loss.
+ Shape:
+ - Input: :math:`(N, 1, H, W)`
+ - GT: :math:`(N, 1, H, W)`, same shape as the input
+ - Mask: :math:`(N, H, W)`, same spatial shape as the input
+ - Output: scalar.
+
+ '''
+
+ def __init__(self, negative_ratio=3.0, eps=1e-6):
+ super(BalanceCrossEntropyLoss, self).__init__()
+ self.negative_ratio = negative_ratio
+ self.eps = eps
+
+ def forward(self,
+ pred: paddle.Tensor,
+ gt: paddle.Tensor,
+ mask: paddle.Tensor,
+ return_origin=False):
+ '''
+ Args:
+ pred: shape :math:`(N, 1, H, W)`, the prediction of network
+ gt: shape :math:`(N, 1, H, W)`, the target
+ mask: shape :math:`(N, H, W)`, the mask indicates positive regions
+ '''
+ positive = (gt * mask)
+ negative = ((1 - gt) * mask)
+ positive_count = int(positive.sum())
+ negative_count = min(
+ int(negative.sum()), int(positive_count * self.negative_ratio))
+ loss = nn.functional.binary_cross_entropy(pred, gt, reduction='none')
+ positive_loss = loss * positive
+ negative_loss = loss * negative
+ negative_loss, _ = negative_loss.reshape([-1]).topk(negative_count)
+
+ balance_loss = (positive_loss.sum() + negative_loss.sum()) / (
+ positive_count + negative_count + self.eps)
+
+ if return_origin:
+ return balance_loss, loss
+ return balance_loss
+
+
+class DiceLoss(nn.Layer):
+ '''
+ Loss function from https://arxiv.org/abs/1707.03237,
+    where the IoU computation is applied in a heatmap manner to measure the
+    divergence between two heatmaps.
+ '''
+
+ def __init__(self, eps=1e-6):
+ super(DiceLoss, self).__init__()
+ self.eps = eps
+
+ def forward(self, pred: paddle.Tensor, gt, mask, weights=None):
+ '''
+ pred: one or two heatmaps of shape (N, 1, H, W),
+        the losses of the two heatmaps are added together.
+ gt: (N, 1, H, W)
+ mask: (N, H, W)
+ '''
+ return self._compute(pred, gt, mask, weights)
+
+ def _compute(self, pred, gt, mask, weights):
+ if len(pred.shape) == 4:
+ pred = pred[:, 0, :, :]
+ gt = gt[:, 0, :, :]
+ assert pred.shape == gt.shape
+ assert pred.shape == mask.shape
+ if weights is not None:
+ assert weights.shape == mask.shape
+ mask = weights * mask
+ intersection = (pred * gt * mask).sum()
+
+ union = (pred * mask).sum() + (gt * mask).sum() + self.eps
+ loss = 1 - 2.0 * intersection / union
+ assert loss <= 1
+ return loss
+
+
+class MaskL1Loss(nn.Layer):
+ def __init__(self, eps=1e-6):
+ super(MaskL1Loss, self).__init__()
+ self.eps = eps
+
+ def forward(self, pred: paddle.Tensor, gt, mask):
+ loss = (paddle.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps)
+ return loss
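+
+
+if __name__ == '__main__':
+    # Tiny worked example (illustrative only): a perfect prediction drives
+    # the dice loss towards 0, a complete miss towards 1.
+    gt = paddle.to_tensor([[[[0., 1.], [1., 0.]]]])  # (N, 1, H, W)
+    mask = paddle.ones([1, 2, 2])
+    dice = DiceLoss()
+    print(float(dice(gt, gt, mask)))  # ~0.0
+    print(float(dice(1. - gt, gt, mask)))  # ~1.0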
diff --git a/benchmark/PaddleOCR_DBNet/models/model.py b/benchmark/PaddleOCR_DBNet/models/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee24ff5b3d6b5a6c30b64de56a0aa83e9960001e
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/model.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:57
+# @Author : zhoujun
+from addict import Dict
+from paddle import nn
+import paddle.nn.functional as F
+
+from models.backbone import build_backbone
+from models.neck import build_neck
+from models.head import build_head
+
+
+class Model(nn.Layer):
+ def __init__(self, model_config: dict):
+ """
+        Detection model assembled from a backbone, a neck and a head
+        :param model_config: model configuration dict
+ """
+ super().__init__()
+ model_config = Dict(model_config)
+ backbone_type = model_config.backbone.pop('type')
+ neck_type = model_config.neck.pop('type')
+ head_type = model_config.head.pop('type')
+ self.backbone = build_backbone(backbone_type, **model_config.backbone)
+ self.neck = build_neck(
+ neck_type,
+ in_channels=self.backbone.out_channels,
+ **model_config.neck)
+ self.head = build_head(
+ head_type, in_channels=self.neck.out_channels, **model_config.head)
+ self.name = f'{backbone_type}_{neck_type}_{head_type}'
+
+ def forward(self, x):
+ _, _, H, W = x.shape
+ backbone_out = self.backbone(x)
+ neck_out = self.neck(backbone_out)
+ y = self.head(neck_out)
+ y = F.interpolate(y, size=(H, W), mode='bilinear', align_corners=True)
+ return y
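+
+
+if __name__ == '__main__':
+    import paddle
+    # Minimal forward sketch (illustrative only): whatever the backbone
+    # stride, the head output is interpolated back to the input resolution.
+    model_config = {
+        'backbone': {'type': 'resnet18', 'pretrained': False},
+        'neck': {'type': 'FPN', 'inner_channels': 256},
+        'head': {'type': 'DBHead', 'out_channels': 2},
+    }
+    net = Model(model_config)
+    net.eval()
+    y = net(paddle.rand([1, 3, 640, 640]))
+    print(net.name, y.shape)  # resnet18_FPN_DBHead [1, 2, 640, 640]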
diff --git a/benchmark/PaddleOCR_DBNet/models/neck/FPN.py b/benchmark/PaddleOCR_DBNet/models/neck/FPN.py
new file mode 100644
index 0000000000000000000000000000000000000000..53a3fa4b80c556ba87fa559494771f72acae0788
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/neck/FPN.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/9/13 10:29
+# @Author : zhoujun
+import paddle
+import paddle.nn.functional as F
+from paddle import nn
+
+from models.basic import ConvBnRelu
+
+
+class FPN(nn.Layer):
+ def __init__(self, in_channels, inner_channels=256, **kwargs):
+ """
+        :param in_channels: output channels of the backbone
+ :param kwargs:
+ """
+ super().__init__()
+ inplace = True
+ self.conv_out = inner_channels
+ inner_channels = inner_channels // 4
+ # reduce layers
+ self.reduce_conv_c2 = ConvBnRelu(
+ in_channels[0], inner_channels, kernel_size=1, inplace=inplace)
+ self.reduce_conv_c3 = ConvBnRelu(
+ in_channels[1], inner_channels, kernel_size=1, inplace=inplace)
+ self.reduce_conv_c4 = ConvBnRelu(
+ in_channels[2], inner_channels, kernel_size=1, inplace=inplace)
+ self.reduce_conv_c5 = ConvBnRelu(
+ in_channels[3], inner_channels, kernel_size=1, inplace=inplace)
+ # Smooth layers
+ self.smooth_p4 = ConvBnRelu(
+ inner_channels,
+ inner_channels,
+ kernel_size=3,
+ padding=1,
+ inplace=inplace)
+ self.smooth_p3 = ConvBnRelu(
+ inner_channels,
+ inner_channels,
+ kernel_size=3,
+ padding=1,
+ inplace=inplace)
+ self.smooth_p2 = ConvBnRelu(
+ inner_channels,
+ inner_channels,
+ kernel_size=3,
+ padding=1,
+ inplace=inplace)
+
+ self.conv = nn.Sequential(
+ nn.Conv2D(
+ self.conv_out,
+ self.conv_out,
+ kernel_size=3,
+ padding=1,
+ stride=1),
+ nn.BatchNorm2D(self.conv_out),
+ nn.ReLU())
+ self.out_channels = self.conv_out
+
+ def forward(self, x):
+ c2, c3, c4, c5 = x
+ # Top-down
+ p5 = self.reduce_conv_c5(c5)
+ p4 = self._upsample_add(p5, self.reduce_conv_c4(c4))
+ p4 = self.smooth_p4(p4)
+ p3 = self._upsample_add(p4, self.reduce_conv_c3(c3))
+ p3 = self.smooth_p3(p3)
+ p2 = self._upsample_add(p3, self.reduce_conv_c2(c2))
+ p2 = self.smooth_p2(p2)
+
+ x = self._upsample_cat(p2, p3, p4, p5)
+ x = self.conv(x)
+ return x
+
+ def _upsample_add(self, x, y):
+ return F.interpolate(x, size=y.shape[2:]) + y
+
+ def _upsample_cat(self, p2, p3, p4, p5):
+ h, w = p2.shape[2:]
+ p3 = F.interpolate(p3, size=(h, w))
+ p4 = F.interpolate(p4, size=(h, w))
+ p5 = F.interpolate(p5, size=(h, w))
+ return paddle.concat([p2, p3, p4, p5], axis=1)
diff --git a/benchmark/PaddleOCR_DBNet/models/neck/__init__.py b/benchmark/PaddleOCR_DBNet/models/neck/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..76553413784df2d5d87824e71d8de9b95ce7ce7e
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/models/neck/__init__.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+# @Time : 2020/6/5 11:34
+# @Author : zhoujun
+from .FPN import FPN
+
+__all__ = ['build_neck']
+support_neck = ['FPN']
+
+
+def build_neck(neck_name, **kwargs):
+    assert neck_name in support_neck, f'supported necks are {support_neck}'
+ neck = eval(neck_name)(**kwargs)
+ return neck
diff --git a/benchmark/PaddleOCR_DBNet/multi_gpu_train.sh b/benchmark/PaddleOCR_DBNet/multi_gpu_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b49a73f1581d8b40c20aeb01fd6ccc37fd6ab24b
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/multi_gpu_train.sh
@@ -0,0 +1,2 @@
+# export NCCL_P2P_DISABLE=1
+CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m paddle.distributed.launch tools/train.py --config_file "config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml"
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/post_processing/__init__.py b/benchmark/PaddleOCR_DBNet/post_processing/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f8e43223dfaf509c1bec58e828dd98a4630eb90
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/post_processing/__init__.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/5 15:17
+# @Author : zhoujun
+
+from .seg_detector_representer import SegDetectorRepresenter
+
+
+def get_post_processing(config):
+ try:
+ cls = eval(config['type'])(**config['args'])
+ return cls
+    except Exception:
+ return None
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/post_processing/seg_detector_representer.py b/benchmark/PaddleOCR_DBNet/post_processing/seg_detector_representer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1273dcfcce0a73cc76f841354aed2623219a8d8
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/post_processing/seg_detector_representer.py
@@ -0,0 +1,192 @@
+import cv2
+import numpy as np
+import pyclipper
+import paddle
+from shapely.geometry import Polygon
+
+
+class SegDetectorRepresenter():
+ def __init__(self,
+ thresh=0.3,
+ box_thresh=0.7,
+ max_candidates=1000,
+ unclip_ratio=1.5):
+ self.min_size = 3
+ self.thresh = thresh
+ self.box_thresh = box_thresh
+ self.max_candidates = max_candidates
+ self.unclip_ratio = unclip_ratio
+
+ def __call__(self, batch, pred, is_output_polygon=False):
+        '''
+        batch: a dict produced by dataloaders, containing:
+            image: tensor of shape (N, C, H, W).
+            polygons: tensor of shape (N, K, 4, 2), the polygons of objective regions.
+            ignore_tags: tensor of shape (N, K), indicates whether a region is ignorable or not.
+            shape: the original shape of images.
+            filename: the original filenames of images.
+        pred:
+            binary: text region segmentation map, with shape (N, H, W)
+            thresh: [if exists] threshold prediction with shape (N, H, W)
+            thresh_binary: [if exists] binarized with threshold, (N, H, W)
+        '''
+ if isinstance(pred, paddle.Tensor):
+ pred = pred.numpy()
+ pred = pred[:, 0, :, :]
+ segmentation = self.binarize(pred)
+ boxes_batch = []
+ scores_batch = []
+ for batch_index in range(pred.shape[0]):
+ height, width = batch['shape'][batch_index]
+ if is_output_polygon:
+ boxes, scores = self.polygons_from_bitmap(
+ pred[batch_index], segmentation[batch_index], width, height)
+ else:
+ boxes, scores = self.boxes_from_bitmap(
+ pred[batch_index], segmentation[batch_index], width, height)
+ boxes_batch.append(boxes)
+ scores_batch.append(scores)
+ return boxes_batch, scores_batch
+
+ def binarize(self, pred):
+ return pred > self.thresh
+
+ def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
+ '''
+ _bitmap: single map with shape (H, W),
+ whose values are binarized as {0, 1}
+ '''
+
+ assert len(_bitmap.shape) == 2
+ bitmap = _bitmap # The first channel
+ height, width = bitmap.shape
+ boxes = []
+ scores = []
+
+ contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
+ cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+
+ for contour in contours[:self.max_candidates]:
+ epsilon = 0.005 * cv2.arcLength(contour, True)
+ approx = cv2.approxPolyDP(contour, epsilon, True)
+ points = approx.reshape((-1, 2))
+ if points.shape[0] < 4:
+ continue
+ # _, sside = self.get_mini_boxes(contour)
+ # if sside < self.min_size:
+ # continue
+ score = self.box_score_fast(pred, contour.squeeze(1))
+ if self.box_thresh > score:
+ continue
+
+ if points.shape[0] > 2:
+ box = self.unclip(points, unclip_ratio=self.unclip_ratio)
+ if len(box) > 1:
+ continue
+ else:
+ continue
+ box = box.reshape(-1, 2)
+ _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
+ if sside < self.min_size + 2:
+ continue
+
+ if not isinstance(dest_width, int):
+ dest_width = dest_width.item()
+ dest_height = dest_height.item()
+
+ box[:, 0] = np.clip(
+ np.round(box[:, 0] / width * dest_width), 0, dest_width)
+ box[:, 1] = np.clip(
+ np.round(box[:, 1] / height * dest_height), 0, dest_height)
+ boxes.append(box)
+ scores.append(score)
+ return boxes, scores
+
+ def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
+ '''
+ _bitmap: single map with shape (H, W),
+ whose values are binarized as {0, 1}
+ '''
+
+ assert len(_bitmap.shape) == 2
+ bitmap = _bitmap # The first channel
+ height, width = bitmap.shape
+ contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
+ cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+ num_contours = min(len(contours), self.max_candidates)
+ boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
+ scores = np.zeros((num_contours, ), dtype=np.float32)
+
+ for index in range(num_contours):
+ contour = contours[index].squeeze(1)
+ points, sside = self.get_mini_boxes(contour)
+ if sside < self.min_size:
+ continue
+ points = np.array(points)
+ score = self.box_score_fast(pred, contour)
+ if self.box_thresh > score:
+ continue
+
+ box = self.unclip(
+ points, unclip_ratio=self.unclip_ratio).reshape(-1, 1, 2)
+ box, sside = self.get_mini_boxes(box)
+ if sside < self.min_size + 2:
+ continue
+ box = np.array(box)
+ if not isinstance(dest_width, int):
+ dest_width = dest_width.item()
+ dest_height = dest_height.item()
+
+ box[:, 0] = np.clip(
+ np.round(box[:, 0] / width * dest_width), 0, dest_width)
+ box[:, 1] = np.clip(
+ np.round(box[:, 1] / height * dest_height), 0, dest_height)
+ boxes[index, :, :] = box.astype(np.int16)
+ scores[index] = score
+ return boxes, scores
+
+ def unclip(self, box, unclip_ratio=1.5):
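+        # Dilate the detected polygon as in the DB paper: the offset distance
+        # is D = A * r / L, where A is the polygon area, L its perimeter and
+        # r the unclip ratio.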
+ poly = Polygon(box)
+ distance = poly.area * unclip_ratio / poly.length
+ offset = pyclipper.PyclipperOffset()
+ offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+ expanded = np.array(offset.Execute(distance))
+ return expanded
+
+ def get_mini_boxes(self, contour):
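+        # Fit a minimum-area rotated rectangle to the contour and return its
+        # four corners ordered clockwise from the top-left point, together
+        # with the length of the rectangle's shorter side.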
+ bounding_box = cv2.minAreaRect(contour)
+ points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+
+ index_1, index_2, index_3, index_4 = 0, 1, 2, 3
+ if points[1][1] > points[0][1]:
+ index_1 = 0
+ index_4 = 1
+ else:
+ index_1 = 1
+ index_4 = 0
+ if points[3][1] > points[2][1]:
+ index_2 = 2
+ index_3 = 3
+ else:
+ index_2 = 3
+ index_3 = 2
+
+ box = [
+ points[index_1], points[index_2], points[index_3], points[index_4]
+ ]
+ return box, min(bounding_box[1])
+
+ def box_score_fast(self, bitmap, _box):
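+        # Score the box as the mean probability inside the polygon, computed
+        # on the minimal axis-aligned crop for speed.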
+ h, w = bitmap.shape[:2]
+ box = _box.copy()
+        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
+        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
+        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
+        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)
+
+ mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+ box[:, 0] = box[:, 0] - xmin
+ box[:, 1] = box[:, 1] - ymin
+ cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
+ return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
diff --git a/benchmark/PaddleOCR_DBNet/predict.sh b/benchmark/PaddleOCR_DBNet/predict.sh
new file mode 100644
index 0000000000000000000000000000000000000000..37ab148283024c2360b7b13df24e5cc5b5cdaa4f
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/predict.sh
@@ -0,0 +1 @@
+CUDA_VISIBLE_DEVICES=0 python tools/predict.py --model_path model_best.pth --input_folder ./input --output_folder ./output --thre 0.7 --polygon --show --save_result
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/requirement.txt b/benchmark/PaddleOCR_DBNet/requirement.txt
new file mode 100644
index 0000000000000000000000000000000000000000..191819f32c8e5e6d318201f56a84b1faa5e3bce7
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/requirement.txt
@@ -0,0 +1,13 @@
+anyconfig
+future
+imgaug
+matplotlib
+numpy
+opencv-python
+Polygon3
+pyclipper
+PyYAML
+scikit-image
+Shapely
+tqdm
+addict
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/singlel_gpu_train.sh b/benchmark/PaddleOCR_DBNet/singlel_gpu_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f8b9f0e89b25e4e00e129cdfb3c0a771846d8c56
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/singlel_gpu_train.sh
@@ -0,0 +1 @@
+CUDA_VISIBLE_DEVICES=0 python3 tools/train.py --config_file "config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml"
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/test/README.MD b/benchmark/PaddleOCR_DBNet/test/README.MD
new file mode 100644
index 0000000000000000000000000000000000000000..b43c6e9a1990ab290f07bca1187e4cdefd46ff1e
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/test/README.MD
@@ -0,0 +1,8 @@
+Place the images that you want to detect here. It is best to name them as follows:
+img_10.jpg
+img_11.jpg
+img_{img_id}.jpg
+
+For predicting a single image, change the `img_path` in `/tools/predict.py` to your image number.
+
+The result will be saved in the output folder (default: test/output) given in predict.sh
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/test_tipc/benchmark_train.sh b/benchmark/PaddleOCR_DBNet/test_tipc/benchmark_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..725da8b05988a272c7653a9e508e41e6d888a3b1
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/test_tipc/benchmark_train.sh
@@ -0,0 +1,287 @@
+#!/bin/bash
+source test_tipc/common_func.sh
+
+# run benchmark sh
+# Usage:
+# bash test_tipc/benchmark_train.sh config.txt params
+# or
+# bash test_tipc/benchmark_train.sh config.txt
+
+function func_parser_params(){
+ strs=$1
+ IFS="="
+ array=(${strs})
+ tmp=${array[1]}
+ echo ${tmp}
+}
+
+function set_dynamic_epoch(){
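+    # Scale the base epoch count by node count times per-node GPU count,
+    # parsed from a device tag such as N1C8.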
+ string=$1
+ num=$2
+ _str=${string:1:6}
+ IFS="C"
+ arr=(${_str})
+ M=${arr[0]}
+ P=${arr[1]}
+ ep=`expr $num \* $M \* $P`
+ echo $ep
+}
+
+function func_sed_params(){
+ filename=$1
+ line=$2
+ param_value=$3
+ params=`sed -n "${line}p" $filename`
+ IFS=":"
+ array=(${params})
+ key=${array[0]}
+ value=${array[1]}
+
+ new_params="${key}:${param_value}"
+ IFS=";"
+ cmd="sed -i '${line}s/.*/${new_params}/' '${filename}'"
+ eval $cmd
+}
+
+function set_gpu_id(){
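+    # Build a comma-separated GPU id list (e.g. 0,1,2,3) from a device tag
+    # such as N1C4.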
+ string=$1
+ _str=${string:1:6}
+ IFS="C"
+ arr=(${_str})
+ M=${arr[0]}
+ P=${arr[1]}
+ gn=`expr $P - 1`
+ gpu_num=`expr $gn / $M`
+ seq=`seq -s "," 0 $gpu_num`
+ echo $seq
+}
+
+function get_repo_name(){
+ IFS=";"
+ cur_dir=$(pwd)
+ IFS="/"
+ arr=(${cur_dir})
+ echo ${arr[-1]}
+}
+
+FILENAME=$1
+# copy FILENAME as new
+new_filename="./test_tipc/benchmark_train.txt"
+cmd=`yes|cp $FILENAME $new_filename`
+FILENAME=$new_filename
+# MODE must be one of ['benchmark_train']
+MODE=$2
+PARAMS=$3
+
+to_static=""
+# parse "to_static" options and modify trainer into "to_static_trainer"
+if [[ $PARAMS =~ "dynamicTostatic" ]] ;then
+ to_static="d2sT_"
+ sed -i 's/trainer:norm_train/trainer:to_static_train/g' $FILENAME
+ # clear PARAM contents
+ if [ $PARAMS = "to_static" ] ;then
+ PARAMS=""
+ fi
+fi
+# bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamic_bs8_fp32_DP_N1C8
+# bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamicTostatic_bs8_fp32_DP_N1C8
+# bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamic_bs8_null_DP_N1C1
+IFS=$'\n'
+# parser params from train_benchmark.txt
+dataline=`cat $FILENAME`
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+model_name=$(func_parser_value "${lines[1]}")
+python_name=$(func_parser_value "${lines[2]}")
+
+# set env
+python=${python_name}
+export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
+export frame_version=${str_tmp%%.post*}
+export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
+
+# get the line number of the train_benchmark_params section
+line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1`
+# for train log parser
+batch_size=$(func_parser_value "${lines[line_num]}")
+line_num=`expr $line_num + 1`
+fp_items=$(func_parser_value "${lines[line_num]}")
+line_num=`expr $line_num + 1`
+epoch=$(func_parser_value "${lines[line_num]}")
+
+line_num=`expr $line_num + 1`
+profile_option_key=$(func_parser_key "${lines[line_num]}")
+profile_option_params=$(func_parser_value "${lines[line_num]}")
+profile_option="${profile_option_key}:${profile_option_params}"
+
+line_num=`expr $line_num + 1`
+flags_value=$(func_parser_value "${lines[line_num]}")
+# set flags
+IFS=";"
+flags_list=(${flags_value})
+for _flag in ${flags_list[*]}; do
+ cmd="export ${_flag}"
+ eval $cmd
+done
+
+# set log_name
+repo_name=$(get_repo_name )
+SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log
+mkdir -p "${SAVE_LOG}/benchmark_log/"
+status_log="${SAVE_LOG}/benchmark_log/results.log"
+
+# Line numbers in FILENAME at which train params can be replaced.
+line_python=3
+line_gpuid=4
+line_precision=6
+line_epoch=7
+line_batchsize=9
+line_profile=13
+line_eval_py=24
+line_export_py=30
+
+func_sed_params "$FILENAME" "${line_eval_py}" "null"
+func_sed_params "$FILENAME" "${line_export_py}" "null"
+func_sed_params "$FILENAME" "${line_python}" "$python"
+
+# if params
+if [ -z "$PARAMS" ] ;then
+    # PARAMS is empty; use the default benchmark settings.
+ IFS="|"
+ batch_size_list=(${batch_size})
+ fp_items_list=(${fp_items})
+ device_num_list=(N1C4)
+ run_mode="DP"
+elif [[ ${PARAMS} = "dynamicTostatic" ]];then
+ IFS="|"
+ model_type=$PARAMS
+ batch_size_list=(${batch_size})
+ fp_items_list=(${fp_items})
+ device_num_list=(N1C4)
+ run_mode="DP"
+else
+ # parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
+ IFS="_"
+ params_list=(${PARAMS})
+ model_type=${params_list[0]}
+ batch_size=${params_list[1]}
+ batch_size=`echo ${batch_size} | tr -cd "[0-9]" `
+ precision=${params_list[2]}
+ run_mode=${params_list[3]}
+ device_num=${params_list[4]}
+ IFS=";"
+
+ if [ ${precision} = "fp16" ];then
+ precision="amp"
+ fi
+
+ epoch=$(set_dynamic_epoch $device_num $epoch)
+ fp_items_list=($precision)
+ batch_size_list=($batch_size)
+ device_num_list=($device_num)
+fi
+
+IFS="|"
+for batch_size in ${batch_size_list[*]}; do
+ for train_precision in ${fp_items_list[*]}; do
+ for device_num in ${device_num_list[*]}; do
+ # sed batchsize and precision
+ if [ ${train_precision} = "amp" ];then
+ precision="fp16"
+ else
+ precision="fp32"
+ fi
+
+ func_sed_params "$FILENAME" "${line_precision}" "$train_precision"
+ func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
+ func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
+ gpu_id=$(set_gpu_id $device_num)
+
+ if [ ${#gpu_id} -le 1 ];then
+ log_path="$SAVE_LOG/profiling_log"
+ mkdir -p $log_path
+ log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
+ func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id
+ # set profile_option params
+ tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`
+
+ # run test_train_inference_python.sh
+ cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+ echo $cmd
+ eval $cmd
+ eval "cat ${log_path}/${log_name}"
+
+ # without profile
+ log_path="$SAVE_LOG/train_log"
+ speed_log_path="$SAVE_LOG/index"
+ mkdir -p $log_path
+ mkdir -p $speed_log_path
+ log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}log"
+ speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}speed"
+ func_sed_params "$FILENAME" "${line_profile}" "null" # sed profile_id as null
+ cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+ echo $cmd
+ job_bt=`date '+%Y%m%d%H%M%S'`
+ eval $cmd
+ job_et=`date '+%Y%m%d%H%M%S'`
+ export model_run_time=$((${job_et}-${job_bt}))
+ eval "cat ${log_path}/${log_name}"
+
+ # parser log
+ _model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}"
+ cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
+ --speed_log_file '${speed_log_path}/${speed_log_name}' \
+ --model_name ${_model_name} \
+ --base_batch_size ${batch_size} \
+ --run_mode ${run_mode} \
+ --fp_item ${precision} \
+ --keyword ips: \
+ --skip_steps 2 \
+ --device_num ${device_num} \
+ --speed_unit samples/s \
+ --convergence_key loss: "
+ echo $cmd
+ eval $cmd
+ last_status=${PIPESTATUS[0]}
+ status_check $last_status "${cmd}" "${status_log}"
+ else
+ IFS=";"
+            unset CUDA_VISIBLE_DEVICES  # unset in the current shell; a command substitution would be a no-op
+ log_path="$SAVE_LOG/train_log"
+ speed_log_path="$SAVE_LOG/index"
+ mkdir -p $log_path
+ mkdir -p $speed_log_path
+ log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}log"
+ speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}speed"
+ func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id" # sed used gpu_id
+ func_sed_params "$FILENAME" "${line_profile}" "null" # sed --profile_option as null
+ cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+ echo $cmd
+ job_bt=`date '+%Y%m%d%H%M%S'`
+ eval $cmd
+ job_et=`date '+%Y%m%d%H%M%S'`
+ export model_run_time=$((${job_et}-${job_bt}))
+ eval "cat ${log_path}/${log_name}"
+ # parser log
+ _model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}"
+
+ cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
+ --speed_log_file '${speed_log_path}/${speed_log_name}' \
+ --model_name ${_model_name} \
+ --base_batch_size ${batch_size} \
+ --run_mode ${run_mode} \
+ --fp_item ${precision} \
+ --keyword ips: \
+ --skip_steps 2 \
+ --device_num ${device_num} \
+ --speed_unit images/s \
+ --convergence_key loss: "
+ echo $cmd
+ eval $cmd
+ last_status=${PIPESTATUS[0]}
+ status_check $last_status "${cmd}" "${status_log}"
+ fi
+ done
+ done
+done
diff --git a/benchmark/PaddleOCR_DBNet/test_tipc/common_func.sh b/benchmark/PaddleOCR_DBNet/test_tipc/common_func.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c123d3cf6e6487ea6b0d5ef1a108e0994a7f1eb4
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/test_tipc/common_func.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+function func_parser_key(){
+ strs=$1
+ IFS=":"
+ array=(${strs})
+ tmp=${array[0]}
+ echo ${tmp}
+}
+
+function func_parser_value(){
+ strs=$1
+ IFS=":"
+ array=(${strs})
+ tmp=${array[1]}
+ echo ${tmp}
+}
+
+function func_set_params(){
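+    # Emit "key=value", or a blank when either the key or the value is
+    # null/empty.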
+ key=$1
+ value=$2
+ if [ ${key}x = "null"x ];then
+ echo " "
+ elif [[ ${value} = "null" ]] || [[ ${value} = " " ]] || [ ${#value} -le 0 ];then
+ echo " "
+ else
+ echo "${key}=${value}"
+ fi
+}
+
+function func_parser_params(){
+ strs=$1
+ MODE=$2
+ IFS=":"
+ array=(${strs})
+ key=${array[0]}
+ tmp=${array[1]}
+ IFS="|"
+ res=""
+ for _params in ${tmp[*]}; do
+ IFS="="
+ array=(${_params})
+ mode=${array[0]}
+ value=${array[1]}
+ if [[ ${mode} = ${MODE} ]]; then
+ IFS="|"
+ #echo $(func_set_params "${mode}" "${value}")
+ echo $value
+ break
+ fi
+ IFS="|"
+ done
+ echo ${res}
+}
+
+function status_check(){
+ last_status=$1 # the exit code
+ run_command=$2
+ run_log=$3
+ model_name=$4
+ log_path=$5
+ if [ $last_status -eq 0 ]; then
+ echo -e "\033[33m Run successfully with command - ${model_name} - ${run_command} - ${log_path} \033[0m" | tee -a ${run_log}
+ else
+ echo -e "\033[33m Run failed with command - ${model_name} - ${run_command} - ${log_path} \033[0m" | tee -a ${run_log}
+ fi
+}
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/test_tipc/configs/det_res50_db/train_infer_python.txt b/benchmark/PaddleOCR_DBNet/test_tipc/configs/det_res50_db/train_infer_python.txt
new file mode 100644
index 0000000000000000000000000000000000000000..20bb49fe072096662c74005f7b7b25a34a2bf3e7
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/test_tipc/configs/det_res50_db/train_infer_python.txt
@@ -0,0 +1,59 @@
+===========================train_params===========================
+model_name:det_res50_db
+python:python3.7
+gpu_list:0|0,1
+trainer.use_gpu:True|True
+amp:null
+trainer.epochs:lite_train_lite_infer=1|whole_train_whole_infer=300
+trainer.output_dir:./output/
+dataset.train.loader.batch_size:lite_train_lite_infer=8|whole_train_lite_infer=8
+trainer.finetune_checkpoint:null
+train_model_name:checkpoint/model_latest.pth
+train_infer_img_dir:imgs/paper/db.jpg
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py --config_file config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml -o trainer.log_iter=1 trainer.enable_eval=False dataset.train.loader.shuffle=false arch.backbone.pretrained=False
+quant_export:null
+fpgm_export:null
+distill_train:null
+to_static_train:trainer.to_static=true
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+trainer.output_dir:./output/
+trainer.resume_checkpoint:
+norm_export:tools/export_model.py --config_file config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml -o
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/det_r50_vd_db_v2.0_train/best_accuracy
+infer_export:tools/export_model.py --config_file config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml -o
+infer_quant:False
+inference:tools/infer.py
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--batch_size:1
+--use_tensorrt:False
+--precision:fp32
+--model_dir:
+--img_path:imgs/paper/db.jpg
+--save_log_path:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
+===========================train_benchmark_params==========================
+batch_size:8
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/test_tipc/prepare.sh b/benchmark/PaddleOCR_DBNet/test_tipc/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cd8f56fd7d3bc07b2d35d1239bf723bb3de1e136
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/test_tipc/prepare.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+source test_tipc/common_func.sh
+
+FILENAME=$1
+
+# MODE must be one of ['lite_train_lite_infer', 'lite_train_whole_infer', 'whole_train_whole_infer',
+# 'whole_infer', 'klquant_whole_infer',
+# 'cpp_infer', 'serving_infer']
+
+MODE=$2
+
+dataline=$(cat ${FILENAME})
+
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+
+# The training params
+model_name=$(func_parser_value "${lines[1]}")
+
+trainer_list=$(func_parser_value "${lines[14]}")
+
+if [ ${MODE} = "lite_train_lite_infer" ];then
+ python_name_list=$(func_parser_value "${lines[2]}")
+ array=(${python_name_list})
+ python_name=${array[0]}
+ ${python_name} -m pip install -r requirement.txt
+ if [[ ${model_name} =~ "det_res50_db" ]];then
+ wget -nc https://paddle-wheel.bj.bcebos.com/benchmark/resnet50-19c8e357.pth -O /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth
+
+        # download and extract the dataset
+ rm -rf datasets
+ wget -nc https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/benchmark_train/datasets.tar
+ tar xf datasets.tar
+ fi
+elif [ ${MODE} = "benchmark_train" ];then
+ python_name_list=$(func_parser_value "${lines[2]}")
+ array=(${python_name_list})
+ python_name=${array[0]}
+ ${python_name} -m pip install -r requirement.txt
+ if [[ ${model_name} =~ "det_res50_db" ]];then
+ wget -nc https://paddle-wheel.bj.bcebos.com/benchmark/resnet50-19c8e357.pth -O /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth
+
+        # download and extract the dataset
+ rm -rf datasets
+ wget -nc https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/benchmark_train/datasets.tar
+ tar xf datasets.tar
+ # expand gt.txt 2 times
+ # cd ./train_data/icdar2015/text_localization
+ # for i in `seq 2`;do cp train_icdar2015_label.txt dup$i.txt;done
+ # cat dup* > train_icdar2015_label.txt && rm -rf dup*
+ # cd ../../../
+ fi
+fi
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/test_tipc/test_train_inference_python.sh b/benchmark/PaddleOCR_DBNet/test_tipc/test_train_inference_python.sh
new file mode 100644
index 0000000000000000000000000000000000000000..64e5aca1f2840cd86f842b07c72f1d778ea0c2f8
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/test_tipc/test_train_inference_python.sh
@@ -0,0 +1,340 @@
+#!/bin/bash
+source test_tipc/common_func.sh
+
+FILENAME=$1
+# MODE must be one of ['lite_train_lite_infer', 'lite_train_whole_infer', 'whole_train_whole_infer', 'whole_infer']
+MODE=$2
+
+dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
+
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+
+# The training params
+model_name=$(func_parser_value "${lines[1]}")
+python=$(func_parser_value "${lines[2]}")
+gpu_list=$(func_parser_value "${lines[3]}")
+train_use_gpu_key=$(func_parser_key "${lines[4]}")
+train_use_gpu_value=$(func_parser_value "${lines[4]}")
+autocast_list=$(func_parser_value "${lines[5]}")
+autocast_key=$(func_parser_key "${lines[5]}")
+epoch_key=$(func_parser_key "${lines[6]}")
+epoch_num=$(func_parser_params "${lines[6]}" "${MODE}")
+save_model_key=$(func_parser_key "${lines[7]}")
+train_batch_key=$(func_parser_key "${lines[8]}")
+train_batch_value=$(func_parser_params "${lines[8]}" "${MODE}")
+pretrain_model_key=$(func_parser_key "${lines[9]}")
+pretrain_model_value=$(func_parser_value "${lines[9]}")
+train_model_name=$(func_parser_value "${lines[10]}")
+train_infer_img_dir=$(func_parser_value "${lines[11]}")
+train_param_key1=$(func_parser_key "${lines[12]}")
+train_param_value1=$(func_parser_value "${lines[12]}")
+
+trainer_list=$(func_parser_value "${lines[14]}")
+trainer_norm=$(func_parser_key "${lines[15]}")
+norm_trainer=$(func_parser_value "${lines[15]}")
+pact_key=$(func_parser_key "${lines[16]}")
+pact_trainer=$(func_parser_value "${lines[16]}")
+fpgm_key=$(func_parser_key "${lines[17]}")
+fpgm_trainer=$(func_parser_value "${lines[17]}")
+distill_key=$(func_parser_key "${lines[18]}")
+distill_trainer=$(func_parser_value "${lines[18]}")
+to_static_key=$(func_parser_key "${lines[19]}")
+to_static_value=$(func_parser_value "${lines[19]}")
+trainer_key2=$(func_parser_key "${lines[20]}")
+trainer_value2=$(func_parser_value "${lines[20]}")
+
+eval_py=$(func_parser_value "${lines[23]}")
+eval_key1=$(func_parser_key "${lines[24]}")
+eval_value1=$(func_parser_value "${lines[24]}")
+
+save_infer_key=$(func_parser_key "${lines[27]}")
+export_weight=$(func_parser_key "${lines[28]}")
+norm_export=$(func_parser_value "${lines[29]}")
+pact_export=$(func_parser_value "${lines[30]}")
+fpgm_export=$(func_parser_value "${lines[31]}")
+distill_export=$(func_parser_value "${lines[32]}")
+export_key1=$(func_parser_key "${lines[33]}")
+export_value1=$(func_parser_value "${lines[33]}")
+export_key2=$(func_parser_key "${lines[34]}")
+export_value2=$(func_parser_value "${lines[34]}")
+inference_dir=$(func_parser_value "${lines[35]}")
+
+# parser inference model
+infer_model_dir_list=$(func_parser_value "${lines[36]}")
+infer_export_list=$(func_parser_value "${lines[37]}")
+infer_is_quant=$(func_parser_value "${lines[38]}")
+# parser inference
+inference_py=$(func_parser_value "${lines[39]}")
+use_gpu_key=$(func_parser_key "${lines[40]}")
+use_gpu_list=$(func_parser_value "${lines[40]}")
+use_mkldnn_key=$(func_parser_key "${lines[41]}")
+use_mkldnn_list=$(func_parser_value "${lines[41]}")
+cpu_threads_key=$(func_parser_key "${lines[42]}")
+cpu_threads_list=$(func_parser_value "${lines[42]}")
+batch_size_key=$(func_parser_key "${lines[43]}")
+batch_size_list=$(func_parser_value "${lines[43]}")
+use_trt_key=$(func_parser_key "${lines[44]}")
+use_trt_list=$(func_parser_value "${lines[44]}")
+precision_key=$(func_parser_key "${lines[45]}")
+precision_list=$(func_parser_value "${lines[45]}")
+infer_model_key=$(func_parser_key "${lines[46]}")
+image_dir_key=$(func_parser_key "${lines[47]}")
+infer_img_dir=$(func_parser_value "${lines[47]}")
+save_log_key=$(func_parser_key "${lines[48]}")
+benchmark_key=$(func_parser_key "${lines[49]}")
+benchmark_value=$(func_parser_value "${lines[49]}")
+infer_key1=$(func_parser_key "${lines[50]}")
+infer_value1=$(func_parser_value "${lines[50]}")
+
+LOG_PATH="./test_tipc/output/${model_name}/${MODE}"
+mkdir -p ${LOG_PATH}
+status_log="${LOG_PATH}/results_python.log"
+
+
+function func_inference(){
+ IFS='|'
+ _python=$1
+ _script=$2
+ _model_dir=$3
+ _log_path=$4
+ _img_dir=$5
+ _flag_quant=$6
+ _gpu=$7
+ # inference
+ for use_gpu in ${use_gpu_list[*]}; do
+ if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
+ for use_mkldnn in ${use_mkldnn_list[*]}; do
+ # if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
+ # continue
+ # fi
+ for threads in ${cpu_threads_list[*]}; do
+ for batch_size in ${batch_size_list[*]}; do
+ for precision in ${precision_list[*]}; do
+ if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then
+ continue
+ fi # skip when enable fp16 but disable mkldnn
+ if [ ${_flag_quant} = "True" ] && [ ${precision} != "int8" ]; then
+ continue
+ fi # skip when quant model inference but precision is not int8
+ set_precision=$(func_set_params "${precision_key}" "${precision}")
+
+ _save_log_path="${_log_path}/python_infer_cpu_gpus_${_gpu}_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
+ set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
+ set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
+ set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
+ set_mkldnn=$(func_set_params "${use_mkldnn_key}" "${use_mkldnn}")
+ set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
+ set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
+ set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}")
+ set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
+ command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_params0} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 "
+ eval $command
+ last_status=${PIPESTATUS[0]}
+ eval "cat ${_save_log_path}"
+ status_check $last_status "${command}" "${status_log}" "${model_name}" "${_save_log_path}"
+ done
+ done
+ done
+ done
+ elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
+ for use_trt in ${use_trt_list[*]}; do
+ for precision in ${precision_list[*]}; do
+ if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
+ continue
+ fi
+ if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
+ continue
+ fi
+ if [[ ${use_trt} = "False" && ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
+ continue
+ fi
+ for batch_size in ${batch_size_list[*]}; do
+ _save_log_path="${_log_path}/python_infer_gpu_gpus_${_gpu}_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
+ set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
+ set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
+ set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
+ set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
+ set_precision=$(func_set_params "${precision_key}" "${precision}")
+ set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
+ set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}")
+ set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
+ command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} ${set_infer_params0} > ${_save_log_path} 2>&1 "
+ eval $command
+ last_status=${PIPESTATUS[0]}
+ eval "cat ${_save_log_path}"
+ status_check $last_status "${command}" "${status_log}" "${model_name}" "${_save_log_path}"
+
+ done
+ done
+ done
+ else
+            echo "Currently only CPU and GPU hardware are supported!"
+ fi
+ done
+}
+
+if [ ${MODE} = "whole_infer" ]; then
+ GPUID=$3
+ if [ ${#GPUID} -le 0 ];then
+ env=" "
+ else
+ env="export CUDA_VISIBLE_DEVICES=${GPUID}"
+ fi
+ # set CUDA_VISIBLE_DEVICES
+ eval $env
+ export Count=0
+ gpu=0
+ IFS="|"
+ infer_run_exports=(${infer_export_list})
+ infer_quant_flag=(${infer_is_quant})
+ for infer_model in ${infer_model_dir_list[*]}; do
+ # run export
+ if [ ${infer_run_exports[Count]} != "null" ];then
+ save_infer_dir="${infer_model}"
+ set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
+ set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
+ export_log_path="${LOG_PATH}_export_${Count}.log"
+ export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key} > ${export_log_path} 2>&1 "
+ echo ${infer_run_exports[Count]}
+ echo $export_cmd
+ eval $export_cmd
+ status_export=$?
+ status_check $status_export "${export_cmd}" "${status_log}" "${model_name}" "${export_log_path}"
+ else
+ save_infer_dir=${infer_model}
+ fi
+ #run inference
+ is_quant=${infer_quant_flag[Count]}
+ func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant} "${gpu}"
+ Count=$(($Count + 1))
+ done
+else
+ IFS="|"
+ export Count=0
+ USE_GPU_KEY=(${train_use_gpu_value})
+ for gpu in ${gpu_list[*]}; do
+ train_use_gpu=${USE_GPU_KEY[Count]}
+ Count=$(($Count + 1))
+ ips=""
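+        # gpu may be "-1" (cpu), a single id, a comma-separated list, or an
+        # "ips;gpus" pair for multi-machine training.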
+ if [ ${gpu} = "-1" ];then
+ env=""
+ elif [ ${#gpu} -le 1 ];then
+ env="export CUDA_VISIBLE_DEVICES=${gpu}"
+ elif [ ${#gpu} -le 15 ];then
+ IFS=","
+ array=(${gpu})
+ env="export CUDA_VISIBLE_DEVICES=${array[0]}"
+ IFS="|"
+ else
+ IFS=";"
+ array=(${gpu})
+ ips=${array[0]}
+ gpu=${array[1]}
+ IFS="|"
+ env=" "
+ fi
+ for autocast in ${autocast_list[*]}; do
+ if [ ${autocast} = "amp" ]; then
+ set_amp_config="amp.scale_loss=1024.0 amp.use_dynamic_loss_scaling=True amp.amp_level=O2"
+ else
+ set_amp_config="amp=None"
+ fi
+ for trainer in ${trainer_list[*]}; do
+ flag_quant=False
+ if [ ${trainer} = ${pact_key} ]; then
+ run_train=${pact_trainer}
+ run_export=${pact_export}
+ flag_quant=True
+ elif [ ${trainer} = "${fpgm_key}" ]; then
+ run_train=${fpgm_trainer}
+ run_export=${fpgm_export}
+ elif [ ${trainer} = "${distill_key}" ]; then
+ run_train=${distill_trainer}
+ run_export=${distill_export}
+ elif [ ${trainer} = "${to_static_key}" ]; then
+ run_train="${norm_trainer} ${to_static_value}"
+ run_export=${norm_export}
+ elif [[ ${trainer} = ${trainer_key2} ]]; then
+ run_train=${trainer_value2}
+ run_export=${export_value2}
+ else
+ run_train=${norm_trainer}
+ run_export=${norm_export}
+ fi
+
+ if [ ${run_train} = "null" ]; then
+ continue
+ fi
+
+ set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
+ set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
+ set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
+ set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
+ set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
+ # if length of ips >= 15, then it is seen as multi-machine
+ # 15 is the min length of ips info for multi-machine: 0.0.0.0,0.0.0.0
+ if [ ${#ips} -le 15 ];then
+ save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
+ nodes=1
+ else
+ IFS=","
+ ips_array=(${ips})
+ IFS="|"
+ nodes=${#ips_array[@]}
+ save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
+ fi
+
+
+ set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
+ if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
+ cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_train_params1}"
+ elif [ ${#ips} -le 15 ];then # train with multi-gpu
+ cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_train_params1}"
+ else # train with multi-machine
+ cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_batchsize} ${set_amp_config} ${set_train_params1}"
+ fi
+ # run train
+ eval $cmd
+ eval "cat ${save_log}/train.log >> ${save_log}.log"
+ status_check $? "${cmd}" "${status_log}" "${model_name}" "${save_log}.log"
+
+ set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
+
+ # run eval
+ if [ ${eval_py} != "null" ]; then
+ eval ${env}
+ set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
+ eval_log_path="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_eval.log"
+ eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1} > ${eval_log_path} 2>&1 "
+ eval $eval_cmd
+ status_check $? "${eval_cmd}" "${status_log}" "${model_name}" "${eval_log_path}"
+ fi
+ # run export model
+ if [ ${run_export} != "null" ]; then
+ # run export model
+ save_infer_path="${save_log}"
+ export_log_path="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_export.log"
+ set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${train_model_name}")
+ set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
+ export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key} > ${export_log_path} 2>&1 "
+ eval $export_cmd
+ status_check $? "${export_cmd}" "${status_log}" "${model_name}" "${export_log_path}"
+
+ #run inference
+ eval $env
+ save_infer_path="${save_log}"
+ if [[ ${inference_dir} != "null" ]] && [[ ${inference_dir} != '##' ]]; then
+ infer_model_dir="${save_infer_path}/${inference_dir}"
+ else
+ infer_model_dir=${save_infer_path}
+ fi
+ func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}" "${gpu}"
+
+ eval "unset CUDA_VISIBLE_DEVICES"
+ fi
+ done # done with: for trainer in ${trainer_list[*]}; do
+ done # done with: for autocast in ${autocast_list[*]}; do
+ done # done with: for gpu in ${gpu_list[*]}; do
+fi  # end if [ ${MODE} = "whole_infer" ]; then
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/tools/__init__.py b/benchmark/PaddleOCR_DBNet/tools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cbf835d7e1654d050df9fca997a774f3d7947ad
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/tools/__init__.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/8 13:14
+# @Author : zhoujun
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/tools/eval.py b/benchmark/PaddleOCR_DBNet/tools/eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe514ddc0d4f1e09e784f346374e0e5aa2b998bc
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/tools/eval.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+# @Time : 2018/6/11 15:54
+# @Author : zhoujun
+import os
+import sys
+import pathlib
+__dir__ = pathlib.Path(os.path.abspath(__file__))
+sys.path.append(str(__dir__))
+sys.path.append(str(__dir__.parent.parent))
+
+import argparse
+import time
+import paddle
+from tqdm.auto import tqdm
+
+
+class EVAL():
+ def __init__(self, model_path, gpu_id=0):
+ from models import build_model
+ from data_loader import get_dataloader
+ from post_processing import get_post_processing
+ from utils import get_metric
+ self.gpu_id = gpu_id
+ if self.gpu_id is not None and isinstance(
+ self.gpu_id, int) and paddle.device.is_compiled_with_cuda():
+ paddle.device.set_device("gpu:{}".format(self.gpu_id))
+ else:
+ paddle.device.set_device("cpu")
+ checkpoint = paddle.load(model_path)
+ config = checkpoint['config']
+ config['arch']['backbone']['pretrained'] = False
+
+ self.validate_loader = get_dataloader(config['dataset']['validate'],
+ config['distributed'])
+
+ self.model = build_model(config['arch'])
+ self.model.set_state_dict(checkpoint['state_dict'])
+
+ self.post_process = get_post_processing(config['post_processing'])
+ self.metric_cls = get_metric(config['metric'])
+
+ def eval(self):
+ self.model.eval()
+ raw_metrics = []
+ total_frame = 0.0
+ total_time = 0.0
+ for i, batch in tqdm(
+ enumerate(self.validate_loader),
+ total=len(self.validate_loader),
+ desc='test model'):
+ with paddle.no_grad():
+ start = time.time()
+ preds = self.model(batch['img'])
+ boxes, scores = self.post_process(
+ batch,
+ preds,
+ is_output_polygon=self.metric_cls.is_output_polygon)
+ total_frame += batch['img'].shape[0]
+ total_time += time.time() - start
+ raw_metric = self.metric_cls.validate_measure(batch,
+ (boxes, scores))
+ raw_metrics.append(raw_metric)
+ metrics = self.metric_cls.gather_measure(raw_metrics)
+ print('FPS:{}'.format(total_frame / total_time))
+ return {
+ 'recall': metrics['recall'].avg,
+ 'precision': metrics['precision'].avg,
+ 'fmeasure': metrics['fmeasure'].avg
+ }
+
+
+def init_args():
+ parser = argparse.ArgumentParser(description='DBNet.paddle')
+ parser.add_argument(
+ '--model_path',
+ required=False,
+ default='output/DBNet_resnet18_FPN_DBHead/checkpoint/1.pth',
+ type=str)
+ args = parser.parse_args()
+ return args
+
+
+if __name__ == '__main__':
+ args = init_args()
+    evaluator = EVAL(args.model_path)  # avoid shadowing the builtin eval
+    result = evaluator.eval()
+ print(result)
diff --git a/benchmark/PaddleOCR_DBNet/tools/export_model.py b/benchmark/PaddleOCR_DBNet/tools/export_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..59a318a196a6480f904c7ddaed7eab49cbf3f80c
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/tools/export_model.py
@@ -0,0 +1,57 @@
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "..")))
+
+import argparse
+
+import paddle
+from paddle.jit import to_static
+
+from models import build_model
+from utils import Config, ArgsParser
+
+
+def init_args():
+ parser = ArgsParser()
+ args = parser.parse_args()
+ return args
+
+
+def load_checkpoint(model, checkpoint_path):
+ """
+    Load a training checkpoint into the model.
+    :param model: model to restore the weights into
+    :param checkpoint_path: checkpoint path to be loaded
+ """
+ checkpoint = paddle.load(checkpoint_path)
+ model.set_state_dict(checkpoint['state_dict'])
+ print('load checkpoint from {}'.format(checkpoint_path))
+
+
+def main(config):
+ model = build_model(config['arch'])
+ load_checkpoint(model, config['trainer']['resume_checkpoint'])
+ model.eval()
+
+ save_path = config["trainer"]["output_dir"]
+ save_path = os.path.join(save_path, "inference")
+ infer_shape = [3, -1, -1]
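+    # Export with dynamic H/W (InputSpec shape [None, 3, -1, -1]) so the
+    # inference model accepts arbitrary input sizes.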
+ model = to_static(
+ model,
+ input_spec=[
+ paddle.static.InputSpec(
+ shape=[None] + infer_shape, dtype="float32")
+ ])
+
+ paddle.jit.save(model, save_path)
+ print("inference model is saved to {}".format(save_path))
+
+
+if __name__ == "__main__":
+ args = init_args()
+ assert os.path.exists(args.config_file)
+ config = Config(args.config_file)
+ config.merge_dict(args.opt)
+ main(config.cfg)
diff --git a/benchmark/PaddleOCR_DBNet/tools/infer.py b/benchmark/PaddleOCR_DBNet/tools/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..24e919c33f2352e7e45ea2c2503fa0f94bb9cf58
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/tools/infer.py
@@ -0,0 +1,298 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import pathlib
+__dir__ = pathlib.Path(os.path.abspath(__file__))
+sys.path.append(str(__dir__))
+sys.path.append(str(__dir__.parent.parent))
+
+import cv2
+import paddle
+from paddle import inference
+import numpy as np
+from PIL import Image
+
+from paddle.vision import transforms
+from tools.predict import resize_image
+from post_processing import get_post_processing
+from utils.util import draw_bbox, save_result
+
+
+class InferenceEngine(object):
+ """InferenceEngine
+
+    Inference engine class which contains preprocess, run, and postprocess.
+ """
+
+ def __init__(self, args):
+ """
+ Args:
+ args: Parameters generated using argparser.
+ Returns: None
+ """
+ super().__init__()
+ self.args = args
+
+ # init inference engine
+ self.predictor, self.config, self.input_tensor, self.output_tensor = self.load_predictor(
+ os.path.join(args.model_dir, "inference.pdmodel"),
+ os.path.join(args.model_dir, "inference.pdiparams"))
+
+ # build transforms
+ self.transforms = transforms.Compose([
+ transforms.ToTensor(), transforms.Normalize(
+ mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ ])
+
+        # warmup
+ if self.args.warmup > 0:
+ for idx in range(args.warmup):
+ print(idx)
+ x = np.random.rand(1, 3, self.args.crop_size,
+ self.args.crop_size).astype("float32")
+ self.input_tensor.copy_from_cpu(x)
+ self.predictor.run()
+ self.output_tensor.copy_to_cpu()
+
+ self.post_process = get_post_processing({
+ 'type': 'SegDetectorRepresenter',
+ 'args': {
+ 'thresh': 0.3,
+ 'box_thresh': 0.7,
+ 'max_candidates': 1000,
+ 'unclip_ratio': 1.5
+ }
+ })
+
+ def load_predictor(self, model_file_path, params_file_path):
+ """load_predictor
+ initialize the inference engine
+ Args:
+            model_file_path: inference model path (*.pdmodel)
+            params_file_path: inference parameter path (*.pdiparams)
+ Return:
+ predictor: Predictor created using Paddle Inference.
+ config: Configuration of the predictor.
+ input_tensor: Input tensor of the predictor.
+ output_tensor: Output tensor of the predictor.
+ """
+ args = self.args
+ config = inference.Config(model_file_path, params_file_path)
+ if args.use_gpu:
+ config.enable_use_gpu(1000, 0)
+            if args.use_tensorrt:
+                # Map the --precision string to a Paddle Inference PrecisionType.
+                precision_map = {
+                    "fp32": inference.PrecisionType.Float32,
+                    "fp16": inference.PrecisionType.Half,
+                    "int8": inference.PrecisionType.Int8,
+                }
+                # max_batch_size and min_subgraph_size are not defined in
+                # get_args, so fall back to defaults when they are absent.
+                config.enable_tensorrt_engine(
+                    workspace_size=1 << 30,
+                    precision_mode=precision_map.get(
+                        args.precision, inference.PrecisionType.Float32),
+                    max_batch_size=getattr(args, "max_batch_size", 1),
+                    min_subgraph_size=getattr(
+                        args, "min_subgraph_size",
+                        3),  # skip the minimum trt subgraph
+                    use_calib_mode=False)
+
+                # collect shape
+                trt_shape_f = os.path.join(args.model_dir,
+                                           "_trt_dynamic_shape.txt")
+
+                if not os.path.exists(trt_shape_f):
+                    config.collect_shape_range_info(trt_shape_f)
+                    print(f"collect dynamic shape info into : {trt_shape_f}")
+                try:
+                    config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f,
+                                                               True)
+                except Exception as E:
+                    print(E)
+                    print("Please keep your paddlepaddle-gpu >= 2.3.0!")
+ else:
+ config.disable_gpu()
+ # The thread num should not be greater than the number of cores in the CPU.
+ if args.enable_mkldnn:
+ # cache 10 different shapes for mkldnn to avoid memory leak
+ config.set_mkldnn_cache_capacity(10)
+ config.enable_mkldnn()
+ if args.precision == "fp16":
+ config.enable_mkldnn_bfloat16()
+ if hasattr(args, "cpu_threads"):
+ config.set_cpu_math_library_num_threads(args.cpu_threads)
+ else:
+ # default cpu threads as 10
+ config.set_cpu_math_library_num_threads(10)
+
+ # enable memory optim
+ config.enable_memory_optim()
+ config.disable_glog_info()
+
+ config.switch_use_feed_fetch_ops(False)
+ config.switch_ir_optim(True)
+
+ # create predictor
+ predictor = inference.create_predictor(config)
+
+ # get input and output tensor property
+ input_names = predictor.get_input_names()
+ input_tensor = predictor.get_input_handle(input_names[0])
+
+ output_names = predictor.get_output_names()
+ output_tensor = predictor.get_output_handle(output_names[0])
+
+ return predictor, config, input_tensor, output_tensor
+
+ def preprocess(self, img_path, short_size):
+ """preprocess
+        Preprocess the input image.
+        Args:
+            img_path: Image path.
+            short_size: Target length of the shorter image side.
+        Returns: Preprocessed input data and the original shape info.
+ """
+ img = cv2.imread(img_path, 1)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ h, w = img.shape[:2]
+ img = resize_image(img, short_size)
+ img = self.transforms(img)
+ img = np.expand_dims(img, axis=0)
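+        # Keep the original (h, w) so predicted boxes can be mapped back to
+        # the source image scale during postprocess.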
+ shape_info = {'shape': [(h, w)]}
+ return img, shape_info
+
+ def postprocess(self, x, shape_info, is_output_polygon):
+ """postprocess
+        Postprocess the inference engine output.
+        Args:
+            x: Inference engine output.
+            shape_info: Original image shape info from preprocess.
+            is_output_polygon: Whether to output polygons instead of boxes.
+        Returns: Filtered box list and score list.
+ """
+ box_list, score_list = self.post_process(
+ shape_info, x, is_output_polygon=is_output_polygon)
+ box_list, score_list = box_list[0], score_list[0]
+ if len(box_list) > 0:
+ if is_output_polygon:
+ idx = [x.sum() > 0 for x in box_list]
+ box_list = [box_list[i] for i, v in enumerate(idx) if v]
+ score_list = [score_list[i] for i, v in enumerate(idx) if v]
+ else:
+ idx = box_list.reshape(box_list.shape[0], -1).sum(
+                    axis=1) > 0  # drop boxes that are all zeros
+ box_list, score_list = box_list[idx], score_list[idx]
+ else:
+ box_list, score_list = [], []
+ return box_list, score_list
+
+ def run(self, x):
+ """run
+ Inference process using inference engine.
+ Args:
+ x: Input data after preprocess.
+ Returns: Inference engine output
+ """
+ self.input_tensor.copy_from_cpu(x)
+ self.predictor.run()
+ output = self.output_tensor.copy_to_cpu()
+ return output
+
+
+def get_args(add_help=True):
+ """
+ parse args
+ """
+ import argparse
+
+ def str2bool(v):
+ return v.lower() in ("true", "t", "1")
+
+    parser = argparse.ArgumentParser(
+        description="DBNet PaddlePaddle Inference", add_help=add_help)
+
+ parser.add_argument("--model_dir", default=None, help="inference model dir")
+ parser.add_argument("--batch_size", type=int, default=1)
+ parser.add_argument(
+ "--short_size", default=1024, type=int, help="short size")
+ parser.add_argument("--img_path", default="./images/demo.jpg")
+
+ parser.add_argument(
+ "--benchmark", default=False, type=str2bool, help="benchmark")
+ parser.add_argument("--warmup", default=0, type=int, help="warmup iter")
+ parser.add_argument(
+ '--polygon', action='store_true', help='output polygon or box')
+
+ parser.add_argument("--use_gpu", type=str2bool, default=True)
+ parser.add_argument("--use_tensorrt", type=str2bool, default=False)
+ parser.add_argument("--precision", type=str, default="fp32")
+ parser.add_argument("--gpu_mem", type=int, default=500)
+ parser.add_argument("--gpu_id", type=int, default=0)
+ parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
+ parser.add_argument("--cpu_threads", type=int, default=10)
+
+ args = parser.parse_args()
+ return args
+
+
+def main(args):
+ """
+ Main inference function.
+ Args:
+ args: Parameters generated using argparser.
+    Returns:
+        None. Detection results are drawn on the image and saved to the
+        output folder.
+ """
+ inference_engine = InferenceEngine(args)
+
+ # init benchmark
+ if args.benchmark:
+ import auto_log
+ autolog = auto_log.AutoLogger(
+ model_name="db",
+ batch_size=args.batch_size,
+ inference_config=inference_engine.config,
+ gpu_ids="auto" if args.use_gpu else None)
+
+ # enable benchmark
+ if args.benchmark:
+ autolog.times.start()
+
+ # preprocess
+ img, shape_info = inference_engine.preprocess(args.img_path,
+ args.short_size)
+
+ if args.benchmark:
+ autolog.times.stamp()
+
+ output = inference_engine.run(img)
+
+ if args.benchmark:
+ autolog.times.stamp()
+
+ # postprocess
+ box_list, score_list = inference_engine.postprocess(output, shape_info,
+ args.polygon)
+
+ if args.benchmark:
+ autolog.times.stamp()
+ autolog.times.end(stamp=True)
+ autolog.report()
+
+ img = draw_bbox(cv2.imread(args.img_path)[:, :, ::-1], box_list)
+    # save results to the output path
+ os.makedirs('output', exist_ok=True)
+ img_path = pathlib.Path(args.img_path)
+ output_path = os.path.join('output', img_path.stem + '_infer_result.jpg')
+ cv2.imwrite(output_path, img[:, :, ::-1])
+ save_result(
+ output_path.replace('_infer_result.jpg', '.txt'), box_list, score_list,
+ args.polygon)
+
+
+if __name__ == "__main__":
+ args = get_args()
+ main(args)
diff --git a/benchmark/PaddleOCR_DBNet/tools/predict.py b/benchmark/PaddleOCR_DBNet/tools/predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..51beffd1706c0c12100a0d7fea98c7532b1272b6
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/tools/predict.py
@@ -0,0 +1,178 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/24 12:06
+# @Author : zhoujun
+
+import os
+import sys
+import pathlib
+__dir__ = pathlib.Path(os.path.abspath(__file__))
+sys.path.append(str(__dir__))
+sys.path.append(str(__dir__.parent.parent))
+
+import time
+import cv2
+import paddle
+
+from data_loader import get_transforms
+from models import build_model
+from post_processing import get_post_processing
+
+
+def resize_image(img, short_size):
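+    # Resize so the shorter side equals short_size, then round both sides to
+    # multiples of 32 so the network's downsampling divides evenly.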
+ height, width, _ = img.shape
+ if height < width:
+ new_height = short_size
+ new_width = new_height / height * width
+ else:
+ new_width = short_size
+ new_height = new_width / width * height
+ new_height = int(round(new_height / 32) * 32)
+ new_width = int(round(new_width / 32) * 32)
+ resized_img = cv2.resize(img, (new_width, new_height))
+ return resized_img
+
+
+class PaddleModel:
+ def __init__(self, model_path, post_p_thre=0.7, gpu_id=None):
+ '''
+        Initialize the model.
+        :param model_path: model path (parameters only, or parameters saved together with the graph)
+        :param gpu_id: id of the gpu to run on
+ '''
+ self.gpu_id = gpu_id
+
+ if self.gpu_id is not None and isinstance(
+ self.gpu_id, int) and paddle.device.is_compiled_with_cuda():
+ paddle.device.set_device("gpu:{}".format(self.gpu_id))
+ else:
+ paddle.device.set_device("cpu")
+ checkpoint = paddle.load(model_path)
+
+ config = checkpoint['config']
+ config['arch']['backbone']['pretrained'] = False
+ self.model = build_model(config['arch'])
+ self.post_process = get_post_processing(config['post_processing'])
+ self.post_process.box_thresh = post_p_thre
+ self.img_mode = config['dataset']['train']['dataset']['args'][
+ 'img_mode']
+ self.model.set_state_dict(checkpoint['state_dict'])
+ self.model.eval()
+
+ self.transform = []
+ for t in config['dataset']['train']['dataset']['args']['transforms']:
+ if t['type'] in ['ToTensor', 'Normalize']:
+ self.transform.append(t)
+ self.transform = get_transforms(self.transform)
+
+ def predict(self,
+ img_path: str,
+ is_output_polygon=False,
+ short_size: int=1024):
+ '''
+        Run prediction on the given image; takes an image path and reads it
+        with OpenCV, which is slightly slow.
+        :param img_path: image path
+        :param is_output_polygon: output polygons instead of boxes
+        :param short_size: target length of the shorter image side
+        :return:
+        '''
+        assert os.path.exists(img_path), 'file does not exist'
+ img = cv2.imread(img_path, 1 if self.img_mode != 'GRAY' else 0)
+ if self.img_mode == 'RGB':
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ h, w = img.shape[:2]
+ img = resize_image(img, short_size)
+        # convert the image from (w, h) to (1, img_channel, h, w)
+ tensor = self.transform(img)
+ tensor = tensor.unsqueeze_(0)
+
+ batch = {'shape': [(h, w)]}
+ with paddle.no_grad():
+ start = time.time()
+ preds = self.model(tensor)
+ box_list, score_list = self.post_process(
+ batch, preds, is_output_polygon=is_output_polygon)
+ box_list, score_list = box_list[0], score_list[0]
+ if len(box_list) > 0:
+ if is_output_polygon:
+ idx = [x.sum() > 0 for x in box_list]
+ box_list = [box_list[i] for i, v in enumerate(idx) if v]
+ score_list = [score_list[i] for i, v in enumerate(idx) if v]
+ else:
+ idx = box_list.reshape(box_list.shape[0], -1).sum(
+                    axis=1) > 0  # drop boxes that are all zeros
+ box_list, score_list = box_list[idx], score_list[idx]
+ else:
+ box_list, score_list = [], []
+ t = time.time() - start
+ return preds[0, 0, :, :].detach().cpu().numpy(), box_list, score_list, t
+
+
+def save_depoly(net, input, save_path):
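+    # Convert the dygraph model to a static graph with a dynamic input shape
+    # and save it for direct inference deployment.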
+ input_spec = [
+ paddle.static.InputSpec(
+ shape=[None, 3, None, None], dtype="float32")
+ ]
+ net = paddle.jit.to_static(net, input_spec=input_spec)
+
+ # save static model for inference directly
+ paddle.jit.save(net, save_path)
+
+
+def init_args():
+ import argparse
+ parser = argparse.ArgumentParser(description='DBNet.paddle')
+ parser.add_argument('--model_path', default=r'model_best.pth', type=str)
+ parser.add_argument(
+ '--input_folder',
+ default='./test/input',
+ type=str,
+ help='img path for predict')
+ parser.add_argument(
+ '--output_folder',
+ default='./test/output',
+ type=str,
+ help='img path for output')
+ parser.add_argument('--gpu', default=0, type=int, help='gpu for inference')
+ parser.add_argument(
+ '--thre', default=0.3, type=float, help='the thresh of post_processing')
+ parser.add_argument(
+ '--polygon', action='store_true', help='output polygon or box')
+ parser.add_argument('--show', action='store_true', help='show result')
+ parser.add_argument(
+ '--save_result',
+ action='store_true',
+ help='save box and score to txt file')
+ args = parser.parse_args()
+ return args
+
+
+if __name__ == '__main__':
+ import pathlib
+ from tqdm import tqdm
+ import matplotlib.pyplot as plt
+ from utils.util import show_img, draw_bbox, save_result, get_image_file_list
+
+ args = init_args()
+ print(args)
+    # initialize the network
+ model = PaddleModel(args.model_path, post_p_thre=args.thre, gpu_id=args.gpu)
+ img_folder = pathlib.Path(args.input_folder)
+ for img_path in tqdm(get_image_file_list(args.input_folder)):
+ preds, boxes_list, score_list, t = model.predict(
+ img_path, is_output_polygon=args.polygon)
+ img = draw_bbox(cv2.imread(img_path)[:, :, ::-1], boxes_list)
+ if args.show:
+ show_img(preds)
+ show_img(img, title=os.path.basename(img_path))
+ plt.show()
+        # save results to the output folder
+ os.makedirs(args.output_folder, exist_ok=True)
+ img_path = pathlib.Path(img_path)
+ output_path = os.path.join(args.output_folder,
+ img_path.stem + '_result.jpg')
+ pred_path = os.path.join(args.output_folder,
+ img_path.stem + '_pred.jpg')
+ cv2.imwrite(output_path, img[:, :, ::-1])
+ cv2.imwrite(pred_path, preds * 255)
+ save_result(
+ output_path.replace('_result.jpg', '.txt'), boxes_list, score_list,
+ args.polygon)
diff --git a/benchmark/PaddleOCR_DBNet/tools/train.py b/benchmark/PaddleOCR_DBNet/tools/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..403d6185fc28c5037bcb29d5f188ec0692c9499b
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/tools/train.py
@@ -0,0 +1,61 @@
+import os
+import sys
+import pathlib
+__dir__ = pathlib.Path(os.path.abspath(__file__))
+sys.path.append(str(__dir__))
+sys.path.append(str(__dir__.parent.parent))
+
+import paddle
+import paddle.distributed as dist
+from utils import Config, ArgsParser
+
+
+def init_args():
+ parser = ArgsParser()
+ args = parser.parse_args()
+ return args
+
+
+def main(config, profiler_options):
+ from models import build_model, build_loss
+ from data_loader import get_dataloader
+ from trainer import Trainer
+ from post_processing import get_post_processing
+ from utils import get_metric
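+    # Use data-parallel training when more than one GPU is visible.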
+ if paddle.device.cuda.device_count() > 1:
+ dist.init_parallel_env()
+ config['distributed'] = True
+ else:
+ config['distributed'] = False
+ train_loader = get_dataloader(config['dataset']['train'],
+ config['distributed'])
+ assert train_loader is not None
+ if 'validate' in config['dataset']:
+ validate_loader = get_dataloader(config['dataset']['validate'], False)
+ else:
+ validate_loader = None
+ criterion = build_loss(config['loss'])
+ config['arch']['backbone']['in_channels'] = 3 if config['dataset']['train'][
+ 'dataset']['args']['img_mode'] != 'GRAY' else 1
+ model = build_model(config['arch'])
+ # set @to_static for benchmark, skip this by default.
+ post_p = get_post_processing(config['post_processing'])
+ metric = get_metric(config['metric'])
+ trainer = Trainer(
+ config=config,
+ model=model,
+ criterion=criterion,
+ train_loader=train_loader,
+ post_process=post_p,
+ metric_cls=metric,
+ validate_loader=validate_loader,
+ profiler_options=profiler_options)
+ trainer.train()
+
+
+if __name__ == '__main__':
+ args = init_args()
+ assert os.path.exists(args.config_file)
+ config = Config(args.config_file)
+ config.merge_dict(args.opt)
+ main(config.cfg, args.profiler_options)
diff --git a/benchmark/PaddleOCR_DBNet/trainer/__init__.py b/benchmark/PaddleOCR_DBNet/trainer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..76c7392d142c96d5ec715b528ea47e9001cbec4b
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/trainer/__init__.py
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:58
+# @Author : zhoujun
+from .trainer import Trainer
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/trainer/trainer.py b/benchmark/PaddleOCR_DBNet/trainer/trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..34b259f3d182672d727fb50eebf14c27870cb836
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/trainer/trainer.py
@@ -0,0 +1,230 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:58
+# @Author : zhoujun
+import time
+
+import paddle
+from tqdm import tqdm
+
+from base import BaseTrainer
+from utils import runningScore, cal_text_score, Polynomial, profiler
+
+
+class Trainer(BaseTrainer):
+ def __init__(self,
+ config,
+ model,
+ criterion,
+ train_loader,
+ validate_loader,
+ metric_cls,
+ post_process=None,
+ profiler_options=None):
+ super(Trainer, self).__init__(config, model, criterion, train_loader,
+ validate_loader, metric_cls, post_process)
+ self.profiler_options = profiler_options
+ self.enable_eval = config['trainer'].get('enable_eval', True)
+
+ def _train_epoch(self, epoch):
+ self.model.train()
+ total_samples = 0
+ train_reader_cost = 0.0
+ train_batch_cost = 0.0
+ reader_start = time.time()
+ epoch_start = time.time()
+ train_loss = 0.
+ running_metric_text = runningScore(2)
+
+ for i, batch in enumerate(self.train_loader):
+ profiler.add_profiler_step(self.profiler_options)
+ if i >= self.train_loader_len:
+ break
+ self.global_step += 1
+ lr = self.optimizer.get_lr()
+
+ cur_batch_size = batch['img'].shape[0]
+
+ train_reader_cost += time.time() - reader_start
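+            # AMP path: run the forward pass under auto_cast and scale the loss
+            # before backward so small float16 gradients do not underflow;
+            # scaler.minimize() unscales the gradients and applies the update.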
+ if self.amp:
+ with paddle.amp.auto_cast(
+ enable='gpu' in paddle.device.get_device(),
+ custom_white_list=self.amp.get('custom_white_list', []),
+ custom_black_list=self.amp.get('custom_black_list', []),
+ level=self.amp.get('level', 'O2')):
+ preds = self.model(batch['img'])
+ loss_dict = self.criterion(preds.astype(paddle.float32), batch)
+ scaled_loss = self.amp['scaler'].scale(loss_dict['loss'])
+ scaled_loss.backward()
+ self.amp['scaler'].minimize(self.optimizer, scaled_loss)
+ else:
+ preds = self.model(batch['img'])
+ loss_dict = self.criterion(preds, batch)
+ # backward
+ loss_dict['loss'].backward()
+ self.optimizer.step()
+ self.lr_scheduler.step()
+ self.optimizer.clear_grad()
+
+ train_batch_time = time.time() - reader_start
+ train_batch_cost += train_batch_time
+ total_samples += cur_batch_size
+
+ # acc iou
+ score_shrink_map = cal_text_score(
+ preds[:, 0, :, :],
+ batch['shrink_map'],
+ batch['shrink_mask'],
+ running_metric_text,
+ thred=self.config['post_processing']['args']['thresh'])
+
+            # log loss and acc
+ loss_str = 'loss: {:.4f}, '.format(loss_dict['loss'].item())
+ for idx, (key, value) in enumerate(loss_dict.items()):
+ loss_dict[key] = value.item()
+ if key == 'loss':
+ continue
+ loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
+ if idx < len(loss_dict) - 1:
+ loss_str += ', '
+
+ train_loss += loss_dict['loss']
+ acc = score_shrink_map['Mean Acc']
+ iou_shrink_map = score_shrink_map['Mean IoU']
+
+ if self.global_step % self.log_iter == 0:
+ self.logger_info(
+ '[{}/{}], [{}/{}], global_step: {}, ips: {:.1f} samples/sec, avg_reader_cost: {:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, acc: {:.4f}, iou_shrink_map: {:.4f}, {}lr:{:.6}, time:{:.2f}'.
+ format(epoch, self.epochs, i + 1, self.train_loader_len,
+ self.global_step, total_samples / train_batch_cost,
+ train_reader_cost / self.log_iter, train_batch_cost /
+ self.log_iter, total_samples / self.log_iter, acc,
+ iou_shrink_map, loss_str, lr, train_batch_cost))
+ total_samples = 0
+ train_reader_cost = 0.0
+ train_batch_cost = 0.0
+
+ if self.visualdl_enable and paddle.distributed.get_rank() == 0:
+                # write training scalars to VisualDL
+ for key, value in loss_dict.items():
+ self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value,
+ self.global_step)
+ self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc,
+ self.global_step)
+ self.writer.add_scalar('TRAIN/ACC_IOU/iou_shrink_map',
+ iou_shrink_map, self.global_step)
+ self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
+ reader_start = time.time()
+ return {
+ 'train_loss': train_loss / self.train_loader_len,
+ 'lr': lr,
+ 'time': time.time() - epoch_start,
+ 'epoch': epoch
+ }
+
+ def _eval(self, epoch):
+ self.model.eval()
+ raw_metrics = []
+ total_frame = 0.0
+ total_time = 0.0
+ for i, batch in tqdm(
+ enumerate(self.validate_loader),
+ total=len(self.validate_loader),
+ desc='test model'):
+ with paddle.no_grad():
+ start = time.time()
+ if self.amp:
+ with paddle.amp.auto_cast(
+ enable='gpu' in paddle.device.get_device(),
+ custom_white_list=self.amp.get('custom_white_list',
+ []),
+ custom_black_list=self.amp.get('custom_black_list',
+ []),
+ level=self.amp.get('level', 'O2')):
+ preds = self.model(batch['img'])
+ preds = preds.astype(paddle.float32)
+ else:
+ preds = self.model(batch['img'])
+ boxes, scores = self.post_process(
+ batch,
+ preds,
+ is_output_polygon=self.metric_cls.is_output_polygon)
+ total_frame += batch['img'].shape[0]
+ total_time += time.time() - start
+ raw_metric = self.metric_cls.validate_measure(batch,
+ (boxes, scores))
+ raw_metrics.append(raw_metric)
+ metrics = self.metric_cls.gather_measure(raw_metrics)
+ self.logger_info('FPS:{}'.format(total_frame / total_time))
+ return metrics['recall'].avg, metrics['precision'].avg, metrics[
+ 'fmeasure'].avg
+
+ def _on_epoch_finish(self):
+ self.logger_info('[{}/{}], train_loss: {:.4f}, time: {:.4f}, lr: {}'.
+ format(self.epoch_result['epoch'], self.epochs, self.
+ epoch_result['train_loss'], self.epoch_result[
+ 'time'], self.epoch_result['lr']))
+ net_save_path = '{}/model_latest.pth'.format(self.checkpoint_dir)
+ net_save_path_best = '{}/model_best.pth'.format(self.checkpoint_dir)
+
+ if paddle.distributed.get_rank() == 0:
+ self._save_checkpoint(self.epoch_result['epoch'], net_save_path)
+ save_best = False
+            if self.validate_loader is not None and self.metric_cls is not None and self.enable_eval:  # use hmean (f1) to select the best model
+ recall, precision, hmean = self._eval(self.epoch_result[
+ 'epoch'])
+
+ if self.visualdl_enable:
+ self.writer.add_scalar('EVAL/recall', recall,
+ self.global_step)
+ self.writer.add_scalar('EVAL/precision', precision,
+ self.global_step)
+ self.writer.add_scalar('EVAL/hmean', hmean,
+ self.global_step)
+ self.logger_info(
+ 'test: recall: {:.6f}, precision: {:.6f}, hmean: {:.6f}'.
+ format(recall, precision, hmean))
+
+ if hmean >= self.metrics['hmean']:
+ save_best = True
+ self.metrics['train_loss'] = self.epoch_result['train_loss']
+ self.metrics['hmean'] = hmean
+ self.metrics['precision'] = precision
+ self.metrics['recall'] = recall
+ self.metrics['best_model_epoch'] = self.epoch_result[
+ 'epoch']
+ else:
+ if self.epoch_result['train_loss'] <= self.metrics[
+ 'train_loss']:
+ save_best = True
+ self.metrics['train_loss'] = self.epoch_result['train_loss']
+ self.metrics['best_model_epoch'] = self.epoch_result[
+ 'epoch']
+ best_str = 'current best, '
+ for k, v in self.metrics.items():
+ best_str += '{}: {:.6f}, '.format(k, v)
+ self.logger_info(best_str)
+ if save_best:
+ import shutil
+ shutil.copy(net_save_path, net_save_path_best)
+ self.logger_info("Saving current best: {}".format(
+ net_save_path_best))
+ else:
+ self.logger_info("Saving checkpoint: {}".format(net_save_path))
+
+ def _on_train_finish(self):
+ if self.enable_eval:
+ for k, v in self.metrics.items():
+ self.logger_info('{}:{}'.format(k, v))
+ self.logger_info('finish train')
+
+ def _initialize_scheduler(self):
+ if self.config['lr_scheduler']['type'] == 'Polynomial':
+ self.config['lr_scheduler']['args']['epochs'] = self.config[
+ 'trainer']['epochs']
+ self.config['lr_scheduler']['args']['step_each_epoch'] = len(
+ self.train_loader)
+ self.lr_scheduler = Polynomial(
+ **self.config['lr_scheduler']['args'])()
+ else:
+ self.lr_scheduler = self._initialize('lr_scheduler',
+ paddle.optimizer.lr)
diff --git a/benchmark/PaddleOCR_DBNet/utils/__init__.py b/benchmark/PaddleOCR_DBNet/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..194e0b82ff6576c3880914fad0492fde276cff33
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/__init__.py
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:58
+# @Author : zhoujun
+from .util import *
+from .metrics import *
+from .schedulers import *
+from .cal_recall.script import cal_recall_precison_f1
+from .ocr_metric import get_metric
diff --git a/benchmark/PaddleOCR_DBNet/utils/cal_recall/__init__.py b/benchmark/PaddleOCR_DBNet/utils/cal_recall/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0db38a8a37f0a3d8fbd8c12a3e54457e41cf9360
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/cal_recall/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+# @Time : 1/16/19 6:40 AM
+# @Author : zhoujun
+from .script import cal_recall_precison_f1
+__all__ = ['cal_recall_precison_f1']
diff --git a/benchmark/PaddleOCR_DBNet/utils/cal_recall/rrc_evaluation_funcs.py b/benchmark/PaddleOCR_DBNet/utils/cal_recall/rrc_evaluation_funcs.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e12ee66a07118c07559eebd655f5173e046696e
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/cal_recall/rrc_evaluation_funcs.py
@@ -0,0 +1,479 @@
+#!/usr/bin/env python
+# encoding: UTF-8
+import json
+import sys
+sys.path.append('./')
+import zipfile
+import re
+import os
+import codecs
+import traceback
+import numpy as np
+from utils import order_points_clockwise
+
+
+def print_help():
+ sys.stdout.write(
+        'Usage: python %s.py -g=<gtFile> -s=<submFile> [-o=<outputFolder> -p=<jsonParams>]'
+ % sys.argv[0])
+ sys.exit(2)
+
+
+def load_zip_file_keys(file, fileNameRegExp=''):
+ """
+ Returns an array with the entries of the ZIP file that match with the regular expression.
+ The key's are the names or the file or the capturing group definied in the fileNameRegExp
+ """
+ try:
+ archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
+ except:
+ raise Exception('Error loading the ZIP archive.')
+
+ pairs = []
+
+ for name in archive.namelist():
+ addFile = True
+ keyName = name
+ if fileNameRegExp != "":
+ m = re.match(fileNameRegExp, name)
+ if m == None:
+ addFile = False
+ else:
+ if len(m.groups()) > 0:
+ keyName = m.group(1)
+
+ if addFile:
+ pairs.append(keyName)
+
+ return pairs
+
+
+def load_zip_file(file, fileNameRegExp='', allEntries=False):
+ """
+ Returns an array with the contents (filtered by fileNameRegExp) of a ZIP file.
+ The key's are the names or the file or the capturing group definied in the fileNameRegExp
+ allEntries validates that all entries in the ZIP file pass the fileNameRegExp
+ """
+ try:
+ archive = zipfile.ZipFile(file, mode='r', allowZip64=True)
+ except:
+ raise Exception('Error loading the ZIP archive')
+
+ pairs = []
+ for name in archive.namelist():
+ addFile = True
+ keyName = name
+ if fileNameRegExp != "":
+ m = re.match(fileNameRegExp, name)
+ if m == None:
+ addFile = False
+ else:
+ if len(m.groups()) > 0:
+ keyName = m.group(1)
+
+ if addFile:
+ pairs.append([keyName, archive.read(name)])
+ else:
+ if allEntries:
+ raise Exception('ZIP entry not valid: %s' % name)
+
+ return dict(pairs)
+
+
+def load_folder_file(file, fileNameRegExp='', allEntries=False):
+ """
+ Returns an array with the contents (filtered by fileNameRegExp) of a ZIP file.
+ The key's are the names or the file or the capturing group definied in the fileNameRegExp
+ allEntries validates that all entries in the ZIP file pass the fileNameRegExp
+ """
+ pairs = []
+ for name in os.listdir(file):
+ addFile = True
+ keyName = name
+ if fileNameRegExp != "":
+ m = re.match(fileNameRegExp, name)
+ if m == None:
+ addFile = False
+ else:
+ if len(m.groups()) > 0:
+ keyName = m.group(1)
+
+ if addFile:
+ pairs.append([keyName, open(os.path.join(file, name)).read()])
+ else:
+ if allEntries:
+                raise Exception('Folder entry not valid: %s' % name)
+
+ return dict(pairs)
+
+
+def decode_utf8(raw):
+ """
+ Returns a Unicode object on success, or None on failure
+ """
+ try:
+ raw = codecs.decode(raw, 'utf-8', 'replace')
+ #extracts BOM if exists
+ raw = raw.encode('utf8')
+ if raw.startswith(codecs.BOM_UTF8):
+            raw = raw.replace(codecs.BOM_UTF8, b'', 1)
+ return raw.decode('utf-8')
+ except:
+ return None
+
+
+def validate_lines_in_file(fileName,
+ file_contents,
+ CRLF=True,
+ LTRB=True,
+ withTranscription=False,
+ withConfidence=False,
+ imWidth=0,
+ imHeight=0):
+ """
+ This function validates that all lines of the file calling the Line validation function for each line
+ """
+ utf8File = decode_utf8(file_contents)
+ if (utf8File is None):
+ raise Exception("The file %s is not UTF-8" % fileName)
+
+ lines = utf8File.split("\r\n" if CRLF else "\n")
+ for line in lines:
+ line = line.replace("\r", "").replace("\n", "")
+ if (line != ""):
+ try:
+ validate_tl_line(line, LTRB, withTranscription, withConfidence,
+ imWidth, imHeight)
+ except Exception as e:
+ raise Exception(
+ ("Line in sample not valid. Sample: %s Line: %s Error: %s" %
+ (fileName, line, str(e))).encode('utf-8', 'replace'))
+
+
+def validate_tl_line(line,
+ LTRB=True,
+ withTranscription=True,
+ withConfidence=True,
+ imWidth=0,
+ imHeight=0):
+ """
+ Validate the format of the line. If the line is not valid an exception will be raised.
+ If maxWidth and maxHeight are specified, all points must be inside the imgage bounds.
+ Posible values are:
+ LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
+ LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
+ """
+ get_tl_line_values(line, LTRB, withTranscription, withConfidence, imWidth,
+ imHeight)
+
+
+def get_tl_line_values(line,
+ LTRB=True,
+ withTranscription=False,
+ withConfidence=False,
+ imWidth=0,
+ imHeight=0):
+ """
+ Validate the format of the line. If the line is not valid an exception will be raised.
+ If maxWidth and maxHeight are specified, all points must be inside the imgage bounds.
+ Posible values are:
+ LTRB=True: xmin,ymin,xmax,ymax[,confidence][,transcription]
+ LTRB=False: x1,y1,x2,y2,x3,y3,x4,y4[,confidence][,transcription]
+ Returns values from a textline. Points , [Confidences], [Transcriptions]
+ """
+ confidence = 0.0
+ transcription = ""
+ points = []
+
+ numPoints = 4
+
+ if LTRB:
+
+ numPoints = 4
+
+ if withTranscription and withConfidence:
+ m = re.match(
+ r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',
+ line)
+            if m is None:
+                raise Exception(
+                    "Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence,transcription"
+                )
+ elif withConfidence:
+ m = re.match(
+ r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$',
+ line)
+ if m == None:
+ raise Exception(
+ "Format incorrect. Should be: xmin,ymin,xmax,ymax,confidence"
+ )
+ elif withTranscription:
+ m = re.match(
+ r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,(.*)$',
+ line)
+ if m == None:
+ raise Exception(
+ "Format incorrect. Should be: xmin,ymin,xmax,ymax,transcription"
+ )
+ else:
+ m = re.match(
+ r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*,?\s*$',
+ line)
+ if m == None:
+ raise Exception(
+ "Format incorrect. Should be: xmin,ymin,xmax,ymax")
+
+ xmin = int(m.group(1))
+ ymin = int(m.group(2))
+ xmax = int(m.group(3))
+ ymax = int(m.group(4))
+ if (xmax < xmin):
+ raise Exception("Xmax value (%s) not valid (Xmax < Xmin)." % (xmax))
+ if (ymax < ymin):
+ raise Exception("Ymax value (%s) not valid (Ymax < Ymin)." %
+ (ymax))
+
+ points = [float(m.group(i)) for i in range(1, (numPoints + 1))]
+
+ if (imWidth > 0 and imHeight > 0):
+ validate_point_inside_bounds(xmin, ymin, imWidth, imHeight)
+ validate_point_inside_bounds(xmax, ymax, imWidth, imHeight)
+
+ else:
+
+ numPoints = 8
+
+ if withTranscription and withConfidence:
+ m = re.match(
+ r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*,(.*)$',
+ line)
+ if m == None:
+ raise Exception(
+ "Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence,transcription"
+ )
+ elif withConfidence:
+ m = re.match(
+ r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*([0-1].?[0-9]*)\s*$',
+ line)
+ if m == None:
+ raise Exception(
+ "Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,confidence"
+ )
+ elif withTranscription:
+ m = re.match(
+ r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,(.*)$',
+ line)
+ if m == None:
+ raise Exception(
+ "Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4,transcription"
+ )
+ else:
+ m = re.match(
+ r'^\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*,\s*(-?[0-9]+)\s*$',
+ line)
+ if m == None:
+ raise Exception(
+ "Format incorrect. Should be: x1,y1,x2,y2,x3,y3,x4,y4")
+
+ points = [float(m.group(i)) for i in range(1, (numPoints + 1))]
+
+ points = order_points_clockwise(np.array(points).reshape(-1,
+ 2)).reshape(-1)
+ validate_clockwise_points(points)
+
+ if (imWidth > 0 and imHeight > 0):
+ validate_point_inside_bounds(points[0], points[1], imWidth,
+ imHeight)
+ validate_point_inside_bounds(points[2], points[3], imWidth,
+ imHeight)
+ validate_point_inside_bounds(points[4], points[5], imWidth,
+ imHeight)
+ validate_point_inside_bounds(points[6], points[7], imWidth,
+ imHeight)
+
+ if withConfidence:
+ try:
+ confidence = float(m.group(numPoints + 1))
+ except ValueError:
+ raise Exception("Confidence value must be a float")
+
+ if withTranscription:
+ posTranscription = numPoints + (2 if withConfidence else 1)
+ transcription = m.group(posTranscription)
+ m2 = re.match(r'^\s*\"(.*)\"\s*$', transcription)
+ if m2 != None: #Transcription with double quotes, we extract the value and replace escaped characters
+ transcription = m2.group(1).replace("\\\\", "\\").replace("\\\"",
+ "\"")
+
+ return points, confidence, transcription
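+
+# Example: with LTRB=True, withConfidence=True and withTranscription=True the
+# line '10,20,110,220,0.9,"hello"' yields points [10.0, 20.0, 110.0, 220.0],
+# confidence 0.9 and transcription hello (quotes and escapes stripped above).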
+
+
+def validate_point_inside_bounds(x, y, imWidth, imHeight):
+    if (x < 0 or x > imWidth):
+        raise Exception("X value (%s) not valid. Image dimensions: (%s,%s)" %
+                        (x, imWidth, imHeight))
+    if (y < 0 or y > imHeight):
+        raise Exception("Y value (%s) not valid. Image dimensions: (%s,%s)" %
+                        (y, imWidth, imHeight))
+
+
+def validate_clockwise_points(points):
+ """
+ Validates that the points that the 4 points that dlimite a polygon are in clockwise order.
+ """
+
+ if len(points) != 8:
+ raise Exception("Points list not valid." + str(len(points)))
+
+ point = [[int(points[0]), int(points[1])],
+ [int(points[2]), int(points[3])],
+ [int(points[4]), int(points[5])],
+ [int(points[6]), int(points[7])]]
+ edge = [(point[1][0] - point[0][0]) * (point[1][1] + point[0][1]),
+ (point[2][0] - point[1][0]) * (point[2][1] + point[1][1]),
+ (point[3][0] - point[2][0]) * (point[3][1] + point[2][1]),
+ (point[0][0] - point[3][0]) * (point[0][1] + point[3][1])]
+
+ summatory = edge[0] + edge[1] + edge[2] + edge[3]
+ if summatory > 0:
+ raise Exception(
+ "Points are not clockwise. The coordinates of bounding quadrilaterals have to be given in clockwise order. Regarding the correct interpretation of 'clockwise' remember that the image coordinate system used is the standard one, with the image origin at the upper left, the X axis extending to the right and Y axis extending downwards."
+ )
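+
+# Example: the square (0,0),(1,0),(1,1),(0,1) is clockwise in image
+# coordinates (y grows downwards); its edge terms are 0, 0, -2 and 0, the sum
+# is -2, and the points validate. Reversing the order gives +2 and raises.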
+
+
+def get_tl_line_values_from_file_contents(content,
+ CRLF=True,
+ LTRB=True,
+ withTranscription=False,
+ withConfidence=False,
+ imWidth=0,
+ imHeight=0,
+ sort_by_confidences=True):
+ """
+ Returns all points, confindences and transcriptions of a file in lists. Valid line formats:
+ xmin,ymin,xmax,ymax,[confidence],[transcription]
+ x1,y1,x2,y2,x3,y3,x4,y4,[confidence],[transcription]
+ """
+ pointsList = []
+ transcriptionsList = []
+ confidencesList = []
+
+ lines = content.split("\r\n" if CRLF else "\n")
+ for line in lines:
+ line = line.replace("\r", "").replace("\n", "")
+ if (line != ""):
+ points, confidence, transcription = get_tl_line_values(
+ line, LTRB, withTranscription, withConfidence, imWidth,
+ imHeight)
+ pointsList.append(points)
+ transcriptionsList.append(transcription)
+ confidencesList.append(confidence)
+
+ if withConfidence and len(confidencesList) > 0 and sort_by_confidences:
+ sorted_ind = np.argsort(-np.array(confidencesList))
+ confidencesList = [confidencesList[i] for i in sorted_ind]
+ pointsList = [pointsList[i] for i in sorted_ind]
+ transcriptionsList = [transcriptionsList[i] for i in sorted_ind]
+
+ return pointsList, confidencesList, transcriptionsList
+
+
+def main_evaluation(p,
+ default_evaluation_params_fn,
+ validate_data_fn,
+ evaluate_method_fn,
+ show_result=True,
+ per_sample=True):
+ """
+ This process validates a method, evaluates it and if it succed generates a ZIP file with a JSON entry for each sample.
+ Params:
+ p: Dictionary of parmeters with the GT/submission locations. If None is passed, the parameters send by the system are used.
+ default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
+ validate_data_fn: points to a method that validates the corrct format of the submission
+ evaluate_method_fn: points to a function that evaluated the submission and return a Dictionary with the results
+ """
+ evalParams = default_evaluation_params_fn()
+ if 'p' in p.keys():
+ evalParams.update(p['p'] if isinstance(p['p'], dict) else json.loads(p[
+ 'p'][1:-1]))
+
+ resDict = {
+ 'calculated': True,
+ 'Message': '',
+ 'method': '{}',
+ 'per_sample': '{}'
+ }
+ try:
+ # validate_data_fn(p['g'], p['s'], evalParams)
+ evalData = evaluate_method_fn(p['g'], p['s'], evalParams)
+ resDict.update(evalData)
+
+ except Exception as e:
+ traceback.print_exc()
+ resDict['Message'] = str(e)
+ resDict['calculated'] = False
+
+ if 'o' in p:
+ if not os.path.exists(p['o']):
+ os.makedirs(p['o'])
+
+ resultsOutputname = p['o'] + '/results.zip'
+ outZip = zipfile.ZipFile(resultsOutputname, mode='w', allowZip64=True)
+
+ del resDict['per_sample']
+ if 'output_items' in resDict.keys():
+ del resDict['output_items']
+
+ outZip.writestr('method.json', json.dumps(resDict))
+
+ if not resDict['calculated']:
+ if show_result:
+ sys.stderr.write('Error!\n' + resDict['Message'] + '\n\n')
+ if 'o' in p:
+ outZip.close()
+ return resDict
+
+ if 'o' in p:
+        if per_sample:
+            for k, v in evalData['per_sample'].items():
+                outZip.writestr(k + '.json', json.dumps(v))
+
+            if 'output_items' in evalData.keys():
+                for k, v in evalData['output_items'].items():
+                    outZip.writestr(k, v)
+
+ outZip.close()
+
+ if show_result:
+ sys.stdout.write("Calculated!")
+ sys.stdout.write(json.dumps(resDict['method']))
+
+ return resDict
+
+
+def main_validation(default_evaluation_params_fn, validate_data_fn):
+ """
+ This process validates a method
+ Params:
+ default_evaluation_params_fn: points to a function that returns a dictionary with the default parameters used for the evaluation
+    validate_data_fn: points to a method that validates the correct format of the submission
+ """
+ try:
+ p = dict([s[1:].split('=') for s in sys.argv[1:]])
+ evalParams = default_evaluation_params_fn()
+ if 'p' in p.keys():
+ evalParams.update(p['p'] if isinstance(p['p'], dict) else
+ json.loads(p['p'][1:-1]))
+
+ validate_data_fn(p['g'], p['s'], evalParams)
+ print('SUCCESS')
+ sys.exit(0)
+ except Exception as e:
+ print(str(e))
+ sys.exit(101)
diff --git a/benchmark/PaddleOCR_DBNet/utils/cal_recall/script.py b/benchmark/PaddleOCR_DBNet/utils/cal_recall/script.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b2f3916f62f191ae1b6b658edad9963243babf7
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/cal_recall/script.py
@@ -0,0 +1,350 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from collections import namedtuple
+from . import rrc_evaluation_funcs
+import Polygon as plg
+import numpy as np
+
+
+def default_evaluation_params():
+ """
+ default_evaluation_params: Default parameters to use for the validation and evaluation.
+ """
+ return {
+ 'IOU_CONSTRAINT': 0.5,
+ 'AREA_PRECISION_CONSTRAINT': 0.5,
+ 'GT_SAMPLE_NAME_2_ID': 'gt_img_([0-9]+).txt',
+ 'DET_SAMPLE_NAME_2_ID': 'res_img_([0-9]+).txt',
+ 'LTRB':
+ False, # LTRB:2points(left,top,right,bottom) or 4 points(x1,y1,x2,y2,x3,y3,x4,y4)
+ 'CRLF': False, # Lines are delimited by Windows CRLF format
+ 'CONFIDENCES':
+ False, # Detections must include confidence value. AP will be calculated
+ 'PER_SAMPLE_RESULTS':
+ True # Generate per sample results and produce data for visualization
+ }
+
+
+def validate_data(gtFilePath, submFilePath, evaluationParams):
+ """
+ Method validate_data: validates that all files in the results folder are correct (have the correct name contents).
+ Validates also that there are no missing files in the folder.
+ If some error detected, the method raises the error
+ """
+ gt = rrc_evaluation_funcs.load_folder_file(
+ gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID'])
+
+ subm = rrc_evaluation_funcs.load_folder_file(
+ submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True)
+
+ # Validate format of GroundTruth
+ for k in gt:
+ rrc_evaluation_funcs.validate_lines_in_file(
+ k, gt[k], evaluationParams['CRLF'], evaluationParams['LTRB'], True)
+
+ # Validate format of results
+ for k in subm:
+        if k not in gt:
+            raise Exception("The sample %s is not present in GT" % k)
+
+ rrc_evaluation_funcs.validate_lines_in_file(
+ k, subm[k], evaluationParams['CRLF'], evaluationParams['LTRB'],
+ False, evaluationParams['CONFIDENCES'])
+
+
+def evaluate_method(gtFilePath, submFilePath, evaluationParams):
+ """
+ Method evaluate_method: evaluate method and returns the results
+ Results. Dictionary with the following values:
+ - method (required) Global method metrics. Ex: { 'Precision':0.8,'Recall':0.9 }
+ - samples (optional) Per sample metrics. Ex: {'sample1' : { 'Precision':0.8,'Recall':0.9 } , 'sample2' : { 'Precision':0.8,'Recall':0.9 }
+ """
+
+ def polygon_from_points(points):
+ """
+ Returns a Polygon object to use with the Polygon2 class from a list of 8 points: x1,y1,x2,y2,x3,y3,x4,y4
+ """
+ resBoxes = np.empty([1, 8], dtype='int32')
+ resBoxes[0, 0] = int(points[0])
+ resBoxes[0, 4] = int(points[1])
+ resBoxes[0, 1] = int(points[2])
+ resBoxes[0, 5] = int(points[3])
+ resBoxes[0, 2] = int(points[4])
+ resBoxes[0, 6] = int(points[5])
+ resBoxes[0, 3] = int(points[6])
+ resBoxes[0, 7] = int(points[7])
+ pointMat = resBoxes[0].reshape([2, 4]).T
+ return plg.Polygon(pointMat)
+
+ def rectangle_to_polygon(rect):
+ resBoxes = np.empty([1, 8], dtype='int32')
+ resBoxes[0, 0] = int(rect.xmin)
+ resBoxes[0, 4] = int(rect.ymax)
+ resBoxes[0, 1] = int(rect.xmin)
+ resBoxes[0, 5] = int(rect.ymin)
+ resBoxes[0, 2] = int(rect.xmax)
+ resBoxes[0, 6] = int(rect.ymin)
+ resBoxes[0, 3] = int(rect.xmax)
+ resBoxes[0, 7] = int(rect.ymax)
+
+ pointMat = resBoxes[0].reshape([2, 4]).T
+
+ return plg.Polygon(pointMat)
+
+ def rectangle_to_points(rect):
+ points = [
+ int(rect.xmin), int(rect.ymax), int(rect.xmax), int(rect.ymax),
+ int(rect.xmax), int(rect.ymin), int(rect.xmin), int(rect.ymin)
+ ]
+ return points
+
+ def get_union(pD, pG):
+ areaA = pD.area()
+ areaB = pG.area()
+ return areaA + areaB - get_intersection(pD, pG)
+
+ def get_intersection_over_union(pD, pG):
+ try:
+ return get_intersection(pD, pG) / get_union(pD, pG)
+ except:
+ return 0
+
+ def get_intersection(pD, pG):
+ pInt = pD & pG
+ if len(pInt) == 0:
+ return 0
+ return pInt.area()
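+
+    # Note: plg.Polygon overloads '&' as polygon intersection; an empty result
+    # has len() == 0, hence the explicit length check above before .area().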
+
+ def compute_ap(confList, matchList, numGtCare):
+ correct = 0
+ AP = 0
+ if len(confList) > 0:
+ confList = np.array(confList)
+ matchList = np.array(matchList)
+ sorted_ind = np.argsort(-confList)
+ confList = confList[sorted_ind]
+ matchList = matchList[sorted_ind]
+ for n in range(len(confList)):
+ match = matchList[n]
+ if match:
+ correct += 1
+ AP += float(correct) / (n + 1)
+
+ if numGtCare > 0:
+ AP /= numGtCare
+
+ return AP
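+
+    # Example: confList=[0.9, 0.8, 0.7], matchList=[True, False, True],
+    # numGtCare=2 -> the hits at ranks 1 and 3 contribute 1/1 and 2/3,
+    # so AP = (1.0 + 0.6667) / 2 ~= 0.833.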
+
+ perSampleMetrics = {}
+
+ matchedSum = 0
+
+ Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax')
+
+ gt = rrc_evaluation_funcs.load_folder_file(
+ gtFilePath, evaluationParams['GT_SAMPLE_NAME_2_ID'])
+ subm = rrc_evaluation_funcs.load_folder_file(
+ submFilePath, evaluationParams['DET_SAMPLE_NAME_2_ID'], True)
+
+ numGlobalCareGt = 0
+ numGlobalCareDet = 0
+
+ arrGlobalConfidences = []
+ arrGlobalMatches = []
+
+ for resFile in gt:
+
+ gtFile = gt[resFile] # rrc_evaluation_funcs.decode_utf8(gt[resFile])
+ recall = 0
+ precision = 0
+ hmean = 0
+
+ detMatched = 0
+
+ iouMat = np.empty([1, 1])
+
+ gtPols = []
+ detPols = []
+
+ gtPolPoints = []
+ detPolPoints = []
+
+ # Array of Ground Truth Polygons' keys marked as don't Care
+ gtDontCarePolsNum = []
+ # Array of Detected Polygons' matched with a don't Care GT
+ detDontCarePolsNum = []
+
+ pairs = []
+ detMatchedNums = []
+
+ arrSampleConfidences = []
+ arrSampleMatch = []
+ sampleAP = 0
+
+ evaluationLog = ""
+
+ pointsList, _, transcriptionsList = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(
+ gtFile, evaluationParams['CRLF'], evaluationParams['LTRB'], True,
+ False)
+ for n in range(len(pointsList)):
+ points = pointsList[n]
+ transcription = transcriptionsList[n]
+ dontCare = transcription == "###"
+ if evaluationParams['LTRB']:
+ gtRect = Rectangle(*points)
+ gtPol = rectangle_to_polygon(gtRect)
+ else:
+ gtPol = polygon_from_points(points)
+ gtPols.append(gtPol)
+ gtPolPoints.append(points)
+ if dontCare:
+ gtDontCarePolsNum.append(len(gtPols) - 1)
+
+ evaluationLog += "GT polygons: " + str(len(gtPols)) + (
+ " (" + str(len(gtDontCarePolsNum)) + " don't care)\n"
+ if len(gtDontCarePolsNum) > 0 else "\n")
+
+ if resFile in subm:
+
+ detFile = subm[
+ resFile] # rrc_evaluation_funcs.decode_utf8(subm[resFile])
+
+ pointsList, confidencesList, _ = rrc_evaluation_funcs.get_tl_line_values_from_file_contents(
+ detFile, evaluationParams['CRLF'], evaluationParams['LTRB'],
+ False, evaluationParams['CONFIDENCES'])
+ for n in range(len(pointsList)):
+ points = pointsList[n]
+
+ if evaluationParams['LTRB']:
+ detRect = Rectangle(*points)
+ detPol = rectangle_to_polygon(detRect)
+ else:
+ detPol = polygon_from_points(points)
+ detPols.append(detPol)
+ detPolPoints.append(points)
+ if len(gtDontCarePolsNum) > 0:
+ for dontCarePol in gtDontCarePolsNum:
+ dontCarePol = gtPols[dontCarePol]
+ intersected_area = get_intersection(dontCarePol, detPol)
+ pdDimensions = detPol.area()
+ precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions
+ if (precision >
+ evaluationParams['AREA_PRECISION_CONSTRAINT']):
+ detDontCarePolsNum.append(len(detPols) - 1)
+ break
+
+ evaluationLog += "DET polygons: " + str(len(detPols)) + (
+ " (" + str(len(detDontCarePolsNum)) + " don't care)\n"
+ if len(detDontCarePolsNum) > 0 else "\n")
+
+ if len(gtPols) > 0 and len(detPols) > 0:
+            # Calculate IoU and precision matrices
+ outputShape = [len(gtPols), len(detPols)]
+ iouMat = np.empty(outputShape)
+ gtRectMat = np.zeros(len(gtPols), np.int8)
+ detRectMat = np.zeros(len(detPols), np.int8)
+ for gtNum in range(len(gtPols)):
+ for detNum in range(len(detPols)):
+ pG = gtPols[gtNum]
+ pD = detPols[detNum]
+ iouMat[gtNum, detNum] = get_intersection_over_union(pD,
+ pG)
+
+ for gtNum in range(len(gtPols)):
+ for detNum in range(len(detPols)):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum:
+ if iouMat[gtNum, detNum] > evaluationParams[
+ 'IOU_CONSTRAINT']:
+ gtRectMat[gtNum] = 1
+ detRectMat[detNum] = 1
+ detMatched += 1
+ pairs.append({'gt': gtNum, 'det': detNum})
+ detMatchedNums.append(detNum)
+ evaluationLog += "Match GT #" + str(
+ gtNum) + " with Det #" + str(detNum) + "\n"
+
+ if evaluationParams['CONFIDENCES']:
+ for detNum in range(len(detPols)):
+ if detNum not in detDontCarePolsNum:
+ # we exclude the don't care detections
+ match = detNum in detMatchedNums
+
+ arrSampleConfidences.append(confidencesList[detNum])
+ arrSampleMatch.append(match)
+
+ arrGlobalConfidences.append(confidencesList[detNum])
+ arrGlobalMatches.append(match)
+
+ numGtCare = (len(gtPols) - len(gtDontCarePolsNum))
+ numDetCare = (len(detPols) - len(detDontCarePolsNum))
+ if numGtCare == 0:
+ recall = float(1)
+ precision = float(0) if numDetCare > 0 else float(1)
+ sampleAP = precision
+ else:
+ recall = float(detMatched) / numGtCare
+ precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare
+ if evaluationParams['CONFIDENCES'] and evaluationParams[
+ 'PER_SAMPLE_RESULTS']:
+ sampleAP = compute_ap(arrSampleConfidences, arrSampleMatch,
+ numGtCare)
+
+ hmean = 0 if (precision + recall) == 0 else 2.0 * precision * recall / (
+ precision + recall)
+
+ matchedSum += detMatched
+ numGlobalCareGt += numGtCare
+ numGlobalCareDet += numDetCare
+
+ if evaluationParams['PER_SAMPLE_RESULTS']:
+ perSampleMetrics[resFile] = {
+ 'precision': precision,
+ 'recall': recall,
+ 'hmean': hmean,
+ 'pairs': pairs,
+ 'AP': sampleAP,
+ 'iouMat': [] if len(detPols) > 100 else iouMat.tolist(),
+ 'gtPolPoints': gtPolPoints,
+ 'detPolPoints': detPolPoints,
+ 'gtDontCare': gtDontCarePolsNum,
+ 'detDontCare': detDontCarePolsNum,
+ 'evaluationParams': evaluationParams,
+ 'evaluationLog': evaluationLog
+ }
+
+ # Compute MAP and MAR
+ AP = 0
+ if evaluationParams['CONFIDENCES']:
+ AP = compute_ap(arrGlobalConfidences, arrGlobalMatches, numGlobalCareGt)
+
+ methodRecall = 0 if numGlobalCareGt == 0 else float(
+ matchedSum) / numGlobalCareGt
+ methodPrecision = 0 if numGlobalCareDet == 0 else float(
+ matchedSum) / numGlobalCareDet
+ methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * methodRecall * methodPrecision / (
+ methodRecall + methodPrecision)
+
+ methodMetrics = {
+ 'precision': methodPrecision,
+ 'recall': methodRecall,
+ 'hmean': methodHmean,
+ 'AP': AP
+ }
+
+ resDict = {
+ 'calculated': True,
+ 'Message': '',
+ 'method': methodMetrics,
+ 'per_sample': perSampleMetrics
+ }
+
+ return resDict
+
+
+def cal_recall_precison_f1(gt_path, result_path, show_result=False):
+ p = {'g': gt_path, 's': result_path}
+ result = rrc_evaluation_funcs.main_evaluation(p, default_evaluation_params,
+ validate_data,
+ evaluate_method, show_result)
+ return result['method']
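+
+
+# Usage sketch (assumed layout: gt_path holds gt_img_*.txt and result_path
+# holds res_img_*.txt, matching the regexps in default_evaluation_params):
+# metrics = cal_recall_precison_f1('./gt', './output')
+# print(metrics['precision'], metrics['recall'], metrics['hmean'])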
diff --git a/benchmark/PaddleOCR_DBNet/utils/compute_mean_std.py b/benchmark/PaddleOCR_DBNet/utils/compute_mean_std.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d0ab5cd23d66e4070c336a93abebbedad6028b1
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/compute_mean_std.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/7 14:46
+# @Author : zhoujun
+
+import numpy as np
+import cv2
+import os
+import random
+from tqdm import tqdm
+# calculate means and std
+train_txt_path = './train_val_list.txt'
+
+CNum = 10000  # number of images sampled for the statistics
+
+img_h, img_w = 640, 640
+imgs = np.zeros([img_w, img_h, 3, 1])
+means, stdevs = [], []
+
+with open(train_txt_path, 'r') as f:
+ lines = f.readlines()
+    random.shuffle(lines)  # shuffle so the images are sampled randomly
+
+ for i in tqdm(range(CNum)):
+ img_path = lines[i].split('\t')[0]
+
+ img = cv2.imread(img_path)
+ img = cv2.resize(img, (img_h, img_w))
+ img = img[:, :, :, np.newaxis]
+
+ imgs = np.concatenate((imgs, img), axis=3)
+# print(i)
+
+imgs = imgs.astype(np.float32) / 255.
+
+for i in tqdm(range(3)):
+    pixels = imgs[:, :, i, :].ravel()  # flatten to 1-D
+ means.append(np.mean(pixels))
+ stdevs.append(np.std(pixels))
+
+# cv2 reads images as BGR; PIL/Skimage return RGB, which would need no reversing
+means.reverse() # BGR --> RGB
+stdevs.reverse()
+
+print("normMean = {}".format(means))
+print("normStd = {}".format(stdevs))
+print('transforms.Normalize(normMean = {}, normStd = {})'.format(means, stdevs))
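+
+# Note: stacking CNum resized images along axis 3 keeps every sample in
+# memory. A constant-memory sketch with the same result (up to float
+# precision) accumulates per-channel sums instead:
+# s, sq, n = np.zeros(3), np.zeros(3), 0
+# for line in lines[:CNum]:
+#     img = cv2.resize(cv2.imread(line.split('\t')[0]), (img_w, img_h)) / 255.
+#     px = img.reshape(-1, 3)
+#     s += px.sum(0); sq += (px ** 2).sum(0); n += px.shape[0]
+# means, stdevs = s / n, np.sqrt(sq / n - (s / n) ** 2)  # still BGR order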
diff --git a/benchmark/PaddleOCR_DBNet/utils/make_trainfile.py b/benchmark/PaddleOCR_DBNet/utils/make_trainfile.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b7ae70ff7aeea46b7b36ab4a177baa0108d72c3
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/make_trainfile.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/24 12:06
+# @Author : zhoujun
+import os
+import glob
+import pathlib
+
+data_path = r'test'
+# data_path/img holds the images
+# data_path/gt holds the label files
+
+f_w = open(os.path.join(data_path, 'test.txt'), 'w', encoding='utf8')
+for img_path in glob.glob(data_path + '/img/*.jpg', recursive=True):
+ d = pathlib.Path(img_path)
+ label_path = os.path.join(data_path, 'gt', ('gt_' + str(d.stem) + '.txt'))
+    if os.path.exists(img_path) and os.path.exists(label_path):
+        print(img_path, label_path)
+        f_w.write('{}\t{}\n'.format(img_path, label_path))
+    else:
+        print('missing pair:', img_path, label_path)
+f_w.close()
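+
+# Each written line is "<image path>\t<label path>"; this tab-separated pair
+# format is assumed to be what the repo's dataset loaders consume.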
diff --git a/benchmark/PaddleOCR_DBNet/utils/metrics.py b/benchmark/PaddleOCR_DBNet/utils/metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9c54b8d2e8d6acc01aef62a43c27f018f333435
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/metrics.py
@@ -0,0 +1,58 @@
+# Adapted from score written by wkentaro
+# https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/utils.py
+
+import numpy as np
+
+
+class runningScore(object):
+ def __init__(self, n_classes):
+ self.n_classes = n_classes
+ self.confusion_matrix = np.zeros((n_classes, n_classes))
+
+ def _fast_hist(self, label_true, label_pred, n_class):
+ mask = (label_true >= 0) & (label_true < n_class)
+
+ if np.sum((label_pred[mask] < 0)) > 0:
+ print(label_pred[label_pred < 0])
+ hist = np.bincount(
+ n_class * label_true[mask].astype(int) + label_pred[mask],
+ minlength=n_class**2).reshape(n_class, n_class)
+ return hist
+
+ def update(self, label_trues, label_preds):
+ # print label_trues.dtype, label_preds.dtype
+ for lt, lp in zip(label_trues, label_preds):
+ try:
+ self.confusion_matrix += self._fast_hist(lt.flatten(),
+ lp.flatten(),
+ self.n_classes)
+            except Exception:
+                pass
+
+ def get_scores(self):
+ """Returns accuracy score evaluation result.
+ - overall accuracy
+ - mean accuracy
+ - mean IU
+ - fwavacc
+ """
+ hist = self.confusion_matrix
+ acc = np.diag(hist).sum() / (hist.sum() + 0.0001)
+ acc_cls = np.diag(hist) / (hist.sum(axis=1) + 0.0001)
+ acc_cls = np.nanmean(acc_cls)
+ iu = np.diag(hist) / (
+ hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist) + 0.0001)
+ mean_iu = np.nanmean(iu)
+ freq = hist.sum(axis=1) / (hist.sum() + 0.0001)
+ fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
+ cls_iu = dict(zip(range(self.n_classes), iu))
+
+ return {
+ 'Overall Acc': acc,
+ 'Mean Acc': acc_cls,
+ 'FreqW Acc': fwavacc,
+ 'Mean IoU': mean_iu,
+ }, cls_iu
+
+ def reset(self):
+ self.confusion_matrix = np.zeros((self.n_classes, self.n_classes))
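+
+
+# Usage sketch (hypothetical arrays; mirrors how cal_text_score feeds binary
+# shrink maps to this class in trainer/trainer.py):
+# rs = runningScore(2)
+# rs.update(gt_masks.astype(int), (pred_maps > thresh).astype(int))
+# scores, cls_iu = rs.get_scores()  # scores['Mean Acc'], scores['Mean IoU']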
diff --git a/benchmark/PaddleOCR_DBNet/utils/ocr_metric/__init__.py b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e7c51cf0651a25b83e29016f4126461828ff887
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/__init__.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/5 15:36
+# @Author : zhoujun
+from .icdar2015 import QuadMetric
+
+
+def get_metric(config):
+ try:
+ if 'args' not in config:
+ args = {}
+ else:
+ args = config['args']
+ if isinstance(args, dict):
+ cls = eval(config['type'])(**args)
+ else:
+ cls = eval(config['type'])(args)
+ return cls
+    except Exception:
+        return None
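+
+# Usage sketch (is_output_polygon is assumed to be a QuadMetric constructor
+# argument, matching the metric_cls.is_output_polygon attribute the trainer
+# reads):
+# metric = get_metric({'type': 'QuadMetric', 'args': {'is_output_polygon': False}})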
diff --git a/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/__init__.py b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..375ae557e9fdf1120fa79b412de21434c1d71896
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/12/5 15:36
+# @Author : zhoujun
+
+from .quad_metric import QuadMetric
\ No newline at end of file
diff --git a/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/__init__.py b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/deteval.py b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/deteval.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5dcfc4b96a9802200e08467cbd937483960a0fc
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/deteval.py
@@ -0,0 +1,389 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import math
+from collections import namedtuple
+import numpy as np
+from shapely.geometry import Polygon
+
+
+class DetectionDetEvalEvaluator(object):
+ def __init__(self,
+ area_recall_constraint=0.8,
+ area_precision_constraint=0.4,
+ ev_param_ind_center_diff_thr=1,
+ mtype_oo_o=1.0,
+ mtype_om_o=0.8,
+ mtype_om_m=1.0):
+
+ self.area_recall_constraint = area_recall_constraint
+ self.area_precision_constraint = area_precision_constraint
+ self.ev_param_ind_center_diff_thr = ev_param_ind_center_diff_thr
+ self.mtype_oo_o = mtype_oo_o
+ self.mtype_om_o = mtype_om_o
+ self.mtype_om_m = mtype_om_m
+
+ def evaluate_image(self, gt, pred):
+ def get_union(pD, pG):
+ return Polygon(pD).union(Polygon(pG)).area
+
+ def get_intersection_over_union(pD, pG):
+ return get_intersection(pD, pG) / get_union(pD, pG)
+
+ def get_intersection(pD, pG):
+ return Polygon(pD).intersection(Polygon(pG)).area
+
+ def one_to_one_match(row, col):
+ cont = 0
+ for j in range(len(recallMat[0])):
+ if recallMat[row,
+ j] >= self.area_recall_constraint and precisionMat[
+ row, j] >= self.area_precision_constraint:
+ cont = cont + 1
+ if (cont != 1):
+ return False
+ cont = 0
+ for i in range(len(recallMat)):
+ if recallMat[
+ i, col] >= self.area_recall_constraint and precisionMat[
+ i, col] >= self.area_precision_constraint:
+ cont = cont + 1
+ if (cont != 1):
+ return False
+
+ if recallMat[row,
+ col] >= self.area_recall_constraint and precisionMat[
+ row, col] >= self.area_precision_constraint:
+ return True
+ return False
+
+ def num_overlaps_gt(gtNum):
+ cont = 0
+ for detNum in range(len(detRects)):
+ if detNum not in detDontCareRectsNum:
+ if recallMat[gtNum, detNum] > 0:
+ cont = cont + 1
+ return cont
+
+ def num_overlaps_det(detNum):
+ cont = 0
+ for gtNum in range(len(recallMat)):
+ if gtNum not in gtDontCareRectsNum:
+ if recallMat[gtNum, detNum] > 0:
+ cont = cont + 1
+ return cont
+
+ def is_single_overlap(row, col):
+ if num_overlaps_gt(row) == 1 and num_overlaps_det(col) == 1:
+ return True
+ else:
+ return False
+
+ def one_to_many_match(gtNum):
+ many_sum = 0
+ detRects = []
+ for detNum in range(len(recallMat[0])):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and detNum not in detDontCareRectsNum:
+ if precisionMat[gtNum,
+ detNum] >= self.area_precision_constraint:
+ many_sum += recallMat[gtNum, detNum]
+ detRects.append(detNum)
+ if round(many_sum, 4) >= self.area_recall_constraint:
+ return True, detRects
+ else:
+ return False, []
+
+ def many_to_one_match(detNum):
+ many_sum = 0
+ gtRects = []
+ for gtNum in range(len(recallMat)):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and gtNum not in gtDontCareRectsNum:
+ if recallMat[gtNum, detNum] >= self.area_recall_constraint:
+ many_sum += precisionMat[gtNum, detNum]
+ gtRects.append(gtNum)
+ if round(many_sum, 4) >= self.area_precision_constraint:
+ return True, gtRects
+ else:
+ return False, []
+
+ def center_distance(r1, r2):
+ return ((np.mean(r1, axis=0) - np.mean(r2, axis=0))**2).sum()**0.5
+
+ def diag(r):
+ r = np.array(r)
+ return ((r[:, 0].max() - r[:, 0].min())**2 +
+ (r[:, 1].max() - r[:, 1].min())**2)**0.5
+
+ perSampleMetrics = {}
+
+ recall = 0
+ precision = 0
+ hmean = 0
+ recallAccum = 0.
+ precisionAccum = 0.
+ gtRects = []
+ detRects = []
+ gtPolPoints = []
+ detPolPoints = []
+        gtDontCareRectsNum = []  # indices of GT rectangles marked as don't care
+        detDontCareRectsNum = []  # indices of detections matched with a don't care GT
+ pairs = []
+ evaluationLog = ""
+
+ recallMat = np.empty([1, 1])
+ precisionMat = np.empty([1, 1])
+
+ for n in range(len(gt)):
+ points = gt[n]['points']
+ # transcription = gt[n]['text']
+ dontCare = gt[n]['ignore']
+
+ if not Polygon(points).is_valid or not Polygon(points).is_simple:
+ continue
+
+ gtRects.append(points)
+ gtPolPoints.append(points)
+ if dontCare:
+ gtDontCareRectsNum.append(len(gtRects) - 1)
+
+ evaluationLog += "GT rectangles: " + str(len(gtRects)) + (
+ " (" + str(len(gtDontCareRectsNum)) + " don't care)\n"
+ if len(gtDontCareRectsNum) > 0 else "\n")
+
+ for n in range(len(pred)):
+ points = pred[n]['points']
+
+ if not Polygon(points).is_valid or not Polygon(points).is_simple:
+ continue
+
+ detRect = points
+ detRects.append(detRect)
+ detPolPoints.append(points)
+ if len(gtDontCareRectsNum) > 0:
+ for dontCareRectNum in gtDontCareRectsNum:
+ dontCareRect = gtRects[dontCareRectNum]
+ intersected_area = get_intersection(dontCareRect, detRect)
+ rdDimensions = Polygon(detRect).area
+ if (rdDimensions == 0):
+ precision = 0
+ else:
+ precision = intersected_area / rdDimensions
+ if (precision > self.area_precision_constraint):
+ detDontCareRectsNum.append(len(detRects) - 1)
+ break
+
+ evaluationLog += "DET rectangles: " + str(len(detRects)) + (
+ " (" + str(len(detDontCareRectsNum)) + " don't care)\n"
+ if len(detDontCareRectsNum) > 0 else "\n")
+
+ if len(gtRects) == 0:
+ recall = 1
+ precision = 0 if len(detRects) > 0 else 1
+
+ if len(detRects) > 0:
+            # Calculate recall and precision matrices
+ outputShape = [len(gtRects), len(detRects)]
+ recallMat = np.empty(outputShape)
+ precisionMat = np.empty(outputShape)
+ gtRectMat = np.zeros(len(gtRects), np.int8)
+ detRectMat = np.zeros(len(detRects), np.int8)
+ for gtNum in range(len(gtRects)):
+ for detNum in range(len(detRects)):
+ rG = gtRects[gtNum]
+ rD = detRects[detNum]
+ intersected_area = get_intersection(rG, rD)
+ rgDimensions = Polygon(rG).area
+ rdDimensions = Polygon(rD).area
+ recallMat[
+ gtNum,
+ detNum] = 0 if rgDimensions == 0 else intersected_area / rgDimensions
+ precisionMat[
+ gtNum,
+ detNum] = 0 if rdDimensions == 0 else intersected_area / rdDimensions
+
+ # Find one-to-one matches
+ evaluationLog += "Find one-to-one matches\n"
+ for gtNum in range(len(gtRects)):
+ for detNum in range(len(detRects)):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and gtNum not in gtDontCareRectsNum and detNum not in detDontCareRectsNum:
+ match = one_to_one_match(gtNum, detNum)
+ if match is True:
+                            # in deteval we have to run further validation before marking as one-to-one
+ if is_single_overlap(gtNum, detNum) is True:
+ rG = gtRects[gtNum]
+ rD = detRects[detNum]
+ normDist = center_distance(rG, rD)
+ normDist /= diag(rG) + diag(rD)
+ normDist *= 2.0
+ if normDist < self.ev_param_ind_center_diff_thr:
+ gtRectMat[gtNum] = 1
+ detRectMat[detNum] = 1
+ recallAccum += self.mtype_oo_o
+ precisionAccum += self.mtype_oo_o
+ pairs.append({
+ 'gt': gtNum,
+ 'det': detNum,
+ 'type': 'OO'
+ })
+ evaluationLog += "Match GT #" + str(
+ gtNum) + " with Det #" + str(
+ detNum) + "\n"
+ else:
+ evaluationLog += "Match Discarded GT #" + str(
+ gtNum) + " with Det #" + str(
+ detNum) + " normDist: " + str(
+ normDist) + " \n"
+ else:
+ evaluationLog += "Match Discarded GT #" + str(
+ gtNum) + " with Det #" + str(
+ detNum) + " not single overlap\n"
+ # Find one-to-many matches
+ evaluationLog += "Find one-to-many matches\n"
+ for gtNum in range(len(gtRects)):
+ if gtNum not in gtDontCareRectsNum:
+ match, matchesDet = one_to_many_match(gtNum)
+ if match is True:
+ evaluationLog += "num_overlaps_gt=" + str(
+ num_overlaps_gt(gtNum))
+                        # in deteval we have to run further validation before marking the match
+ if num_overlaps_gt(gtNum) >= 2:
+ gtRectMat[gtNum] = 1
+ recallAccum += (self.mtype_oo_o
+ if len(matchesDet) == 1 else
+ self.mtype_om_o)
+ precisionAccum += (self.mtype_oo_o
+ if len(matchesDet) == 1 else
+ self.mtype_om_o *
+ len(matchesDet))
+ pairs.append({
+ 'gt': gtNum,
+ 'det': matchesDet,
+ 'type': 'OO' if len(matchesDet) == 1 else 'OM'
+ })
+ for detNum in matchesDet:
+ detRectMat[detNum] = 1
+ evaluationLog += "Match GT #" + str(
+ gtNum) + " with Det #" + str(matchesDet) + "\n"
+ else:
+ evaluationLog += "Match Discarded GT #" + str(
+ gtNum) + " with Det #" + str(
+ matchesDet) + " not single overlap\n"
+
+ # Find many-to-one matches
+ evaluationLog += "Find many-to-one matches\n"
+ for detNum in range(len(detRects)):
+ if detNum not in detDontCareRectsNum:
+ match, matchesGt = many_to_one_match(detNum)
+ if match is True:
+                        # in deteval we have to run further validation before marking the match
+ if num_overlaps_det(detNum) >= 2:
+ detRectMat[detNum] = 1
+ recallAccum += (self.mtype_oo_o
+ if len(matchesGt) == 1 else
+ self.mtype_om_m * len(matchesGt))
+ precisionAccum += (self.mtype_oo_o
+ if len(matchesGt) == 1 else
+ self.mtype_om_m)
+ pairs.append({
+ 'gt': matchesGt,
+ 'det': detNum,
+ 'type': 'OO' if len(matchesGt) == 1 else 'MO'
+ })
+ for gtNum in matchesGt:
+ gtRectMat[gtNum] = 1
+ evaluationLog += "Match GT #" + str(
+ matchesGt) + " with Det #" + str(detNum) + "\n"
+ else:
+ evaluationLog += "Match Discarded GT #" + str(
+ matchesGt) + " with Det #" + str(
+ detNum) + " not single overlap\n"
+
+ numGtCare = (len(gtRects) - len(gtDontCareRectsNum))
+ if numGtCare == 0:
+ recall = float(1)
+ precision = float(0) if len(detRects) > 0 else float(1)
+ else:
+ recall = float(recallAccum) / numGtCare
+ precision = float(0) if (
+ len(detRects) - len(detDontCareRectsNum)
+ ) == 0 else float(precisionAccum) / (
+ len(detRects) - len(detDontCareRectsNum))
+ hmean = 0 if (precision + recall
+ ) == 0 else 2.0 * precision * recall / (
+ precision + recall)
+
+ numGtCare = len(gtRects) - len(gtDontCareRectsNum)
+ numDetCare = len(detRects) - len(detDontCareRectsNum)
+
+ perSampleMetrics = {
+ 'precision': precision,
+ 'recall': recall,
+ 'hmean': hmean,
+ 'pairs': pairs,
+ 'recallMat': [] if len(detRects) > 100 else recallMat.tolist(),
+ 'precisionMat': []
+ if len(detRects) > 100 else precisionMat.tolist(),
+ 'gtPolPoints': gtPolPoints,
+ 'detPolPoints': detPolPoints,
+ 'gtCare': numGtCare,
+ 'detCare': numDetCare,
+ 'gtDontCare': gtDontCareRectsNum,
+ 'detDontCare': detDontCareRectsNum,
+ 'recallAccum': recallAccum,
+ 'precisionAccum': precisionAccum,
+ 'evaluationLog': evaluationLog
+ }
+
+ return perSampleMetrics
+
+ def combine_results(self, results):
+ numGt = 0
+ numDet = 0
+ methodRecallSum = 0
+ methodPrecisionSum = 0
+
+ for result in results:
+ numGt += result['gtCare']
+ numDet += result['detCare']
+ methodRecallSum += result['recallAccum']
+ methodPrecisionSum += result['precisionAccum']
+
+ methodRecall = 0 if numGt == 0 else methodRecallSum / numGt
+ methodPrecision = 0 if numDet == 0 else methodPrecisionSum / numDet
+ methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * methodRecall * methodPrecision / (
+ methodRecall + methodPrecision)
+
+ methodMetrics = {
+ 'precision': methodPrecision,
+ 'recall': methodRecall,
+ 'hmean': methodHmean
+ }
+
+ return methodMetrics
+
+
+if __name__ == '__main__':
+ evaluator = DetectionDetEvalEvaluator()
+ gts = [[{
+ 'points': [(0, 0), (1, 0), (1, 1), (0, 1)],
+ 'text': 1234,
+ 'ignore': False,
+ }, {
+ 'points': [(2, 2), (3, 2), (3, 3), (2, 3)],
+ 'text': 5678,
+ 'ignore': True,
+ }]]
+ preds = [[{
+ 'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)],
+ 'text': 123,
+ 'ignore': False,
+ }]]
+ results = []
+ for gt, pred in zip(gts, preds):
+ results.append(evaluator.evaluate_image(gt, pred))
+ metrics = evaluator.combine_results(results)
+ print(metrics)
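+
+# The demo scores one care GT box against a single detection (the second GT is
+# marked don't care); combine_results then reduces the per-image accumulators
+# to corpus-level precision/recall/hmean.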
diff --git a/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/icdar2013.py b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/icdar2013.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e8c86aae334dfdc1f35db91772e09c164e29d22
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/icdar2013.py
@@ -0,0 +1,346 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import math
+from collections import namedtuple
+import numpy as np
+from shapely.geometry import Polygon
+
+
+class DetectionICDAR2013Evaluator(object):
+ def __init__(self,
+ area_recall_constraint=0.8,
+ area_precision_constraint=0.4,
+ ev_param_ind_center_diff_thr=1,
+ mtype_oo_o=1.0,
+ mtype_om_o=0.8,
+ mtype_om_m=1.0):
+
+ self.area_recall_constraint = area_recall_constraint
+ self.area_precision_constraint = area_precision_constraint
+ self.ev_param_ind_center_diff_thr = ev_param_ind_center_diff_thr
+ self.mtype_oo_o = mtype_oo_o
+ self.mtype_om_o = mtype_om_o
+ self.mtype_om_m = mtype_om_m
+
+ def evaluate_image(self, gt, pred):
+ def get_union(pD, pG):
+ return Polygon(pD).union(Polygon(pG)).area
+
+ def get_intersection_over_union(pD, pG):
+ return get_intersection(pD, pG) / get_union(pD, pG)
+
+ def get_intersection(pD, pG):
+ return Polygon(pD).intersection(Polygon(pG)).area
+
+ def one_to_one_match(row, col):
+ cont = 0
+ for j in range(len(recallMat[0])):
+ if recallMat[row,
+ j] >= self.area_recall_constraint and precisionMat[
+ row, j] >= self.area_precision_constraint:
+ cont = cont + 1
+ if (cont != 1):
+ return False
+ cont = 0
+ for i in range(len(recallMat)):
+ if recallMat[
+ i, col] >= self.area_recall_constraint and precisionMat[
+ i, col] >= self.area_precision_constraint:
+ cont = cont + 1
+ if (cont != 1):
+ return False
+
+ if recallMat[row,
+ col] >= self.area_recall_constraint and precisionMat[
+ row, col] >= self.area_precision_constraint:
+ return True
+ return False
+
+ def one_to_many_match(gtNum):
+ many_sum = 0
+ detRects = []
+ for detNum in range(len(recallMat[0])):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and detNum not in detDontCareRectsNum:
+ if precisionMat[gtNum,
+ detNum] >= self.area_precision_constraint:
+ many_sum += recallMat[gtNum, detNum]
+ detRects.append(detNum)
+ if round(many_sum, 4) >= self.area_recall_constraint:
+ return True, detRects
+ else:
+ return False, []
+
+ def many_to_one_match(detNum):
+ many_sum = 0
+ gtRects = []
+ for gtNum in range(len(recallMat)):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and gtNum not in gtDontCareRectsNum:
+ if recallMat[gtNum, detNum] >= self.area_recall_constraint:
+ many_sum += precisionMat[gtNum, detNum]
+ gtRects.append(gtNum)
+ if round(many_sum, 4) >= self.area_precision_constraint:
+ return True, gtRects
+ else:
+ return False, []
+
+ def center_distance(r1, r2):
+ return ((np.mean(r1, axis=0) - np.mean(r2, axis=0))**2).sum()**0.5
+
+ def diag(r):
+ r = np.array(r)
+ return ((r[:, 0].max() - r[:, 0].min())**2 +
+ (r[:, 1].max() - r[:, 1].min())**2)**0.5
+
+ perSampleMetrics = {}
+
+ recall = 0
+ precision = 0
+ hmean = 0
+ recallAccum = 0.
+ precisionAccum = 0.
+ gtRects = []
+ detRects = []
+ gtPolPoints = []
+ detPolPoints = []
+        gtDontCareRectsNum = []  # indices of GT rectangles marked as don't care
+        detDontCareRectsNum = []  # indices of detections matched with a don't care GT
+ pairs = []
+ evaluationLog = ""
+
+ recallMat = np.empty([1, 1])
+ precisionMat = np.empty([1, 1])
+
+ for n in range(len(gt)):
+ points = gt[n]['points']
+ # transcription = gt[n]['text']
+ dontCare = gt[n]['ignore']
+
+ if not Polygon(points).is_valid or not Polygon(points).is_simple:
+ continue
+
+ gtRects.append(points)
+ gtPolPoints.append(points)
+ if dontCare:
+ gtDontCareRectsNum.append(len(gtRects) - 1)
+
+ evaluationLog += "GT rectangles: " + str(len(gtRects)) + (
+ " (" + str(len(gtDontCareRectsNum)) + " don't care)\n"
+ if len(gtDontCareRectsNum) > 0 else "\n")
+
+ for n in range(len(pred)):
+ points = pred[n]['points']
+
+ if not Polygon(points).is_valid or not Polygon(points).is_simple:
+ continue
+
+ detRect = points
+ detRects.append(detRect)
+ detPolPoints.append(points)
+ if len(gtDontCareRectsNum) > 0:
+ for dontCareRectNum in gtDontCareRectsNum:
+ dontCareRect = gtRects[dontCareRectNum]
+ intersected_area = get_intersection(dontCareRect, detRect)
+ rdDimensions = Polygon(detRect).area
+ if (rdDimensions == 0):
+ precision = 0
+ else:
+ precision = intersected_area / rdDimensions
+ if (precision > self.area_precision_constraint):
+ detDontCareRectsNum.append(len(detRects) - 1)
+ break
+
+ evaluationLog += "DET rectangles: " + str(len(detRects)) + (
+ " (" + str(len(detDontCareRectsNum)) + " don't care)\n"
+ if len(detDontCareRectsNum) > 0 else "\n")
+
+ if len(gtRects) == 0:
+ recall = 1
+ precision = 0 if len(detRects) > 0 else 1
+
+ if len(detRects) > 0:
+ # Calculate the recall and precision matrices
+ outputShape = [len(gtRects), len(detRects)]
+ recallMat = np.empty(outputShape)
+ precisionMat = np.empty(outputShape)
+ gtRectMat = np.zeros(len(gtRects), np.int8)
+ detRectMat = np.zeros(len(detRects), np.int8)
+ for gtNum in range(len(gtRects)):
+ for detNum in range(len(detRects)):
+ rG = gtRects[gtNum]
+ rD = detRects[detNum]
+ intersected_area = get_intersection(rG, rD)
+ rgDimensions = Polygon(rG).area
+ rdDimensions = Polygon(rD).area
+ recallMat[
+ gtNum,
+ detNum] = 0 if rgDimensions == 0 else intersected_area / rgDimensions
+ precisionMat[
+ gtNum,
+ detNum] = 0 if rdDimensions == 0 else intersected_area / rdDimensions
+
+ # Find one-to-one matches
+ evaluationLog += "Find one-to-one matches\n"
+ for gtNum in range(len(gtRects)):
+ for detNum in range(len(detRects)):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and gtNum not in gtDontCareRectsNum and detNum not in detDontCareRectsNum:
+ match = one_to_one_match(gtNum, detNum)
+ if match is True:
+ # DetEval requires an extra center-distance validation before accepting a one-to-one match
+ rG = gtRects[gtNum]
+ rD = detRects[detNum]
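+ # Distance between the box centers, normalized by the average
+ # diagonal of the two boxes: 2*d / (diag(G) + diag(D)).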
+ normDist = center_distance(rG, rD)
+ normDist /= diag(rG) + diag(rD)
+ normDist *= 2.0
+ if normDist < self.ev_param_ind_center_diff_thr:
+ gtRectMat[gtNum] = 1
+ detRectMat[detNum] = 1
+ recallAccum += self.mtype_oo_o
+ precisionAccum += self.mtype_oo_o
+ pairs.append({
+ 'gt': gtNum,
+ 'det': detNum,
+ 'type': 'OO'
+ })
+ evaluationLog += "Match GT #" + str(
+ gtNum) + " with Det #" + str(detNum) + "\n"
+ else:
+ evaluationLog += "Match Discarded GT #" + str(
+ gtNum) + " with Det #" + str(
+ detNum) + " normDist: " + str(
+ normDist) + " \n"
+ # Find one-to-many matches
+ evaluationLog += "Find one-to-many matches\n"
+ for gtNum in range(len(gtRects)):
+ if gtNum not in gtDontCareRectsNum:
+ match, matchesDet = one_to_many_match(gtNum)
+ if match is True:
+ evaluationLog += "num_overlaps_gt=" + str(
+ num_overlaps_gt(gtNum))
+ gtRectMat[gtNum] = 1
+ recallAccum += (self.mtype_oo_o if len(matchesDet) == 1
+ else self.mtype_om_o)
+ precisionAccum += (self.mtype_oo_o
+ if len(matchesDet) == 1 else
+ self.mtype_om_o * len(matchesDet))
+ pairs.append({
+ 'gt': gtNum,
+ 'det': matchesDet,
+ 'type': 'OO' if len(matchesDet) == 1 else 'OM'
+ })
+ for detNum in matchesDet:
+ detRectMat[detNum] = 1
+ evaluationLog += "Match GT #" + str(
+ gtNum) + " with Det #" + str(matchesDet) + "\n"
+
+ # Find many-to-one matches
+ evaluationLog += "Find many-to-one matches\n"
+ for detNum in range(len(detRects)):
+ if detNum not in detDontCareRectsNum:
+ match, matchesGt = many_to_one_match(detNum)
+ if match is True:
+ detRectMat[detNum] = 1
+ recallAccum += (self.mtype_oo_o if len(matchesGt) == 1
+ else self.mtype_om_m * len(matchesGt))
+ precisionAccum += (self.mtype_oo_o
+ if len(matchesGt) == 1 else
+ self.mtype_om_m)
+ pairs.append({
+ 'gt': matchesGt,
+ 'det': detNum,
+ 'type': 'OO' if len(matchesGt) == 1 else 'MO'
+ })
+ for gtNum in matchesGt:
+ gtRectMat[gtNum] = 1
+ evaluationLog += "Match GT #" + str(
+ matchesGt) + " with Det #" + str(detNum) + "\n"
+
+ numGtCare = (len(gtRects) - len(gtDontCareRectsNum))
+ if numGtCare == 0:
+ recall = float(1)
+ precision = float(0) if len(detRects) > 0 else float(1)
+ else:
+ recall = float(recallAccum) / numGtCare
+ precision = float(0) if (
+ len(detRects) - len(detDontCareRectsNum)
+ ) == 0 else float(precisionAccum) / (
+ len(detRects) - len(detDontCareRectsNum))
+ hmean = 0 if (precision + recall
+ ) == 0 else 2.0 * precision * recall / (
+ precision + recall)
+
+ numGtCare = len(gtRects) - len(gtDontCareRectsNum)
+ numDetCare = len(detRects) - len(detDontCareRectsNum)
+
+ perSampleMetrics = {
+ 'precision': precision,
+ 'recall': recall,
+ 'hmean': hmean,
+ 'pairs': pairs,
+ 'recallMat': [] if len(detRects) > 100 else recallMat.tolist(),
+ 'precisionMat': []
+ if len(detRects) > 100 else precisionMat.tolist(),
+ 'gtPolPoints': gtPolPoints,
+ 'detPolPoints': detPolPoints,
+ 'gtCare': numGtCare,
+ 'detCare': numDetCare,
+ 'gtDontCare': gtDontCareRectsNum,
+ 'detDontCare': detDontCareRectsNum,
+ 'recallAccum': recallAccum,
+ 'precisionAccum': precisionAccum,
+ 'evaluationLog': evaluationLog
+ }
+
+ return perSampleMetrics
+
+ def combine_results(self, results):
+ numGt = 0
+ numDet = 0
+ methodRecallSum = 0
+ methodPrecisionSum = 0
+
+ for result in results:
+ numGt += result['gtCare']
+ numDet += result['detCare']
+ methodRecallSum += result['recallAccum']
+ methodPrecisionSum += result['precisionAccum']
+
+ methodRecall = 0 if numGt == 0 else methodRecallSum / numGt
+ methodPrecision = 0 if numDet == 0 else methodPrecisionSum / numDet
+ methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * methodRecall * methodPrecision / (
+ methodRecall + methodPrecision)
+
+ methodMetrics = {
+ 'precision': methodPrecision,
+ 'recall': methodRecall,
+ 'hmean': methodHmean
+ }
+
+ return methodMetrics
+
+
+if __name__ == '__main__':
+ evaluator = DetectionICDAR2013Evaluator()
+ gts = [[{
+ 'points': [(0, 0), (1, 0), (1, 1), (0, 1)],
+ 'text': 1234,
+ 'ignore': False,
+ }, {
+ 'points': [(2, 2), (3, 2), (3, 3), (2, 3)],
+ 'text': 5678,
+ 'ignore': True,
+ }]]
+ preds = [[{
+ 'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)],
+ 'text': 123,
+ 'ignore': False,
+ }]]
+ results = []
+ for gt, pred in zip(gts, preds):
+ results.append(evaluator.evaluate_image(gt, pred))
+ metrics = evaluator.combine_results(results)
+ print(metrics)
diff --git a/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/iou.py b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/iou.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f9533b3c37e7d11ce2aaa276211baae14d1fb97
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/iou.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from collections import namedtuple
+import numpy as np
+from shapely.geometry import Polygon
+import cv2
+
+
+def iou_rotate(box_a, box_b, method='union'):
+ rect_a = cv2.minAreaRect(box_a)
+ rect_b = cv2.minAreaRect(box_b)
+ r1 = cv2.rotatedRectangleIntersection(rect_a, rect_b)
+ if r1[0] == 0:
+ return 0
+ else:
+ inter_area = cv2.contourArea(r1[1])
+ area_a = cv2.contourArea(box_a)
+ area_b = cv2.contourArea(box_b)
+ union_area = area_a + area_b - inter_area
+ if union_area == 0 or inter_area == 0:
+ return 0
+ if method == 'union':
+ iou = inter_area / union_area
+ elif method == 'intersection':
+ iou = inter_area / min(area_a, area_b)
+ else:
+ raise NotImplementedError
+ return iou
+
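+# Quick sanity check for iou_rotate (illustrative values, not part of the
+# benchmark): two unit squares overlapping by half have intersection 0.5 and
+# union 1.5, so the rotated IoU should come out near 1/3, e.g.
+#   box_a = np.float32([[0, 0], [1, 0], [1, 1], [0, 1]])
+#   box_b = np.float32([[0.5, 0], [1.5, 0], [1.5, 1], [0.5, 1]])
+#   iou_rotate(box_a, box_b)                  # ~0.333 with method='union'
+#   iou_rotate(box_a, box_b, 'intersection')  # ~0.5, inter / min(area)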
+
+class DetectionIoUEvaluator(object):
+ def __init__(self,
+ is_output_polygon=False,
+ iou_constraint=0.5,
+ area_precision_constraint=0.5):
+ self.is_output_polygon = is_output_polygon
+ self.iou_constraint = iou_constraint
+ self.area_precision_constraint = area_precision_constraint
+
+ def evaluate_image(self, gt, pred):
+ def get_union(pD, pG):
+ return Polygon(pD).union(Polygon(pG)).area
+
+ def get_intersection_over_union(pD, pG):
+ return get_intersection(pD, pG) / get_union(pD, pG)
+
+ def get_intersection(pD, pG):
+ return Polygon(pD).intersection(Polygon(pG)).area
+
+ def compute_ap(confList, matchList, numGtCare):
+ correct = 0
+ AP = 0
+ if len(confList) > 0:
+ confList = np.array(confList)
+ matchList = np.array(matchList)
+ sorted_ind = np.argsort(-confList)
+ confList = confList[sorted_ind]
+ matchList = matchList[sorted_ind]
+ for n in range(len(confList)):
+ match = matchList[n]
+ if match:
+ correct += 1
+ AP += float(correct) / (n + 1)
+
+ if numGtCare > 0:
+ AP /= numGtCare
+
+ return AP
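+ # Illustrative: with 2 care GT boxes and correct matches at
+ # ranks 1 and 3, AP = (1/1 + 2/3) / 2 ≈ 0.833.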
+
+ perSampleMetrics = {}
+
+ matchedSum = 0
+
+ Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax')
+
+ numGlobalCareGt = 0
+ numGlobalCareDet = 0
+
+ arrGlobalConfidences = []
+ arrGlobalMatches = []
+
+ recall = 0
+ precision = 0
+ hmean = 0
+
+ detMatched = 0
+
+ iouMat = np.empty([1, 1])
+
+ gtPols = []
+ detPols = []
+
+ gtPolPoints = []
+ detPolPoints = []
+
+ # Array of Ground Truth Polygons' keys marked as don't Care
+ gtDontCarePolsNum = []
+ # Array of Detected Polygons' matched with a don't Care GT
+ detDontCarePolsNum = []
+
+ pairs = []
+ detMatchedNums = []
+
+ arrSampleConfidences = []
+ arrSampleMatch = []
+
+ evaluationLog = ""
+
+ for n in range(len(gt)):
+ points = gt[n]['points']
+ # transcription = gt[n]['text']
+ dontCare = gt[n]['ignore']
+
+ if not Polygon(points).is_valid or not Polygon(points).is_simple:
+ continue
+
+ gtPol = points
+ gtPols.append(gtPol)
+ gtPolPoints.append(points)
+ if dontCare:
+ gtDontCarePolsNum.append(len(gtPols) - 1)
+
+ evaluationLog += "GT polygons: " + str(len(gtPols)) + (
+ " (" + str(len(gtDontCarePolsNum)) + " don't care)\n"
+ if len(gtDontCarePolsNum) > 0 else "\n")
+
+ for n in range(len(pred)):
+ points = pred[n]['points']
+ if not Polygon(points).is_valid or not Polygon(points).is_simple:
+ continue
+
+ detPol = points
+ detPols.append(detPol)
+ detPolPoints.append(points)
+ if len(gtDontCarePolsNum) > 0:
+ for dontCareNum in gtDontCarePolsNum:
+ dontCarePol = gtPols[dontCareNum]
+ intersected_area = get_intersection(dontCarePol, detPol)
+ pdDimensions = Polygon(detPol).area
+ precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions
+ if (precision > self.area_precision_constraint):
+ detDontCarePolsNum.append(len(detPols) - 1)
+ break
+
+ evaluationLog += "DET polygons: " + str(len(detPols)) + (
+ " (" + str(len(detDontCarePolsNum)) + " don't care)\n"
+ if len(detDontCarePolsNum) > 0 else "\n")
+
+ if len(gtPols) > 0 and len(detPols) > 0:
+ # Calculate the IoU matrix
+ outputShape = [len(gtPols), len(detPols)]
+ iouMat = np.empty(outputShape)
+ gtRectMat = np.zeros(len(gtPols), np.int8)
+ detRectMat = np.zeros(len(detPols), np.int8)
+ if self.is_output_polygon:
+ for gtNum in range(len(gtPols)):
+ for detNum in range(len(detPols)):
+ pG = gtPols[gtNum]
+ pD = detPols[detNum]
+ iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG)
+ else:
+ # gtPols = np.float32(gtPols)
+ # detPols = np.float32(detPols)
+ for gtNum in range(len(gtPols)):
+ for detNum in range(len(detPols)):
+ pG = np.float32(gtPols[gtNum])
+ pD = np.float32(detPols[detNum])
+ iouMat[gtNum, detNum] = iou_rotate(pD, pG)
+ for gtNum in range(len(gtPols)):
+ for detNum in range(len(detPols)):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum:
+ if iouMat[gtNum, detNum] > self.iou_constraint:
+ gtRectMat[gtNum] = 1
+ detRectMat[detNum] = 1
+ detMatched += 1
+ pairs.append({'gt': gtNum, 'det': detNum})
+ detMatchedNums.append(detNum)
+ evaluationLog += "Match GT #" + \
+ str(gtNum) + " with Det #" + str(detNum) + "\n"
+
+ numGtCare = (len(gtPols) - len(gtDontCarePolsNum))
+ numDetCare = (len(detPols) - len(detDontCarePolsNum))
+ if numGtCare == 0:
+ recall = float(1)
+ precision = float(0) if numDetCare > 0 else float(1)
+ else:
+ recall = float(detMatched) / numGtCare
+ precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare
+
+ hmean = 0 if (precision + recall) == 0 else 2.0 * \
+ precision * recall / (precision + recall)
+
+ matchedSum += detMatched
+ numGlobalCareGt += numGtCare
+ numGlobalCareDet += numDetCare
+
+ perSampleMetrics = {
+ 'precision': precision,
+ 'recall': recall,
+ 'hmean': hmean,
+ 'pairs': pairs,
+ 'iouMat': [] if len(detPols) > 100 else iouMat.tolist(),
+ 'gtPolPoints': gtPolPoints,
+ 'detPolPoints': detPolPoints,
+ 'gtCare': numGtCare,
+ 'detCare': numDetCare,
+ 'gtDontCare': gtDontCarePolsNum,
+ 'detDontCare': detDontCarePolsNum,
+ 'detMatched': detMatched,
+ 'evaluationLog': evaluationLog
+ }
+
+ return perSampleMetrics
+
+ def combine_results(self, results):
+ numGlobalCareGt = 0
+ numGlobalCareDet = 0
+ matchedSum = 0
+ for result in results:
+ numGlobalCareGt += result['gtCare']
+ numGlobalCareDet += result['detCare']
+ matchedSum += result['detMatched']
+
+ methodRecall = 0 if numGlobalCareGt == 0 else float(
+ matchedSum) / numGlobalCareGt
+ methodPrecision = 0 if numGlobalCareDet == 0 else float(
+ matchedSum) / numGlobalCareDet
+ methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \
+ methodRecall * methodPrecision / (
+ methodRecall + methodPrecision)
+
+ methodMetrics = {
+ 'precision': methodPrecision,
+ 'recall': methodRecall,
+ 'hmean': methodHmean
+ }
+
+ return methodMetrics
+
+
+if __name__ == '__main__':
+ evaluator = DetectionIoUEvaluator()
+ preds = [[{
+ 'points': [(0.1, 0.1), (0.5, 0), (0.5, 1), (0, 1)],
+ 'text': 1234,
+ 'ignore': False,
+ }, {
+ 'points': [(0.5, 0.1), (1, 0), (1, 1), (0.5, 1)],
+ 'text': 5678,
+ 'ignore': False,
+ }]]
+ gts = [[{
+ 'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)],
+ 'text': 123,
+ 'ignore': False,
+ }]]
+ results = []
+ for gt, pred in zip(gts, preds):
+ results.append(evaluator.evaluate_image(gt, pred))
+ metrics = evaluator.combine_results(results)
+ print(metrics)
diff --git a/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/mtwi2018.py b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/mtwi2018.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e319aacf5a395a121e94bd2e9d123cec9279e7e
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/detection/mtwi2018.py
@@ -0,0 +1,335 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import math
+from collections import namedtuple
+import numpy as np
+from shapely.geometry import Polygon
+
+
+class DetectionMTWI2018Evaluator(object):
+ def __init__(
+ self,
+ area_recall_constraint=0.7,
+ area_precision_constraint=0.7,
+ ev_param_ind_center_diff_thr=1, ):
+
+ self.area_recall_constraint = area_recall_constraint
+ self.area_precision_constraint = area_precision_constraint
+ self.ev_param_ind_center_diff_thr = ev_param_ind_center_diff_thr
+
+ def evaluate_image(self, gt, pred):
+ def get_union(pD, pG):
+ return Polygon(pD).union(Polygon(pG)).area
+
+ def get_intersection_over_union(pD, pG):
+ return get_intersection(pD, pG) / get_union(pD, pG)
+
+ def get_intersection(pD, pG):
+ return Polygon(pD).intersection(Polygon(pG)).area
+
+ def one_to_one_match(row, col):
+ cont = 0
+ for j in range(len(recallMat[0])):
+ if recallMat[row,
+ j] >= self.area_recall_constraint and precisionMat[
+ row, j] >= self.area_precision_constraint:
+ cont = cont + 1
+ if (cont != 1):
+ return False
+ cont = 0
+ for i in range(len(recallMat)):
+ if recallMat[
+ i, col] >= self.area_recall_constraint and precisionMat[
+ i, col] >= self.area_precision_constraint:
+ cont = cont + 1
+ if (cont != 1):
+ return False
+
+ if recallMat[row,
+ col] >= self.area_recall_constraint and precisionMat[
+ row, col] >= self.area_precision_constraint:
+ return True
+ return False
+
+ def one_to_many_match(gtNum):
+ many_sum = 0
+ detRects = []
+ for detNum in range(len(recallMat[0])):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and detNum not in detDontCareRectsNum:
+ if precisionMat[gtNum,
+ detNum] >= self.area_precision_constraint:
+ many_sum += recallMat[gtNum, detNum]
+ detRects.append(detNum)
+ if round(many_sum, 4) >= self.area_recall_constraint:
+ return True, detRects
+ else:
+ return False, []
+
+ def many_to_one_match(detNum):
+ many_sum = 0
+ gtRects = []
+ for gtNum in range(len(recallMat)):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and gtNum not in gtDontCareRectsNum:
+ if recallMat[gtNum, detNum] >= self.area_recall_constraint:
+ many_sum += precisionMat[gtNum, detNum]
+ gtRects.append(gtNum)
+ if round(many_sum, 4) >= self.area_precision_constraint:
+ return True, gtRects
+ else:
+ return False, []
+
+ def center_distance(r1, r2):
+ return ((np.mean(r1, axis=0) - np.mean(r2, axis=0))**2).sum()**0.5
+
+ def diag(r):
+ r = np.array(r)
+ return ((r[:, 0].max() - r[:, 0].min())**2 +
+ (r[:, 1].max() - r[:, 1].min())**2)**0.5
+
+ perSampleMetrics = {}
+
+ recall = 0
+ precision = 0
+ hmean = 0
+ recallAccum = 0.
+ precisionAccum = 0.
+ gtRects = []
+ detRects = []
+ gtPolPoints = []
+ detPolPoints = []
+ # Indices of ground-truth rectangles marked as don't care
+ gtDontCareRectsNum = []
+ # Indices of detected rectangles matched with a don't-care GT
+ detDontCareRectsNum = []
+ pairs = []
+ evaluationLog = ""
+
+ recallMat = np.empty([1, 1])
+ precisionMat = np.empty([1, 1])
+
+ for n in range(len(gt)):
+ points = gt[n]['points']
+ # transcription = gt[n]['text']
+ dontCare = gt[n]['ignore']
+
+ if not Polygon(points).is_valid or not Polygon(points).is_simple:
+ continue
+
+ gtRects.append(points)
+ gtPolPoints.append(points)
+ if dontCare:
+ gtDontCareRectsNum.append(len(gtRects) - 1)
+
+ evaluationLog += "GT rectangles: " + str(len(gtRects)) + (
+ " (" + str(len(gtDontCareRectsNum)) + " don't care)\n"
+ if len(gtDontCareRectsNum) > 0 else "\n")
+
+ for n in range(len(pred)):
+ points = pred[n]['points']
+
+ if not Polygon(points).is_valid or not Polygon(points).is_simple:
+ continue
+
+ detRect = points
+ detRects.append(detRect)
+ detPolPoints.append(points)
+ if len(gtDontCareRectsNum) > 0:
+ for dontCareRectNum in gtDontCareRectsNum:
+ dontCareRect = gtRects[dontCareRectNum]
+ intersected_area = get_intersection(dontCareRect, detRect)
+ rdDimensions = Polygon(detRect).area
+ if (rdDimensions == 0):
+ precision = 0
+ else:
+ precision = intersected_area / rdDimensions
+ if (precision > 0.5):
+ detDontCareRectsNum.append(len(detRects) - 1)
+ break
+
+ evaluationLog += "DET rectangles: " + str(len(detRects)) + (
+ " (" + str(len(detDontCareRectsNum)) + " don't care)\n"
+ if len(detDontCareRectsNum) > 0 else "\n")
+
+ if len(gtRects) == 0:
+ recall = 1
+ precision = 0 if len(detRects) > 0 else 1
+
+ if len(detRects) > 0:
+ # Calculate the recall and precision matrices
+ outputShape = [len(gtRects), len(detRects)]
+ recallMat = np.empty(outputShape)
+ precisionMat = np.empty(outputShape)
+ gtRectMat = np.zeros(len(gtRects), np.int8)
+ detRectMat = np.zeros(len(detRects), np.int8)
+ for gtNum in range(len(gtRects)):
+ for detNum in range(len(detRects)):
+ rG = gtRects[gtNum]
+ rD = detRects[detNum]
+ intersected_area = get_intersection(rG, rD)
+ rgDimensions = Polygon(rG).area
+ rdDimensions = Polygon(rD).area
+ recallMat[
+ gtNum,
+ detNum] = 0 if rgDimensions == 0 else intersected_area / rgDimensions
+ precisionMat[
+ gtNum,
+ detNum] = 0 if rdDimensions == 0 else intersected_area / rdDimensions
+
+ # Find one-to-one matches
+ evaluationLog += "Find one-to-one matches\n"
+ for gtNum in range(len(gtRects)):
+ for detNum in range(len(detRects)):
+ if gtRectMat[gtNum] == 0 and detRectMat[
+ detNum] == 0 and gtNum not in gtDontCareRectsNum and detNum not in detDontCareRectsNum:
+ match = one_to_one_match(gtNum, detNum)
+ if match is True:
+ # DetEval requires an extra center-distance validation before accepting a one-to-one match
+ rG = gtRects[gtNum]
+ rD = detRects[detNum]
+ normDist = center_distance(rG, rD)
+ normDist /= diag(rG) + diag(rD)
+ normDist *= 2.0
+ if normDist < self.ev_param_ind_center_diff_thr:
+ gtRectMat[gtNum] = 1
+ detRectMat[detNum] = 1
+ recallAccum += 1.0
+ precisionAccum += 1.0
+ pairs.append({
+ 'gt': gtNum,
+ 'det': detNum,
+ 'type': 'OO'
+ })
+ evaluationLog += "Match GT #" + str(
+ gtNum) + " with Det #" + str(detNum) + "\n"
+ else:
+ evaluationLog += "Match Discarded GT #" + str(
+ gtNum) + " with Det #" + str(
+ detNum) + " normDist: " + str(
+ normDist) + " \n"
+ # Find one-to-many matches
+ evaluationLog += "Find one-to-many matches\n"
+ for gtNum in range(len(gtRects)):
+ if gtNum not in gtDontCareRectsNum:
+ match, matchesDet = one_to_many_match(gtNum)
+ if match is True:
+ gtRectMat[gtNum] = 1
+ recallAccum += 1.0
+ precisionAccum += len(matchesDet) / (
+ 1 + math.log(len(matchesDet)))
+ pairs.append({
+ 'gt': gtNum,
+ 'det': matchesDet,
+ 'type': 'OO' if len(matchesDet) == 1 else 'OM'
+ })
+ for detNum in matchesDet:
+ detRectMat[detNum] = 1
+ evaluationLog += "Match GT #" + str(
+ gtNum) + " with Det #" + str(matchesDet) + "\n"
+
+ # Find many-to-one matches
+ evaluationLog += "Find many-to-one matches\n"
+ for detNum in range(len(detRects)):
+ if detNum not in detDontCareRectsNum:
+ match, matchesGt = many_to_one_match(detNum)
+ if match is True:
+ detRectMat[detNum] = 1
+ recallAccum += len(matchesGt) / (
+ 1 + math.log(len(matchesGt)))
+ precisionAccum += 1.0
+ pairs.append({
+ 'gt': matchesGt,
+ 'det': detNum,
+ 'type': 'OO' if len(matchesGt) == 1 else 'MO'
+ })
+ for gtNum in matchesGt:
+ gtRectMat[gtNum] = 1
+ evaluationLog += "Match GT #" + str(
+ matchesGt) + " with Det #" + str(detNum) + "\n"
+
+ numGtCare = (len(gtRects) - len(gtDontCareRectsNum))
+ if numGtCare == 0:
+ recall = float(1)
+ precision = float(0) if len(detRects) > 0 else float(1)
+ else:
+ recall = float(recallAccum) / numGtCare
+ precision = float(0) if (
+ len(detRects) - len(detDontCareRectsNum)
+ ) == 0 else float(precisionAccum) / (
+ len(detRects) - len(detDontCareRectsNum))
+ hmean = 0 if (precision + recall
+ ) == 0 else 2.0 * precision * recall / (
+ precision + recall)
+
+ numGtCare = len(gtRects) - len(gtDontCareRectsNum)
+ numDetCare = len(detRects) - len(detDontCareRectsNum)
+
+ perSampleMetrics = {
+ 'precision': precision,
+ 'recall': recall,
+ 'hmean': hmean,
+ 'pairs': pairs,
+ 'recallMat': [] if len(detRects) > 100 else recallMat.tolist(),
+ 'precisionMat': []
+ if len(detRects) > 100 else precisionMat.tolist(),
+ 'gtPolPoints': gtPolPoints,
+ 'detPolPoints': detPolPoints,
+ 'gtCare': numGtCare,
+ 'detCare': numDetCare,
+ 'gtDontCare': gtDontCareRectsNum,
+ 'detDontCare': detDontCareRectsNum,
+ 'recallAccum': recallAccum,
+ 'precisionAccum': precisionAccum,
+ 'evaluationLog': evaluationLog
+ }
+
+ return perSampleMetrics
+
+ def combine_results(self, results):
+ numGt = 0
+ numDet = 0
+ methodRecallSum = 0
+ methodPrecisionSum = 0
+
+ for result in results:
+ numGt += result['gtCare']
+ numDet += result['detCare']
+ methodRecallSum += result['recallAccum']
+ methodPrecisionSum += result['precisionAccum']
+
+ methodRecall = 0 if numGt == 0 else methodRecallSum / numGt
+ methodPrecision = 0 if numDet == 0 else methodPrecisionSum / numDet
+ methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * methodRecall * methodPrecision / (
+ methodRecall + methodPrecision)
+
+ methodMetrics = {
+ 'precision': methodPrecision,
+ 'recall': methodRecall,
+ 'hmean': methodHmean
+ }
+
+ return methodMetrics
+
+
+if __name__ == '__main__':
+ evaluator = DetectionMTWI2018Evaluator()
+ gts = [[{
+ 'points': [(0, 0), (1, 0), (1, 1), (0, 1)],
+ 'text': 1234,
+ 'ignore': False,
+ }, {
+ 'points': [(2, 2), (3, 2), (3, 3), (2, 3)],
+ 'text': 5678,
+ 'ignore': True,
+ }]]
+ preds = [[{
+ 'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)],
+ 'text': 123,
+ 'ignore': False,
+ }]]
+ results = []
+ for gt, pred in zip(gts, preds):
+ results.append(evaluator.evaluate_image(gt, pred))
+ metrics = evaluator.combine_results(results)
+ print(metrics)
diff --git a/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/quad_metric.py b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/quad_metric.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7e403a31c16af6b2e0e533139f2257cf6135c8a
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/ocr_metric/icdar2015/quad_metric.py
@@ -0,0 +1,98 @@
+import numpy as np
+
+from .detection.iou import DetectionIoUEvaluator
+
+
+class AverageMeter(object):
+ """Computes and stores the average and current value"""
+
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+ return self
+
+
+class QuadMetric():
+ def __init__(self, is_output_polygon=False):
+ self.is_output_polygon = is_output_polygon
+ self.evaluator = DetectionIoUEvaluator(
+ is_output_polygon=is_output_polygon)
+
+ def measure(self, batch, output, box_thresh=0.6):
+ '''
+ batch: a dict produced by the dataloader, containing:
+ image: tensor of shape (N, C, H, W).
+ polygons: tensor of shape (N, K, 4, 2), the polygons of text regions.
+ ignore_tags: tensor of shape (N, K), indicating whether a region should be ignored.
+ shape: the original shapes of the images.
+ filename: the original filenames of the images.
+ output: a (boxes, scores) pair produced by post-processing.
+ '''
+ results = []
+ gt_polygons_batch = batch['text_polys']
+ ignore_tags_batch = batch['ignore_tags']
+ pred_polygons_batch = np.array(output[0])
+ pred_scores_batch = np.array(output[1])
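+ # output[0] and output[1] are expected to hold the per-image
+ # predicted boxes and their confidence scores, respectively.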
+ for polygons, pred_polygons, pred_scores, ignore_tags in zip(
+ gt_polygons_batch, pred_polygons_batch, pred_scores_batch,
+ ignore_tags_batch):
+ gt = [
+ dict(
+ points=np.int64(polygons[i]), ignore=ignore_tags[i])
+ for i in range(len(polygons))
+ ]
+ if self.is_output_polygon:
+ pred = [
+ dict(points=pred_polygons[i])
+ for i in range(len(pred_polygons))
+ ]
+ else:
+ pred = []
+ # print(pred_polygons.shape)
+ for i in range(pred_polygons.shape[0]):
+ if pred_scores[i] >= box_thresh:
+ # print(pred_polygons[i,:,:].tolist())
+ pred.append(
+ dict(points=pred_polygons[i, :, :].astype(np.int64)))
+ # pred = [dict(points=pred_polygons[i,:,:].tolist()) if pred_scores[i] >= box_thresh for i in range(pred_polygons.shape[0])]
+ results.append(self.evaluator.evaluate_image(gt, pred))
+ return results
+
+ def validate_measure(self, batch, output, box_thresh=0.6):
+ return self.measure(batch, output, box_thresh)
+
+ def evaluate_measure(self, batch, output):
+ return self.measure(batch, output), np.linspace(
+ 0, batch['image'].shape[0]).tolist()
+
+ def gather_measure(self, raw_metrics):
+ raw_metrics = [
+ image_metrics
+ for batch_metrics in raw_metrics for image_metrics in batch_metrics
+ ]
+
+ result = self.evaluator.combine_results(raw_metrics)
+
+ precision = AverageMeter()
+ recall = AverageMeter()
+ fmeasure = AverageMeter()
+
+ precision.update(result['precision'], n=len(raw_metrics))
+ recall.update(result['recall'], n=len(raw_metrics))
+ fmeasure_score = 2 * precision.val * recall.val / (
+ precision.val + recall.val + 1e-8)
+ fmeasure.update(fmeasure_score)
+
+ return {'precision': precision, 'recall': recall, 'fmeasure': fmeasure}
diff --git a/benchmark/PaddleOCR_DBNet/utils/profiler.py b/benchmark/PaddleOCR_DBNet/utils/profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..e64afd6a0d8cfd860920916acfbf168d58dfff2d
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/profiler.py
@@ -0,0 +1,110 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import paddle
+
+# A global variable to record the number of calling times for profiler
+# functions. It is used to specify the tracing range of training steps.
+_profiler_step_id = 0
+
+# A global variable to avoid parsing from string every time.
+_profiler_options = None
+
+
+class ProfilerOptions(object):
+ '''
+ Use a string to initialize a ProfilerOptions.
+ The string should be in the format: "key1=value1;key2=value;key3=value3".
+ For example:
+ "profile_path=model.profile"
+ "batch_range=[50, 60]; profile_path=model.profile"
+ "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
+ ProfilerOptions supports the following key-value pairs:
+ batch_range - an integer list, e.g. [100, 110].
+ state - a string; the optional values are 'CPU', 'GPU' or 'All'.
+ sorted_key - a string; the optional values are 'calls', 'total',
+ 'max', 'min' or 'ave'.
+ tracer_option - a string, the optional values are 'Default', 'OpDetail',
+ 'AllOpDetail'.
+ profile_path - a string, the path to save the serialized profile data,
+ which can be used to generate a timeline.
+ exit_on_finished - a boolean.
+ '''
+
+ def __init__(self, options_str):
+ assert isinstance(options_str, str)
+
+ self._options = {
+ 'batch_range': [10, 20],
+ 'state': 'All',
+ 'sorted_key': 'total',
+ 'tracer_option': 'Default',
+ 'profile_path': '/tmp/profile',
+ 'exit_on_finished': True
+ }
+ self._parse_from_string(options_str)
+
+ def _parse_from_string(self, options_str):
+ for kv in options_str.replace(' ', '').split(';'):
+ key, value = kv.split('=')
+ if key == 'batch_range':
+ value_list = value.replace('[', '').replace(']', '').split(',')
+ value_list = list(map(int, value_list))
+ if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
+ 1] > value_list[0]:
+ self._options[key] = value_list
+ elif key == 'exit_on_finished':
+ self._options[key] = value.lower() in ("yes", "true", "t", "1")
+ elif key in [
+ 'state', 'sorted_key', 'tracer_option', 'profile_path'
+ ]:
+ self._options[key] = value
+
+ def __getitem__(self, name):
+ if self._options.get(name, None) is None:
+ raise ValueError(
+ "ProfilerOptions does not have an option named %s." % name)
+ return self._options[name]
+
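+# Example (illustrative): parsing fills in only the keys that appear in the
+# string and keeps the defaults for the rest:
+#   opts = ProfilerOptions("batch_range=[50, 60]; state=GPU")
+#   opts['batch_range']  # -> [50, 60]
+#   opts['sorted_key']   # -> 'total' (default)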
+
+def add_profiler_step(options_str=None):
+ '''
+ Enable the operator-level timing using PaddlePaddle's profiler.
+ The profiler uses an independent variable to count the profiler steps.
+ One call of this function is treated as a profiler step.
+
+ Args:
+ options_str - a string used to initialize the ProfilerOptions.
+ Default is None, which disables the profiler.
+ '''
+ if options_str is None:
+ return
+
+ global _profiler_step_id
+ global _profiler_options
+
+ if _profiler_options is None:
+ _profiler_options = ProfilerOptions(options_str)
+
+ if _profiler_step_id == _profiler_options['batch_range'][0]:
+ paddle.utils.profiler.start_profiler(_profiler_options['state'],
+ _profiler_options['tracer_option'])
+ elif _profiler_step_id == _profiler_options['batch_range'][1]:
+ paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
+ _profiler_options['profile_path'])
+ if _profiler_options['exit_on_finished']:
+ sys.exit(0)
+
+ _profiler_step_id += 1
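+
+# Typical usage (sketch): call add_profiler_step once per training iteration,
+# forwarding the CLI option string; profiling then covers the steps inside
+# batch_range and the process may exit once the profile is written, e.g.
+#   for step, batch in enumerate(loader):
+#       add_profiler_step("batch_range=[10, 20]; profile_path=/tmp/profile")
+#       ...  # forward / backward / optimizer step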
diff --git a/benchmark/PaddleOCR_DBNet/utils/schedulers.py b/benchmark/PaddleOCR_DBNet/utils/schedulers.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b6fb7d285594c06ec146c301bc5deb26d4e9c26
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/schedulers.py
@@ -0,0 +1,64 @@
+from paddle.optimizer import lr
+import logging
+__all__ = ['Polynomial']
+
+
+class Polynomial(object):
+ """
+ Polynomial learning rate decay
+ Args:
+ learning_rate (float): The initial learning rate.
+ epochs (int): The decay epoch size. It determines the decay cycle; when by_epoch is True, it is converted to epochs * step_each_epoch.
+ step_each_epoch (int): The number of steps in each epoch.
+ end_lr (float, optional): The minimum final learning rate. Default: 0.0.
+ power (float, optional): Power of the polynomial. Default: 1.0.
+ warmup_epoch (int): The number of epochs for LinearWarmup. Default: 0; when by_epoch is True, it is converted to warmup_epoch * step_each_epoch.
+ warmup_start_lr (float): Initial learning rate of the warm up. Default: 0.0.
+ last_epoch (int, optional): The index of the last epoch. Can be set to resume training. Default: -1, meaning the initial learning rate.
+ by_epoch (bool): Whether epochs and warmup_epoch are given in epochs rather than iterations; when True, both are multiplied by step_each_epoch. Default: True.
+ """
+
+ def __init__(self,
+ learning_rate,
+ epochs,
+ step_each_epoch,
+ end_lr=0.0,
+ power=1.0,
+ warmup_epoch=0,
+ warmup_start_lr=0.0,
+ last_epoch=-1,
+ by_epoch=True,
+ **kwargs):
+ super().__init__()
+ if warmup_epoch >= epochs:
+ msg = f"When using warm up, the value of \"epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
+ logging.warning(msg)
+ warmup_epoch = epochs
+ self.learning_rate = learning_rate
+ self.epochs = epochs
+ self.end_lr = end_lr
+ self.power = power
+ self.last_epoch = last_epoch
+ self.warmup_epoch = warmup_epoch
+ self.warmup_start_lr = warmup_start_lr
+
+ if by_epoch:
+ self.epochs *= step_each_epoch
+ self.warmup_epoch = int(self.warmup_epoch * step_each_epoch)
+
+ def __call__(self):
+ learning_rate = lr.PolynomialDecay(
+ learning_rate=self.learning_rate,
+ decay_steps=self.epochs,
+ end_lr=self.end_lr,
+ power=self.power,
+ last_epoch=self.last_epoch) if self.epochs > 0 else self.learning_rate
+ if self.warmup_epoch > 0:
+ learning_rate = lr.LinearWarmup(
+ learning_rate=learning_rate,
+ warmup_steps=self.warmup_epoch,
+ start_lr=self.warmup_start_lr,
+ end_lr=self.learning_rate,
+ last_epoch=self.last_epoch)
+ return learning_rate
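+
+# Usage sketch (values are illustrative): build the warmup + polynomial-decay
+# schedule and hand it to a Paddle optimizer:
+#   sched = Polynomial(learning_rate=0.007, epochs=1200, step_each_epoch=100,
+#                      warmup_epoch=3)()
+#   optimizer = paddle.optimizer.Adam(learning_rate=sched)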
diff --git a/benchmark/PaddleOCR_DBNet/utils/util.py b/benchmark/PaddleOCR_DBNet/utils/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..39bae764092fdca068a1792e065350209b45191a
--- /dev/null
+++ b/benchmark/PaddleOCR_DBNet/utils/util.py
@@ -0,0 +1,367 @@
+# -*- coding: utf-8 -*-
+# @Time : 2019/8/23 21:59
+# @Author : zhoujun
+import json
+import pathlib
+import time
+import os
+import glob
+import cv2
+import yaml
+from typing import Mapping
+import matplotlib.pyplot as plt
+import numpy as np
+
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+
+
+def _check_image_file(path):
+ img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'pdf'}
+ return any([path.lower().endswith(e) for e in img_end])
+
+
+def get_image_file_list(img_file):
+ imgs_lists = []
+ if img_file is None or not os.path.exists(img_file):
+ raise Exception("not found any img file in {}".format(img_file))
+
+ if os.path.isfile(img_file) and _check_image_file(img_file):
+ imgs_lists.append(img_file)
+ elif os.path.isdir(img_file):
+ for single_file in os.listdir(img_file):
+ file_path = os.path.join(img_file, single_file)
+ if os.path.isfile(file_path) and _check_image_file(file_path):
+ imgs_lists.append(file_path)
+ if len(imgs_lists) == 0:
+ raise Exception("not found any img file in {}".format(img_file))
+ imgs_lists = sorted(imgs_lists)
+ return imgs_lists
+
+
+def setup_logger(log_file_path: str=None):
+ import logging
+ logging._warn_preinit_stderr = 0
+ logger = logging.getLogger('DBNet.paddle')
+ formatter = logging.Formatter(
+ '%(asctime)s %(name)s %(levelname)s: %(message)s')
+ ch = logging.StreamHandler()
+ ch.setFormatter(formatter)
+ logger.addHandler(ch)
+ if log_file_path is not None:
+ file_handle = logging.FileHandler(log_file_path)
+ file_handle.setFormatter(formatter)
+ logger.addHandler(file_handle)
+ logger.setLevel(logging.DEBUG)
+ return logger
+
+
+# --exeTime
+def exe_time(func):
+ def newFunc(*args, **args2):
+ t0 = time.time()
+ back = func(*args, **args2)
+ print("{} cost {:.3f}s".format(func.__name__, time.time() - t0))
+ return back
+
+ return newFunc
+
+
+def load(file_path: str):
+ file_path = pathlib.Path(file_path)
+ func_dict = {'.txt': _load_txt, '.json': _load_json, '.list': _load_txt}
+ assert file_path.suffix in func_dict
+ return func_dict[file_path.suffix](file_path)
+
+
+def _load_txt(file_path: str):
+ with open(file_path, 'r', encoding='utf8') as f:
+ content = [
+ x.strip().strip('\ufeff').strip('\xef\xbb\xbf')
+ for x in f.readlines()
+ ]
+ return content
+
+
+def _load_json(file_path: str):
+ with open(file_path, 'r', encoding='utf8') as f:
+ content = json.load(f)
+ return content
+
+
+def save(data, file_path):
+ file_path = pathlib.Path(file_path)
+ func_dict = {'.txt': _save_txt, '.json': _save_json}
+ assert file_path.suffix in func_dict
+ return func_dict[file_path.suffix](data, file_path)
+
+
+def _save_txt(data, file_path):
+ """
+ 将一个list的数组写入txt文件里
+ :param data:
+ :param file_path:
+ :return:
+ """
+ if not isinstance(data, list):
+ data = [data]
+ with open(file_path, mode='w', encoding='utf8') as f:
+ f.write('\n'.join(data))
+
+
+def _save_json(data, file_path):
+ with open(file_path, 'w', encoding='utf-8') as json_file:
+ json.dump(data, json_file, ensure_ascii=False, indent=4)
+
+
+def show_img(imgs: np.ndarray, title='img'):
+ color = (len(imgs.shape) == 3 and imgs.shape[-1] == 3)
+ imgs = np.expand_dims(imgs, axis=0)
+ for i, img in enumerate(imgs):
+ plt.figure()
+ plt.title('{}_{}'.format(title, i))
+ plt.imshow(img, cmap=None if color else 'gray')
+ plt.show()
+
+
+def draw_bbox(img_path, result, color=(255, 0, 0), thickness=2):
+ if isinstance(img_path, str):
+ img_path = cv2.imread(img_path)
+ # img_path = cv2.cvtColor(img_path, cv2.COLOR_BGR2RGB)
+ img_path = img_path.copy()
+ for point in result:
+ point = point.astype(int)
+ cv2.polylines(img_path, [point], True, color, thickness)
+ return img_path
+
+
+def cal_text_score(texts,
+ gt_texts,
+ training_masks,
+ running_metric_text,
+ thred=0.5):
+ training_masks = training_masks.numpy()
+ pred_text = texts.numpy() * training_masks
+ pred_text[pred_text <= thred] = 0
+ pred_text[pred_text > thred] = 1
+ pred_text = pred_text.astype(np.int32)
+ gt_text = gt_texts.numpy() * training_masks
+ gt_text = gt_text.astype(np.int32)
+ running_metric_text.update(gt_text, pred_text)
+ score_text, _ = running_metric_text.get_scores()
+ return score_text
+
+
+def order_points_clockwise(pts):
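+ # Order a 4-point box as [top-left, top-right, bottom-right, bottom-left]:
+ # x+y is smallest at the top-left and largest at the bottom-right, while
+ # y-x is smallest at the top-right and largest at the bottom-left.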
+ rect = np.zeros((4, 2), dtype="float32")
+ s = pts.sum(axis=1)
+ rect[0] = pts[np.argmin(s)]
+ rect[2] = pts[np.argmax(s)]
+ diff = np.diff(pts, axis=1)
+ rect[1] = pts[np.argmin(diff)]
+ rect[3] = pts[np.argmax(diff)]
+ return rect
+
+
+def order_points_clockwise_list(pts):
+ pts = pts.tolist()
+ pts.sort(key=lambda x: (x[1], x[0]))
+ pts[:2] = sorted(pts[:2], key=lambda x: x[0])
+ pts[2:] = sorted(pts[2:], key=lambda x: -x[0])
+ pts = np.array(pts)
+ return pts
+
+
+def get_datalist(train_data_path):
+ """
+ 获取训练和验证的数据list
+ :param train_data_path: 训练的dataset文件列表,每个文件内以如下格式存储 ‘path/to/img\tlabel’
+ :return:
+ """
+ train_data = []
+ for p in train_data_path:
+ with open(p, 'r', encoding='utf-8') as f:
+ for line in f.readlines():
+ line = line.strip('\n').replace('.jpg ', '.jpg\t').split('\t')
+ if len(line) > 1:
+ img_path = pathlib.Path(line[0].strip(' '))
+ label_path = pathlib.Path(line[1].strip(' '))
+ if img_path.exists() and img_path.stat(
+ ).st_size > 0 and label_path.exists() and label_path.stat(
+ ).st_size > 0:
+ train_data.append((str(img_path), str(label_path)))
+ return train_data
+
+
+def save_result(result_path, box_list, score_list, is_output_polygon):
+ if is_output_polygon:
+ with open(result_path, 'wt') as res:
+ for i, box in enumerate(box_list):
+ box = box.reshape(-1).tolist()
+ result = ",".join([str(int(x)) for x in box])
+ score = score_list[i]
+ res.write(result + ',' + str(score) + "\n")
+ else:
+ with open(result_path, 'wt') as res:
+ for i, box in enumerate(box_list):
+ score = score_list[i]
+ box = box.reshape(-1).tolist()
+ result = ",".join([str(int(x)) for x in box])
+ res.write(result + ',' + str(score) + "\n")
+
+
+def expand_polygon(polygon):
+ """
+ 对只有一个字符的框进行扩充
+ """
+ (x, y), (w, h), angle = cv2.minAreaRect(np.float32(polygon))
+ if angle < -45:
+ w, h = h, w
+ angle += 90
+ new_w = w + h
+ box = ((x, y), (new_w, h), angle)
+ points = cv2.boxPoints(box)
+ return order_points_clockwise(points)
+
+
+def _merge_dict(config, merge_dct):
+ """ Recursive dict merge. Inspired by :meth:``dict.update()``, instead of
+ updating only top-level keys, dict_merge recurses down into dicts nested
+ to an arbitrary depth, updating keys. The ``merge_dct`` is merged into
+ ``dct``.
+ Args:
+ config: dict onto which the merge is executed
+ merge_dct: dct merged into config
+ Returns: dct
+ """
+ for key, value in merge_dct.items():
+ sub_keys = key.split('.')
+ key = sub_keys[0]
+ if key in config and len(sub_keys) > 1:
+ _merge_dict(config[key], {'.'.join(sub_keys[1:]): value})
+ elif key in config and isinstance(config[key], dict) and isinstance(
+ value, Mapping):
+ _merge_dict(config[key], value)
+ else:
+ config[key] = value
+ return config
+
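+# Example (illustrative): dotted keys and nested dicts both merge in place:
+#   cfg = {'optimizer': {'lr': 0.01, 'type': 'Adam'}}
+#   _merge_dict(cfg, {'optimizer.lr': 0.001})
+#   cfg  # -> {'optimizer': {'lr': 0.001, 'type': 'Adam'}}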
+
+def print_dict(cfg, print_func=print, delimiter=0):
+ """
+ Recursively visualize a dict and
+ indenting acrrording by the relationship of keys.
+ """
+ for k, v in sorted(cfg.items()):
+ if isinstance(v, dict):
+ print_func("{}{} : ".format(delimiter * " ", str(k)))
+ print_dict(v, print_func, delimiter + 4)
+ elif isinstance(v, list) and len(v) >= 1 and isinstance(v[0], dict):
+ print_func("{}{} : ".format(delimiter * " ", str(k)))
+ for value in v:
+ print_dict(value, print_func, delimiter + 4)
+ else:
+ print_func("{}{} : {}".format(delimiter * " ", k, v))
+
+
+class Config(object):
+ def __init__(self, config_path, BASE_KEY='base'):
+ self.BASE_KEY = BASE_KEY
+ self.cfg = self._load_config_with_base(config_path)
+
+ def _load_config_with_base(self, file_path):
+ """
+ Load config from file.
+ Args:
+ file_path (str): Path of the config file to be loaded.
+ Returns: global config
+ """
+ _, ext = os.path.splitext(file_path)
+ assert ext in ['.yml', '.yaml'], "only support yaml files for now"
+
+ with open(file_path) as f:
+ file_cfg = yaml.load(f, Loader=yaml.Loader)
+
+ # NOTE: options in this config take priority over those in the files listed under BASE_KEY
+ if self.BASE_KEY in file_cfg:
+ all_base_cfg = dict()
+ base_ymls = list(file_cfg[self.BASE_KEY])
+ for base_yml in base_ymls:
+ base_cfg = self._load_config_with_base(base_yml)
+ all_base_cfg = _merge_dict(all_base_cfg, base_cfg)
+
+ del file_cfg[self.BASE_KEY]
+ file_cfg = _merge_dict(all_base_cfg, file_cfg)
+ file_cfg['filename'] = os.path.splitext(os.path.split(file_path)[-1])[0]
+ return file_cfg
+
+ def merge_dict(self, args):
+ self.cfg = _merge_dict(self.cfg, args)
+
+ def print_cfg(self, print_func=print):
+ """
+ Recursively visualize a dict and
+ indenting acrrording by the relationship of keys.
+ """
+ print_func('----------- Config -----------')
+ print_dict(self.cfg, print_func)
+ print_func('---------------------------------------------')
+
+ def save(self, p):
+ with open(p, 'w') as f:
+ yaml.dump(
+ dict(self.cfg), f, default_flow_style=False, sort_keys=False)
+
+
+class ArgsParser(ArgumentParser):
+ def __init__(self):
+ super(ArgsParser, self).__init__(
+ formatter_class=RawDescriptionHelpFormatter)
+ self.add_argument(
+ "-c", "--config_file", help="configuration file to use")
+ self.add_argument(
+ "-o", "--opt", nargs='*', help="set configuration options")
+ self.add_argument(
+ '-p',
+ '--profiler_options',
+ type=str,
+ default=None,
+ help='The option of profiler, which should be in format ' \
+ '\"key1=value1;key2=value2;key3=value3\".'
+ )
+
+ def parse_args(self, argv=None):
+ args = super(ArgsParser, self).parse_args(argv)
+ assert args.config_file is not None, \
+ "Please specify --config_file=configure_file_path."
+ args.opt = self._parse_opt(args.opt)
+ return args
+
+ def _parse_opt(self, opts):
+ config = {}
+ if not opts:
+ return config
+ for s in opts:
+ s = s.strip()
+ k, v = s.split('=', 1)
+ if '.' not in k:
+ config[k] = yaml.load(v, Loader=yaml.Loader)
+ else:
+ keys = k.split('.')
+ if keys[0] not in config:
+ config[keys[0]] = {}
+ cur = config[keys[0]]
+ for idx, key in enumerate(keys[1:]):
+ if idx == len(keys) - 2:
+ cur[key] = yaml.load(v, Loader=yaml.Loader)
+ else:
+ cur[key] = {}
+ cur = cur[key]
+ return config
+
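+# Example (illustrative): "-o" options with dotted keys become nested dicts,
+# ready to be merged into the YAML config via Config.merge_dict:
+#   ArgsParser()._parse_opt(['trainer.epochs=10', 'amp=True'])
+#   # -> {'trainer': {'epochs': 10}, 'amp': True}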
+
+if __name__ == '__main__':
+ img = np.zeros((1, 3, 640, 640))
+ show_img(img[0][0])
+ plt.show()