Merge pull request #1 from opencv/dev

Benchmark framework impl; add YuNet for face detection, DB for text detection, CRNN for text recognition

Merge pull request #1 from opencv/dev
Benchmark framework impl; add YuNet for face detection, DB for text detection, CRNN for text recognition
3a55237d · Yuantao Feng · GitHub · af1afb38 · bfac311b · 3a55237d
26 changed file
--- a/.gitignore
+++ b/.gitignore
 *.pyc
-
-benchmark/data
-benchmark/data/**
+**/__pycache__
+**/__pycache__/**

 .vscode
\ No newline at end of file
--- a/README.md
+++ b/README.md
@@ -2,6 +2,38 @@

 A zoo for models tuned for OpenCV DNN with benchmarks on different platforms.

+Guidelines:
+- To clone this repo, please install [git-lfs](https://git-lfs.github.com/), run `git lfs install` and use `git lfs clone https://github.com/opencv/opencv_zoo`.
+- To run benchmark on your hardware settings, please refer to [benchmark/README](./benchmark/README.md).
+
+## Models & Benchmarks
+
+Hardware Setup:
+- `CPU x86_64`: INTEL CPU i7-5930K @ 3.50GHz, 6 cores, 12 threads.
+- `CPU ARM`: Raspberry Pi 4B, BCM2711B0 @ 1.5GHz (Cortex-A72), 4 cores, 4 threads.
+<!--
+- `GPU CUDA`: NVIDIA Jetson Nano B01, 128-core Maxwell, Quad-core ARM A57 @ 1.43 GHz.
+-->
+
+***Important Notes***:
+- The time data shown in the following tables is the time elapsed from preprocessing (resize is excluded), through a forward pass of the network, to postprocessing that produces the final results.
+- The time data shown in the following tables is averaged over 100 runs.
+- View [benchmark/config](./benchmark/config) for more details on benchmarking different models.
+
+<!--
+| Model | Input Size | CPU x86_64 (ms) | CPU ARM (ms) | GPU CUDA (ms) |
+|-------|------------|-----------------|--------------|---------------|
+| [YuNet](./models/face_detection_yunet) | 160x120 | 2.17   | 8.87    | 14.95  |
+| [DB](./models/text_detection_db)       | 640x480 | 148.65 | 2759.88 | 218.25 |
+| [CRNN](./models/text_recognition_crnn) | 100x32  | 23.23  | 235.87  | 195.20 |
+-->
+| Model | Input Size | CPU x86_64 (ms) | CPU ARM (ms) |
+|-------|------------|-----------------|--------------|
+| [YuNet](./models/face_detection_yunet) | 160x120 | 2.17   | 8.87    |
+| [DB](./models/text_detection_db)       | 640x480 | 148.65 | 2759.88 |
+| [CRNN](./models/text_recognition_crnn) | 100x32  | 23.23  | 235.87  |
+
+
 ## License

-OpenCV Zoo is licensed under the [Apache 2.0 license](./LICENCE). Please refer to the licenses of different models for model weights.
\ No newline at end of file
+OpenCV Zoo is licensed under the [Apache 2.0 license](./LICENSE). Please refer to licenses of different models.
--- a/benchmark/README.md
+++ b/benchmark/README.md
+# OpenCV Zoo Benchmark
+
+Benchmarking different models in the zoo.
+
+Data for benchmarking will be downloaded and extracted into [data](./data) based on the given config.
+
+Time is measured from data preprocessing (resize is excluded), through a forward pass of the network, to postprocessing that produces the final results. The final time data presented is averaged over 100 runs.
+
+## Preparation
+
+1. Install `python >= 3.6`.
+2. Install dependencies: `pip install -r requirements.txt`.
+
+## Benchmarking
+
+Run the following command to benchmark on a given config:
+
+```shell
+PYTHONPATH=.. python benchmark.py --cfg ./config/face_detection_yunet.yaml
+```
+
+If you are a Windows user and want to run it in CMD/PowerShell, use these commands instead:
+```shell
+set PYTHONPATH=..
+python benchmark.py --cfg ./config/face_detection_yunet.yaml
+```
+<!--
+Omit `--cfg` if you want to benchmark all included models:
+```shell
+PYTHONPATH=.. python benchmark.py
+```
+-->
\ No newline at end of file
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
+import os
+import argparse
+
+import yaml
+import tqdm
+import numpy as np
+import cv2 as cv
+
+from models import MODELS
+from download import Downloader
+
+parser = argparse.ArgumentParser("Benchmarks for OpenCV Zoo.")
+parser.add_argument('--cfg', '-c', type=str,
+                    help='Benchmarking on the given config.')
+args = parser.parse_args()
+
+class Timer:
+    def __init__(self):
+        self._tm = cv.TickMeter()
+
+        self._time_record = []
+        self._average_time = 0
+        self._calls = 0
+
+    def start(self):
+        self._tm.start()
+
+    def stop(self):
+        self._tm.stop()
+        self._calls += 1
+        self._time_record.append(self._tm.getTimeMilli())
+        self._average_time = sum(self._time_record) / self._calls
+        self._tm.reset()
+
+    def reset(self):
+        self._time_record = []
+        self._average_time = 0
+        self._calls = 0
+
+    def getAverageTime(self):
+        return self._average_time
+
+
+class Benchmark:
+    def __init__(self, **kwargs):
+        self._fileList = kwargs.pop('fileList', None)
+        assert self._fileList, 'fileList cannot be empty'
+
+        backend_id = kwargs.pop('backend', 'default')
+        available_backends = dict(
+            default=cv.dnn.DNN_BACKEND_DEFAULT,
+            # halide=cv.dnn.DNN_BACKEND_HALIDE,
+            # inference_engine=cv.dnn.DNN_BACKEND_INFERENCE_ENGINE,
+            opencv=cv.dnn.DNN_BACKEND_OPENCV,
+            # vkcom=cv.dnn.DNN_BACKEND_VKCOM,
+            cuda=cv.dnn.DNN_BACKEND_CUDA
+        )
+        self._backend = available_backends[backend_id]
+
+        target_id = kwargs.pop('target', 'cpu')
+        available_targets = dict(
+            cpu=cv.dnn.DNN_TARGET_CPU,
+            # opencl=cv.dnn.DNN_TARGET_OPENCL,
+            # opencl_fp16=cv.dnn.DNN_TARGET_OPENCL_FP16,
+            # myriad=cv.dnn.DNN_TARGET_MYRIAD,
+            # vulkan=cv.dnn.DNN_TARGET_VULKAN,
+            # fpga=cv.dnn.DNN_TARGET_FPGA,
+            cuda=cv.dnn.DNN_TARGET_CUDA,
+            cuda_fp16=cv.dnn.DNN_TARGET_CUDA_FP16,
+            # hddl=cv.dnn.DNN_TARGET_HDDL
+        )
+        self._target = available_targets[target_id]
+
+        self._sizes = kwargs.pop('sizes', None)
+        self._repeat = kwargs.pop('repeat', 100)
+        self._parentPath = kwargs.pop('parentPath', 'benchmark/data')
+        self._useGroundTruth = kwargs.pop('useDetectionLabel', False) # If it is enable, 'sizes' will not work
+        assert (self._sizes and not self._useGroundTruth) or (not self._sizes and self._useGroundTruth), 'If \'useDetectionLabel\' is True, \'sizes\' should not exist.'
+
+        self._timer = Timer()
+        self._benchmark_results = dict.fromkeys(self._fileList, dict())
+
+        if self._useGroundTruth:
+            self.loadLabel()
+
+    def loadLabel(self):
+        self._labels = dict.fromkeys(self._fileList, None)
+        for imgName in self._fileList:
+            self._labels[imgName] = np.loadtxt(os.path.join(self._parentPath, '{}.txt'.format(imgName[:-4])))
+
+    def run(self, model):
+        model.setBackend(self._backend)
+        model.setTarget(self._target)
+
+        for imgName in self._fileList:
+            img = cv.imread(os.path.join(self._parentPath, imgName))
+            if self._useGroundTruth:
+                for idx, gt in enumerate(self._labels[imgName]):
+                    self._benchmark_results[imgName]['gt{}'.format(idx)] = self._run(
+                        model,
+                        img,
+                        gt,
+                        pbar_msg='  {}, gt{}'.format(imgName, idx)
+                    )
+            else:
+                if self._sizes is None:
+                    h, w, _ = img.shape
+                    model.setInputSize([w, h])
+                    self._benchmark_results[imgName][str([w, h])] = self._run(
+                        model,
+                        img,
+                        pbar_msg='  {}, original size {}'.format(imgName, str([w, h]))
+                    )
+                else:
+                    for size in self._sizes:
+                        imgResized = cv.resize(img, size)
+                        model.setInputSize(size)
+                        self._benchmark_results[imgName][str(size)] = self._run(
+                            model,
+                            imgResized,
+                            pbar_msg='  {}, size {}'.format(imgName, str(size))
+                        )
+
+    def printResults(self):
+        print('  Results:')
+        for imgName, results in self._benchmark_results.items():
+            print('    image: {}'.format(imgName))
+            total_latency = 0
+            for key, latency in results.items():
+                total_latency += latency
+                print('        {}, latency: {:.4f} ms'.format(key, latency))
+            print('        Average latency: {:.4f} ms'.format(total_latency / len(results)))
+
+    def _run(self, model, *args, **kwargs):
+        self._timer.reset()
+        pbar = tqdm.tqdm(range(self._repeat))
+        for _ in pbar:
+            pbar.set_description(kwargs.get('pbar_msg', None))
+
+            self._timer.start()
+            results = model.infer(*args)
+            self._timer.stop()
+        return self._timer.getAverageTime()
+
+
+def build_from_cfg(cfg, registery):
+    obj_name = cfg.pop('name')
+    obj = registery.get(obj_name)
+    return obj(**cfg)
+
+def prepend_pythonpath(cfg, key1, key2):
+    pythonpath = os.environ['PYTHONPATH']
+    if cfg[key1][key2].startswith('/'):
+        return
+    cfg[key1][key2] = os.path.join(pythonpath, cfg[key1][key2])
+
+if __name__ == '__main__':
+    assert args.cfg.endswith('yaml'), 'Currently support configs of yaml format only.'
+    with open(args.cfg, 'r') as f:
+        cfg = yaml.safe_load(f)
+
+    # prepend PYTHONPATH to each path
+    prepend_pythonpath(cfg, key1='Data', key2='parentPath')
+    prepend_pythonpath(cfg, key1='Benchmark', key2='parentPath')
+    prepend_pythonpath(cfg, key1='Model', key2='modelPath')
+
+
+    # Download data if not exist
+    print('Loading data:')
+    downloader = Downloader(**cfg['Data'])
+    downloader.get()
+
+    # Instantiate benchmarking
+    benchmark = Benchmark(**cfg['Benchmark'])
+
+    # Instantiate model
+    model = build_from_cfg(cfg=cfg['Model'], registery=MODELS)
+
+    # Run benchmarking
+    print('Benchmarking {}:'.format(model.name))
+    benchmark.run(model)
+    benchmark.printResults()
\ No newline at end of file
--- a/benchmark/config/face_detection_yunet.yaml
+++ b/benchmark/config/face_detection_yunet.yaml
+Data:
+  name: "Images for Face Detection"
+  url: "https://drive.google.com/u/0/uc?id=1lOAliAIeOv4olM65YDzE55kn6XjiX2l6&export=download"
+  sha: "0ba67a9cfd60f7fdb65cdb7c55a1ce76c1193df1"
+  filename: "face_detection.zip"
+  parentPath: "benchmark/data"
+
+Benchmark:
+  name: "Face Detection Benchmark"
+  parentPath: "benchmark/data/face_detection"
+  fileList:
+    - "group.jpg"
+    - "concerts.jpg"
+    - "dance.jpg"
+  backend: "default"
+  target: "cpu"
+  sizes: # [w, h], Omit to run at original scale
+    - [160, 120]
+    - [640, 480]
+  repeat: 100 # default 100
+
+Model:
+  name: "YuNet"
+  modelPath: "models/face_detection_yunet/face_detection_yunet.onnx"
+  confThreshold: 0.6
+  nmsThreshold: 0.3
+  topK: 5000
+  keepTopK: 750
\ No newline at end of file
--- a/benchmark/config/text_detection_db.yaml
+++ b/benchmark/config/text_detection_db.yaml
+Data:
+  name: "Images for Text Detection"
+  url: "https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download"
+  sha: "a40cf095ceb77159ddd2a5902f3b4329696dd866"
+  filename: "text.zip"
+  parentPath: "benchmark/data"
+
+Benchmark:
+  name: "Text Detection Benchmark"
+  parentPath: "benchmark/data/text"
+  fileList:
+    - "1.jpg"
+    - "2.jpg"
+    - "3.jpg"
+  backend: "default"
+  target: "cpu"
+  sizes: # [w, h], default original scale
+    - [640, 480]
+  repeat: 100
+
+Model:
+  name: "DB"
+  modelPath: "models/text_detection_db/text_detection_db.onnx"
+  binaryThreshold: 0.3
+  polygonThreshold: 0.5
+  maxCandidates: 200
+  unclipRatio: 2.0
\ No newline at end of file
--- a/benchmark/config/text_recognition_crnn.yaml
+++ b/benchmark/config/text_recognition_crnn.yaml
+Data:
+  name: "Images for Text Detection"
+  url: "https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download"
+  sha: "a40cf095ceb77159ddd2a5902f3b4329696dd866"
+  filename: "text.zip"
+  parentPath: "benchmark/data"
+
+Benchmark:
+  name: "Text Recognition Benchmark"
+  parentPath: "benchmark/data/text"
+  fileList:
+    - "1.jpg"
+    - "2.jpg"
+    - "3.jpg"
+  backend: "default"
+  target: "cpu"
+  useDetectionLabel: True
+  repeat: 100
+
+Model:
+  name: "CRNN"
+  modelPath: "models/text_recognition_crnn/text_recognition_crnn.onnx"
\ No newline at end of file
--- a/benchmark/data/.gitignore
+++ b/benchmark/data/.gitignore
+*
+!.gitignore
\ No newline at end of file
--- a/benchmark/download.py
+++ b/benchmark/download.py
+import hashlib
+import os
+import sys
+import tarfile
+import zipfile
+import requests
+import os.path as osp
+
+from urllib.request import urlopen
+from urllib.parse import urlparse
+
+
+class Downloader:
+    MB = 1024*1024
+    BUFSIZE = 10*MB
+
+    def __init__(self, **kwargs):
+        self._name = kwargs.pop('name')
+        self._url = kwargs.pop('url', None)
+        self._filename = kwargs.pop('filename')
+        self._sha = kwargs.pop('sha', None)
+        self._saveTo = kwargs.pop('saveTo', './data')
+        self._extractTo = kwargs.pop('extractTo', './data')
+
+    def __str__(self):
+        return 'Downloader for <{}>'.format(self._name)
+
+    def printRequest(self, r):
+        def getMB(r):
+            d = dict(r.info())
+            for c in ['content-length', 'Content-Length']:
+                if c in d:
+                    return int(d[c]) / self.MB
+            return '<unknown>'
+        print('  {} {} [{} Mb]'.format(r.getcode(), r.msg, getMB(r)))
+
+    def verifyHash(self):
+        if not self._sha:
+            return False
+        sha = hashlib.sha1()
+        try:
+            with open(osp.join(self._saveTo, self._filename), 'rb') as f:
+                while True:
+                    buf = f.read(self.BUFSIZE)
+                    if not buf:
+                        break
+                    sha.update(buf)
+            if self._sha != sha.hexdigest():
+                print('  actual {}'.format(sha.hexdigest()))
+                print('  expect {}'.format(self._sha))
+            return self._sha == sha.hexdigest()
+        except Exception as e:
+            print('  catch {}'.format(e))
+
+    def get(self):
+        if self.verifyHash():
+            print('  hash match - skipping download')
+        else:
+            basedir = os.path.dirname(self._saveTo)
+            if basedir and not os.path.exists(basedir):
+                print('  creating directory: ' + basedir)
+                os.makedirs(basedir, exist_ok=True)
+
+            print('  hash check failed - downloading')
+            if 'drive.google.com' in self._url:
+                urlquery = urlparse(self._url).query.split('&')
+                for q in urlquery:
+                    if 'id=' in q:
+                        gid = q[3:]
+                sz = GDrive(gid)(osp.join(self._saveTo, self._filename))
+                print('  size = %.2f Mb' % (sz / (1024.0 * 1024)))
+            else:
+                print('  get {}'.format(self._url))
+                self.download()
+
+            # Verify hash after download
+            print(' done')
+            print(' file {}'.format(self._filename))
+            if self.verifyHash():
+                print('  hash match - extracting')
+            else:
+                print('  hash check failed - exiting')
+
+        # Extract
+        if '.zip' in self._filename:
+            print('  extracting - ', end='')
+            self.extract()
+            print('done')
+
+        return True
+
+    def download(self):
+        try:
+            r = urlopen(self._url, timeout=60)
+            self.printRequest(r)
+            self.save(r)
+        except Exception as e:
+            print('  catch {}'.format(e))
+
+    def extract(self):
+        fileLocation = os.path.join(self._saveTo, self._filename)
+        try:
+            if self._filename.endswith('.zip'):
+                with zipfile.ZipFile(fileLocation) as f:
+                    for member in f.namelist():
+                        path = osp.join(self._extractTo, member)
+                        if osp.exists(path) or osp.isfile(path):
+                            continue
+                        else:
+                            f.extract(member, self._extractTo)
+        except Exception as e:
+            print(('  catch {}'.format(e)))
+
+    def save(self, r):
+        with open(self._filename, 'wb') as f:
+            print('  progress ', end='')
+            sys.stdout.flush()
+            while True:
+                buf = r.read(self.BUFSIZE)
+                if not buf:
+                    break
+                f.write(buf)
+                print('>', end='')
+                sys.stdout.flush()
+
+
+def GDrive(gid):
+    def download_gdrive(dst):
+        session = requests.Session()  # re-use cookies
+
+        URL = "https://docs.google.com/uc?export=download"
+        response = session.get(URL, params = { 'id' : gid }, stream = True)
+
+        def get_confirm_token(response):  # in case of large files
+            for key, value in response.cookies.items():
+                if key.startswith('download_warning'):
+                    return value
+            return None
+        token = get_confirm_token(response)
+
+        if token:
+            params = { 'id' : gid, 'confirm' : token }
+            response = session.get(URL, params = params, stream = True)
+
+        BUFSIZE = 1024 * 1024
+        PROGRESS_SIZE = 10 * 1024 * 1024
+
+        sz = 0
+        progress_sz = PROGRESS_SIZE
+        with open(dst, "wb") as f:
+            for chunk in response.iter_content(BUFSIZE):
+                if not chunk:
+                    continue  # keep-alive
+
+                f.write(chunk)
+                sz += len(chunk)
+                if sz >= progress_sz:
+                    progress_sz += PROGRESS_SIZE
+                    print('>', end='')
+                    sys.stdout.flush()
+        print('')
+        return sz
+    return download_gdrive
--- a/benchmark/requirements.txt
+++ b/benchmark/requirements.txt
+numpy==1.21.2
+opencv-python==4.5.3.56
+tqdm
+pyyaml
+requests
\ No newline at end of file
--- a/models/__init__.py
+++ b/models/__init__.py
+from .face_detection_yunet.yunet import YuNet
+from .text_detection_db.db import DB
+from .text_recognition_crnn.crnn import CRNN
+
+class Registery:
+    def __init__(self, name):
+        self._name = name
+        self._dict = dict()
+
+    def get(self, key):
+        return self._dict[key]
+
+    def register(self, item):
+        self._dict[item.__name__] = item
+
+# Registry of the model wrappers imported above, keyed by class name
+# ('YuNet', 'DB', 'CRNN').
+MODELS = Registery('Models')
+MODELS.register(YuNet)
+MODELS.register(DB)
+MODELS.register(CRNN)
\ No newline at end of file
--- a/models/face_detection_yunet/LICENSE
+++ b/models/face_detection_yunet/LICENSE
+MIT License
+
+Copyright (c) 2020 Shiqi Yu <shiqi.yu@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
--- a/models/face_detection_yunet/README.md
+++ b/models/face_detection_yunet/README.md
+# YuNet
+
+YuNet is a light-weight, fast and accurate face detection model, which achieves 0.834(AP_easy), 0.824(AP_medium), 0.708(AP_hard) on the WIDER Face validation set.
+
+## Demo
+
+Run the following command to try the demo:
+```shell
+# detect on camera input
+python demo.py
+# detect on an image
+python demo.py --input /path/to/image
+```
+
+## License
+
+All files in this directory are licensed under [MIT License](./LICENSE).
+
+## Reference
+
+- https://github.com/ShiqiYu/libfacedetection
+- https://github.com/ShiqiYu/libfacedetection.train
--- a/models/face_detection_yunet/demo.py
+++ b/models/face_detection_yunet/demo.py
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+import argparse
+
+import numpy as np
+import cv2 as cv
+
+from yunet import YuNet
+
+def str2bool(v):
+    if v.lower() in ['on', 'yes', 'true', 'y', 't']:
+        return True
+    elif v.lower() in ['off', 'no', 'false', 'n', 'f']:
+        return False
+    else:
+        raise NotImplementedError
+
+parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
+parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='face_detection_yunet.onnx', help='Path to the model.')
+parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
+parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
+parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
+parser.add_argument('--keep_top_k', type=int, default=750, help='Keep keep_top_k bounding boxes after NMS.')
+parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+args = parser.parse_args()
+
+def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), fps=None):
+    output = image.copy()
+    landmark_color = [
+        (255,   0,   0), # right eye
+        (  0,   0, 255), # left eye
+        (  0, 255,   0), # nose tip
+        (255,   0, 255), # right mouth corner
+        (  0, 255, 255)  # left mouth corner
+    ]
+
+    if fps is not None:
+        cv.putText(output, 'FPS: {:.2f}'.format(fps), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
+
+    for det in results:
+        bbox = det[0:4].astype(np.int32)
+        cv.rectangle(output, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), box_color, 2)
+
+        conf = det[-1]
+        cv.putText(output, '{:.4f}'.format(conf), (bbox[0], bbox[1]+12), cv.FONT_HERSHEY_DUPLEX, 0.5, text_color)
+
+        landmarks = det[4:14].astype(np.int32).reshape((5,2))
+        for idx, landmark in enumerate(landmarks):
+            cv.circle(output, landmark, 2, landmark_color[idx], 2)
+
+    return output
+
+if __name__ == '__main__':
+    # Instantiate YuNet
+    model = YuNet(modelPath=args.model,
+                  inputSize=[320, 320],
+                  confThreshold=args.conf_threshold,
+                  nmsThreshold=args.nms_threshold,
+                  topK=args.top_k,
+                  keepTopK=args.keep_top_k)
+
+    # If input is an image
+    if args.input is not None:
+        image = cv.imread(args.input)
+        h, w, _ = image.shape
+
+        # Inference
+        model.setInputSize([w, h])
+        results = model.infer(image)
+
+        # Print results
+        print('{} faces detected.'.format(results.shape[0]))
+        for idx, det in enumerate(results):
+            print('{}: [{:.0f}, {:.0f}] [{:.0f}, {:.0f}], {:.2f}'.format(
+                idx, det[0], det[1], det[2], det[3], det[-1])
+            )
+
+        # Draw results on the input image
+        image = visualize(image, results)
+
+        # Save results if save is true
+        if args.save:
+            print('Resutls saved to result.jpg\n')
+            cv.imwrite('result.jpg', image)
+
+        # Visualize results in a new window
+        if args.vis:
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, image)
+            cv.waitKey(0)
+    else: # Omit input to call default camera
+        deviceId = 0
+        cap = cv.VideoCapture(deviceId)
+        w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
+        h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
+        model.setInputSize([w, h])
+
+        tm = cv.TickMeter()
+        while cv.waitKey(1) < 0:
+            hasFrame, frame = cap.read()
+            if not hasFrame:
+                print('No frames grabbed!')
+                break
+
+            # Inference
+            tm.start()
+            results = model.infer(frame) # results is a tuple
+            tm.stop()
+
+            # Draw results on the input image
+            frame = visualize(frame, results, fps=tm.getFPS())
+
+            # Visualize results in a new Window
+            cv.imshow('YuNet Demo', frame)
+
+            tm.reset()
\ No newline at end of file
--- a/models/face_detection_yunet/face_detection_yunet.onnx
+++ b/models/face_detection_yunet/face_detection_yunet.onnx
--- a/models/face_detection_yunet/yunet.py
+++ b/models/face_detection_yunet/yunet.py
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+from itertools import product
+
+import numpy as np
+import cv2 as cv
+
+class YuNet:
+    def __init__(self, modelPath, inputSize=[320, 320], confThreshold=0.6, nmsThreshold=0.3, topK=5000, keepTopK=750):
+        self._modelPath = modelPath
+        self._model = cv.dnn.readNet(self._modelPath)
+
+        self._inputNames = ''
+        self._outputNames = ['loc', 'conf', 'iou']
+        self._inputSize = inputSize # [w, h]
+        self._confThreshold = confThreshold
+        self._nmsThreshold = nmsThreshold
+        self._topK = topK
+        self._keepTopK = keepTopK
+
+        self._min_sizes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
+        self._steps = [8, 16, 32, 64]
+        self._variance = [0.1, 0.2]
+
+        # Generate priors
+        self._priorGen()
+
+    @property
+    def name(self):
+        return self.__class__.__name__
+
+    def setBackend(self, backend):
+        self._model.setPreferableBackend(backend)
+
+    def setTarget(self, target):
+        self._model.setPreferableTarget(target)
+
+    def setInputSize(self, input_size):
+        self._inputSize = input_size # [w, h]
+
+        # Regenerate priors
+        self._priorGen()
+
+    def _preprocess(self, image):
+        return cv.dnn.blobFromImage(image)
+
+    def infer(self, image):
+        assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
+        assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
+
+        # Preprocess
+        inputBlob = self._preprocess(image)
+
+        # Forward
+        self._model.setInput(inputBlob, self._inputNames)
+        outputBlob = self._model.forward(self._outputNames)
+
+        # Postprocess
+        results = self._postprocess(outputBlob)
+
+        return results
+
+    def _postprocess(self, outputBlob):
+        # Decode
+        dets = self._decode(outputBlob)
+
+        # NMS
+        keepIdx = cv.dnn.NMSBoxes(
+            bboxes=dets[:, 0:4].tolist(),
+            scores=dets[:, -1].tolist(),
+            score_threshold=self._confThreshold,
+            nms_threshold=self._nmsThreshold,
+            top_k=self._topK
+        ) # box_num x class_num
+        if len(keepIdx) > 0:
+            dets = dets[keepIdx]
+            dets = np.squeeze(dets, axis=1)
+            return dets[:self._keepTopK]
+        else:
+            return np.empty(shape=(0, 15))
+
+    def _priorGen(self):
+        w, h = self._inputSize
+        feature_map_2th = [int(int((h + 1) / 2) / 2),
+                           int(int((w + 1) / 2) / 2)]
+        feature_map_3th = [int(feature_map_2th[0] / 2),
+                           int(feature_map_2th[1] / 2)]
+        feature_map_4th = [int(feature_map_3th[0] / 2),
+                           int(feature_map_3th[1] / 2)]
+        feature_map_5th = [int(feature_map_4th[0] / 2),
+                           int(feature_map_4th[1] / 2)]
+        feature_map_6th = [int(feature_map_5th[0] / 2),
+                           int(feature_map_5th[1] / 2)]
+
+        feature_maps = [feature_map_3th, feature_map_4th,
+                        feature_map_5th, feature_map_6th]
+
+        priors = []
+        for k, f in enumerate(feature_maps):
+            min_sizes = self._min_sizes[k]
+            for i, j in product(range(f[0]), range(f[1])): # i->h, j->w
+                for min_size in min_sizes:
+                    s_kx = min_size / w
+                    s_ky = min_size / h
+
+                    cx = (j + 0.5) * self._steps[k] / w
+                    cy = (i + 0.5) * self._steps[k] / h
+
+                    priors.append([cx, cy, s_kx, s_ky])
+        self.priors = np.array(priors, dtype=np.float32)
+
+    def _decode(self, outputBlob):
+        loc, conf, iou = outputBlob
+        # get score
+        cls_scores = conf[:, 1]
+        iou_scores = iou[:, 0]
+        # clamp
+        _idx = np.where(iou_scores < 0.)
+        iou_scores[_idx] = 0.
+        _idx = np.where(iou_scores > 1.)
+        iou_scores[_idx] = 1.
+        scores = np.sqrt(cls_scores * iou_scores)
+        scores = scores[:, np.newaxis]
+
+        scale = np.array(self._inputSize)
+
+        # get bboxes
+        bboxes = np.hstack((
+            (self.priors[:, 0:2] + loc[:, 0:2] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 2:4] * np.exp(loc[:, 2:4] * self._variance)) * scale
+        ))
+        # (x_c, y_c, w, h) -> (x1, y1, w, h)
+        bboxes[:, 0:2] -= bboxes[:, 2:4] / 2
+
+        # get landmarks
+        landmarks = np.hstack((
+            (self.priors[:, 0:2] + loc[:,  4: 6] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 0:2] + loc[:,  6: 8] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 0:2] + loc[:,  8:10] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 0:2] + loc[:, 10:12] * self._variance[0] * self.priors[:, 2:4]) * scale,
+            (self.priors[:, 0:2] + loc[:, 12:14] * self._variance[0] * self.priors[:, 2:4]) * scale
+        ))
+
+        dets = np.hstack((bboxes, landmarks, scores))
+        return dets
\ No newline at end of file
--- a/models/text_detection_db/LICENSE
+++ b/models/text_detection_db/LICENSE
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/models/text_detection_db/README.md
+++ b/models/text_detection_db/README.md
+# DB
+
+Real-time Scene Text Detection with Differentiable Binarization
+
+`text_detection_db.onnx` is trained on [TD500 dataset](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)), which can detect both English & Chinese instances. It is obtained from [here](https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html) and renamed from `DB_TD500_resnet18.onnx`.
+
+## Demo
+
+Run the following command to try the demo:
+```shell
+# detect on camera input
+python demo.py
+# detect on an image
+python demo.py --input /path/to/image
+```
+
+## License
+
+All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
+
+## Reference
+
+- https://arxiv.org/abs/1911.08947
+- https://github.com/MhLiao/DB
+- https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html
\ No newline at end of file
--- a/models/text_detection_db/db.py
+++ b/models/text_detection_db/db.py
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+import numpy as np
+import cv2 as cv
+
+class DB:
+    """Text detector built on OpenCV's ``cv.dnn_TextDetectionModel_DB``.
+
+    Args:
+        modelPath: Path of the model file handed to ``cv.dnn.readNet``.
+        inputSize: Two-item sequence ``(width, height)`` the model input is
+            configured for. ``infer`` only accepts images of exactly this size.
+        binaryThreshold: Forwarded to ``setBinaryThreshold``.
+        polygonThreshold: Forwarded to ``setPolygonThreshold``.
+        maxCandidates: Forwarded to ``setMaxCandidates``.
+        unclipRatio: Forwarded to ``setUnclipRatio``.
+    """
+
+    # Preprocessing parameters passed to setInputParams() (scale factor and
+    # per-channel mean); kept in one place so __init__ and setInputSize agree.
+    _SCALE = 1.0/255.0
+    _MEAN = (122.67891434, 116.66876762, 104.00698793)
+
+    def __init__(self, modelPath, inputSize=(736, 736), binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0):
+        self._modelPath = modelPath
+        self._model = cv.dnn_TextDetectionModel_DB(
+            cv.dnn.readNet(self._modelPath)
+        )
+
+        self._binaryThreshold = binaryThreshold
+        self._polygonThreshold = polygonThreshold
+        self._maxCandidates = maxCandidates
+        self._unclipRatio = unclipRatio
+
+        self._model.setBinaryThreshold(self._binaryThreshold)
+        self._model.setPolygonThreshold(self._polygonThreshold)
+        self._model.setUnclipRatio(self._unclipRatio)
+        self._model.setMaxCandidates(self._maxCandidates)
+
+        # Records the size attributes and pushes the input params to the model
+        self.setInputSize(inputSize)
+
+    @property
+    def name(self):
+        """Name of this class, used e.g. for window titles."""
+        return self.__class__.__name__
+
+    def setBackend(self, backend):
+        """Forward the preferred backend id to the underlying model."""
+        self._model.setPreferableBackend(backend)
+
+    def setTarget(self, target):
+        """Forward the preferred target id to the underlying model."""
+        self._model.setPreferableTarget(target)
+
+    def setInputSize(self, input_size):
+        """Reconfigure the expected input size.
+
+        Args:
+            input_size: Two-item sequence ``(width, height)``.
+        """
+        self._inputSize = tuple(input_size) # (w, h)
+        # Width is element 0 and height is element 1, the same layout the
+        # shape checks in infer() rely on.
+        self._inputWidth = self._inputSize[0]
+        self._inputHeight = self._inputSize[1]
+        self._model.setInputParams(self._SCALE, self._inputSize, self._MEAN)
+
+    def infer(self, image):
+        """Run detection on an image that already has the configured size.
+
+        Args:
+            image: Array whose ``shape[0]`` equals the configured height and
+                whose ``shape[1]`` equals the configured width.
+
+        Returns:
+            Whatever ``detect`` of the underlying model returns, unmodified.
+
+        Raises:
+            AssertionError: If the image size differs from the configured one.
+        """
+        assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
+        assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
+
+        return self._model.detect(image)
\ No newline at end of file
--- a/models/text_detection_db/demo.py
+++ b/models/text_detection_db/demo.py
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+import argparse
+
+import numpy as np
+import cv2 as cv
+
+from db import DB
+
+def str2bool(v):
+    """Parse an on/off style command-line string into a bool.
+
+    Intended as an argparse ``type=`` callable. Matching is case-insensitive.
+
+    Args:
+        v: The raw string taken from the command line.
+
+    Returns:
+        True for 'on', 'yes', 'true', 'y', 't'; False for 'off', 'no',
+        'false', 'n', 'f'.
+
+    Raises:
+        argparse.ArgumentTypeError: For any other value, so that argparse
+            reports a normal usage error instead of an uncaught traceback.
+    """
+    normalized = v.lower()
+    if normalized in ('on', 'yes', 'true', 'y', 't'):
+        return True
+    if normalized in ('off', 'no', 'false', 'n', 'f'):
+        return False
+    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}'.format(v))
+
+# Command-line interface of the demo. Arguments are parsed at import time and
+# read through the module-level `args` below.
+parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
+parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_detection_db.onnx', help='Path to the model.')
+parser.add_argument('--width', type=int, default=736,
+                    help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
+parser.add_argument('--height', type=int, default=736,
+                    help='Preprocess input image by resizing to a specific height. It should be multiple by 32.')
+parser.add_argument('--binary_threshold', type=float, default=0.3, help='Threshold of the binary map.')
+parser.add_argument('--polygon_threshold', type=float, default=0.5, help='Threshold of polygons.')
+parser.add_argument('--max_candidates', type=int, default=200, help='Max candidates of polygons.')
+parser.add_argument('--unclip_ratio', type=np.float64, default=2.0, help=' The unclip ratio of the detected text region, which determines the output size.')
+# Parsed with str2bool like --vis: with type=str any non-empty value, including
+# "false", is a truthy string and would still trigger saving.
+parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+args = parser.parse_args()
+
+def visualize(image, results, box_color=(0, 255, 0), text_color=(0, 0, 255), isClosed=True, thickness=2, fps=None):
+    """Draw the detected polygons, and optionally an FPS label, on a copy of image.
+
+    Args:
+        image: Image to annotate; it is copied, not drawn on directly.
+        results: Detection output whose first element holds the polygons.
+        box_color: Color passed to ``cv.polylines``.
+        text_color: Color of the FPS label.
+        isClosed: Passed to ``cv.polylines``.
+        thickness: Line thickness passed to ``cv.polylines``.
+        fps: When not None, rendered as ``FPS: x.xx`` near the top-left corner.
+
+    Returns:
+        The value returned by ``cv.polylines`` for the annotated copy.
+    """
+    canvas = image.copy()
+
+    if fps is not None:
+        fpsLabel = 'FPS: {:.2f}'.format(fps)
+        cv.putText(canvas, fpsLabel, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
+
+    polygons = np.array(results[0])
+    return cv.polylines(canvas, polygons, isClosed, box_color, thickness)
+
+if __name__ == '__main__':
+    # Instantiate DB
+    model = DB(modelPath=args.model,
+               inputSize=[args.width, args.height],
+               binaryThreshold=args.binary_threshold,
+               polygonThreshold=args.polygon_threshold,
+               maxCandidates=args.max_candidates,
+               unclipRatio=args.unclip_ratio
+    )
+
+    # If input is an image
+    if args.input is not None:
+        image = cv.imread(args.input)
+        # cv.imread() returns None instead of raising when the file cannot be
+        # read; stop here with a clear message rather than failing in resize.
+        if image is None:
+            raise SystemExit('Failed to read image from {}'.format(args.input))
+        # The model only accepts images of exactly the configured size
+        image = cv.resize(image, (args.width, args.height))
+
+        # Inference
+        results = model.infer(image)
+
+        # Print results
+        print('{} texts detected.'.format(len(results[0])))
+        for idx, (bbox, score) in enumerate(zip(results[0], results[1])):
+            print('{}: {} {} {} {}, {:.2f}'.format(idx, bbox[0], bbox[1], bbox[2], bbox[3], score[0]))
+
+        # Draw results on the input image
+        image = visualize(image, results)
+
+        # Save results if save is true
+        if args.save:
+            print('Resutls saved to result.jpg\n')
+            cv.imwrite('result.jpg', image)
+
+        # Visualize results in a new window
+        if args.vis:
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, image)
+            cv.waitKey(0)
+    else: # Omit input to call default camera
+        deviceId = 0
+        cap = cv.VideoCapture(deviceId)
+
+        tm = cv.TickMeter()
+        try:
+            # Loop until any key is pressed or the camera stops delivering frames
+            while cv.waitKey(1) < 0:
+                hasFrame, frame = cap.read()
+                if not hasFrame:
+                    print('No frames grabbed!')
+                    break
+
+                frame = cv.resize(frame, (args.width, args.height))
+                # Inference
+                tm.start()
+                results = model.infer(frame) # results is a tuple
+                tm.stop()
+
+                # Draw results on the input image
+                frame = visualize(frame, results, fps=tm.getFPS())
+
+                # Visualize results in a new Window
+                cv.imshow('{} Demo'.format(model.name), frame)
+
+                # Reset so the next FPS value reflects only the next frame
+                tm.reset()
+        finally:
+            # Release the camera even if inference or drawing raises
+            cap.release()
\ No newline at end of file
--- a/models/text_detection_db/text_detection_db.onnx
+++ b/models/text_detection_db/text_detection_db.onnx
--- a/models/text_recognition_crnn/LICENSE
+++ b/models/text_recognition_crnn/LICENSE
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/models/text_recognition_crnn/README.md
+++ b/models/text_recognition_crnn/README.md
+# CRNN
+
+An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition
+
+`text_recognition_crnn.onnx` is trained using the code from https://github.com/zihaomu/deep-text-recognition-benchmark, which can only recognize English words. It is obtained from https://drive.google.com/drive/folders/1cTbQ3nuZG-EKWak6emD_s8_hHXWz7lAr and renamed from `CRNN_VGG_BiLSTM_CTC.onnx`. Visit https://docs.opencv.org/4.5.2/d9/d1e/tutorial_dnn_OCR.html for more information.
+
+## Demo
+
+***NOTE***: This demo uses [text_detection_db](../text_detection_db) as the text detector.
+
+Run the following command to try the demo:
+```shell
+# detect on camera input
+python demo.py
+# detect on an image
+python demo.py --input /path/to/image
+```
+
+## License
+
+All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
+
+## Reference
+
+- https://arxiv.org/abs/1507.05717
+- https://github.com/bgshih/crnn
+- https://github.com/meijieru/crnn.pytorch
+- https://github.com/zihaomu/deep-text-recognition-benchmark
+- https://docs.opencv.org/4.5.2/d9/d1e/tutorial_dnn_OCR.html
\ No newline at end of file
--- a/models/text_recognition_crnn/crnn.py
+++ b/models/text_recognition_crnn/crnn.py
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+import numpy as np
+import cv2 as cv
+
+class CRNN:
+    '''Text recognizer wrapping a CRNN network loaded through OpenCV DNN.
+
+    A quadrilateral text region is warped to a fixed 100x32 grayscale crop,
+    passed through the network, and the per-step class scores are decoded
+    into a string over the characters 0-9 and a-z.
+    '''
+
+    # Characters the network can emit. Class index 0 is the background class,
+    # so class index c corresponds to _ALPHABET[c - 1].
+    _ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
+
+    def __init__(self, modelPath):
+        '''Load the network from modelPath and set up the fixed input geometry.'''
+        self._model = cv.dnn.readNet(modelPath)
+        self._inputSize = [100, 32] # Fixed
+        # Corners of the network input in the order bottom-left, top-left,
+        # top-right, bottom-right; incoming box vertices are mapped onto these
+        # one by one, so they must follow the same order.
+        self._targetVertices = np.array([
+            [0, self._inputSize[1] - 1],
+            [0, 0],
+            [self._inputSize[0] - 1, 0],
+            [self._inputSize[0] - 1, self._inputSize[1] - 1]
+        ], dtype=np.float32)
+
+    @property
+    def name(self):
+        '''Name of the model, taken from the class name.'''
+        return self.__class__.__name__
+
+    def setBackend(self, backend_id):
+        '''Forward backend_id to the network's preferable backend.'''
+        self._model.setPreferableBackend(backend_id)
+
+    def setTarget(self, target_id):
+        '''Forward target_id to the network's preferable target.'''
+        self._model.setPreferableTarget(target_id)
+
+    @staticmethod
+    def _toVertices(rbbox):
+        '''Return the four (x, y) corners of rbbox as a 4x2 np.float32 array.
+
+        Only the first eight values are used, so a box given as
+        (x0, y0, ..., x3, y3, conf) has its trailing confidence dropped.
+        Raises ValueError when fewer than eight values are supplied.
+        '''
+        flat = np.asarray(rbbox, dtype=np.float32).reshape(-1)
+        return flat[:8].reshape((4, 2))
+
+    def _preprocess(self, image, rbbox):
+        '''Warp the region of image bounded by rbbox into the network input blob.'''
+        # Remove conf, reshape and ensure all is np.float32
+        vertices = self._toVertices(rbbox)
+        # Sizes are handed to OpenCV as tuples; a list is not accepted by
+        # every OpenCV release.
+        inputSize = tuple(self._inputSize)
+
+        rotationMatrix = cv.getPerspectiveTransform(vertices, self._targetVertices)
+        cropped = cv.warpPerspective(image, rotationMatrix, inputSize)
+
+        cropped = cv.cvtColor(cropped, cv.COLOR_BGR2GRAY)
+
+        # Scale pixel values from [0, 255] to roughly [-1, 1]
+        return cv.dnn.blobFromImage(cropped, size=inputSize, mean=127.5, scalefactor=1 / 127.5)
+
+    def infer(self, image, rbbox):
+        '''Recognize the text inside the box rbbox of image and return it as a str.'''
+        # Preprocess
+        inputBlob = self._preprocess(image, rbbox)
+
+        # Forward
+        self._model.setInput(inputBlob)
+        outputBlob = self._model.forward()
+
+        # Postprocess
+        results = self._postprocess(outputBlob)
+
+        return results
+
+    def _postprocess(self, outputBlob):
+        '''Decode characters from outputBlob.
+
+        outputBlob is indexed as [step][0][class]. The best-scoring class is
+        taken at every step; background steps (class 0) and consecutive
+        repeats of the same class are dropped to get the final text.
+        '''
+        indices = [int(np.argmax(step[0])) for step in outputBlob]
+
+        chars = []
+        previous = None
+        for index in indices:
+            # A character is emitted only on the first step of a run; a
+            # background step in between lets the same character repeat.
+            if index != 0 and index != previous:
+                chars.append(self._ALPHABET[index - 1])
+            previous = index
+        return ''.join(chars)
\ No newline at end of file
--- a/models/text_recognition_crnn/demo.py
+++ b/models/text_recognition_crnn/demo.py
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+
+import sys
+import argparse
+
+import numpy as np
+import cv2 as cv
+
+from crnn import CRNN
+
+sys.path.append('../text_detection_db')
+from db import DB
+
+def str2bool(v):
+    '''Convert a command-line word to a bool, ignoring letter case.
+
+    Raises NotImplementedError for a word that is neither an accepted
+    spelling of true nor of false.
+    '''
+    word = v.lower()
+    if word in ('on', 'yes', 'true', 'y', 't'):
+        return True
+    if word in ('off', 'no', 'false', 'n', 'f'):
+        return False
+    raise NotImplementedError
+
+# Command-line options of the demo. They are parsed at module level, so `args`
+# is available to everything defined below.
+parser = argparse.ArgumentParser(
+    description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
+parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
+parser.add_argument('--model', '-m', type=str, default='text_recognition_crnn.onnx', help='Path to the model.')
+parser.add_argument('--width', type=int, default=736,
+                    help='The width of input image being sent to the text detector.')
+parser.add_argument('--height', type=int, default=736,
+                    help='The height of input image being sent to the text detector.')
+# Parsed with str2bool like --vis: with type=str every non-empty value,
+# including "false", was truthy and turned saving on.
+parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
+parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
+args = parser.parse_args()
+
+def visualize(image, boxes, texts, color=(0, 255, 0), isClosed=True, thickness=2):
+    '''Return a copy of image with the detected polygons and their texts drawn.
+
+    boxes[0] holds the polygons; texts holds one string per polygon. The
+    polygons are drawn with color, isClosed and thickness, and every text is
+    written in red at the second vertex of its polygon. image itself is left
+    unmodified.
+    '''
+    canvas = image.copy()
+    polygons = boxes[0]
+
+    canvas = cv.polylines(canvas, np.array(polygons), isClosed, color, thickness)
+    for polygon, label in zip(polygons, texts):
+        anchor = polygon[1].astype(np.int32)
+        cv.putText(canvas, label, anchor, cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
+    return canvas
+
+if __name__ == '__main__':
+    # Instantiate CRNN for text recognition
+    recognizer = CRNN(modelPath=args.model)
+    # Instantiate DB for text detection
+    detector = DB(modelPath='../text_detection_db/text_detection_db.onnx',
+                  inputSize=[args.width, args.height],
+                  binaryThreshold=0.3,
+                  polygonThreshold=0.5,
+                  maxCandidates=200,
+                  unclipRatio=2.0
+    )
+    # Every input is resized to the detector input size; OpenCV takes the size as a tuple
+    frameSize = (args.width, args.height)
+
+    # If input is an image
+    if args.input is not None:
+        image = cv.imread(args.input)
+        # cv.imread returns None instead of raising when the file cannot be read
+        if image is None:
+            sys.exit('Could not read image: {}'.format(args.input))
+        image = cv.resize(image, frameSize)
+
+        # Inference: detect the text boxes, then recognize the text inside each one
+        results = detector.infer(image)
+        texts = [
+            recognizer.infer(image, box.reshape(8))
+            for box, _ in zip(results[0], results[1])
+        ]
+
+        # Draw results on the input image
+        image = visualize(image, results, texts)
+
+        # Save results if save is true
+        if args.save:
+            print('Results saved to result.jpg\n')
+            cv.imwrite('result.jpg', image)
+
+        # Visualize results in a new window
+        if args.vis:
+            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
+            cv.imshow(args.input, image)
+            cv.waitKey(0)
+    else: # Omit input to call default camera
+        deviceId = 0
+        cap = cv.VideoCapture(deviceId)
+
+        tm = cv.TickMeter()
+        try:
+            # Loop until any key is pressed
+            while cv.waitKey(1) < 0:
+                hasFrame, frame = cap.read()
+                if not hasFrame:
+                    print('No frames grabbed!')
+                    break
+
+                frame = cv.resize(frame, frameSize)
+                # Inference of text detector
+                tm.start()
+                results = detector.infer(frame)
+                tm.stop()
+                # Elapsed time in milliseconds, matching the 'Latency' label drawn below
+                latency_detector = tm.getTimeMilli()
+                tm.reset()
+                # Inference of text recognizer; the box is passed as its eight
+                # coordinates, exactly as in the image branch above
+                tm.start()
+                texts = [
+                    recognizer.infer(frame, box.reshape(8))
+                    for box, _ in zip(results[0], results[1])
+                ]
+                tm.stop()
+                latency_recognizer = tm.getTimeMilli()
+                tm.reset()
+
+                # Draw results on the input image
+                frame = visualize(frame, results, texts)
+
+                cv.putText(frame, 'Latency - {}: {:.2f} ms'.format(detector.name, latency_detector), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
+                cv.putText(frame, 'Latency - {}: {:.2f} ms'.format(recognizer.name, latency_recognizer), (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
+
+                # Visualize results in a new Window
+                cv.imshow('{} Demo'.format(recognizer.name), frame)
+        finally:
+            # Free the camera and close the window even if inference raises
+            cap.release()
+            cv.destroyAllWindows()
\ No newline at end of file
--- a/models/text_recognition_crnn/text_recognition_crnn.onnx
+++ b/models/text_recognition_crnn/text_recognition_crnn.onnx