diff --git a/README.md b/README.md
index 1517734c847dd580ad86b17ddf6b093157953d06..64cd2585b420597294d9d1d9152164a26d853709 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,8 @@ Mobile DEMO experience (based on EasyEdge and Paddle-Lite, supports iOS and Andr
-## PP-OCR 2.0 series model list(Update on Sep 17)
+
+## PP-OCR 2.0 series model list(Update on Dec 15)
| Model introduction | Model name | Recommended scene | Detection model | Direction classifier | Recognition model |
| ------------------------------------------------------------ | ---------------------------- | ----------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
diff --git a/deploy/hubserving/ocr_cls/params.py b/deploy/hubserving/ocr_cls/params.py
old mode 100644
new mode 100755
index bcdb2d6e3800c0ba7897b71f0b0999cafdc223af..72a7a10249176d86f75b5d3c3adae7f1021a75a8
--- a/deploy/hubserving/ocr_cls/params.py
+++ b/deploy/hubserving/ocr_cls/params.py
@@ -12,7 +12,7 @@ def read_params():
cfg = Config()
#params for text classifier
- cfg.cls_model_dir = "./inference/ch_ppocr_mobile_v1.1_cls_infer/"
+ cfg.cls_model_dir = "./inference/ch_ppocr_mobile_v2.0_cls_infer/"
cfg.cls_image_shape = "3, 48, 192"
cfg.label_list = ['0', '180']
cfg.cls_batch_num = 30
diff --git a/deploy/hubserving/ocr_det/params.py b/deploy/hubserving/ocr_det/params.py
old mode 100644
new mode 100755
index 4d4a9fc27b727034d8185c82dad3e542659fd463..e50decbbc8ee604863c5965aa95bf1f79fa71d0a
--- a/deploy/hubserving/ocr_det/params.py
+++ b/deploy/hubserving/ocr_det/params.py
@@ -13,7 +13,7 @@ def read_params():
#params for text detector
cfg.det_algorithm = "DB"
- cfg.det_model_dir = "./inference/ch_ppocr_mobile_v1.1_det_infer/"
+ cfg.det_model_dir = "./inference/ch_ppocr_mobile_v2.0_det_infer/"
cfg.det_limit_side_len = 960
cfg.det_limit_type = 'max'
@@ -27,16 +27,6 @@ def read_params():
# cfg.det_east_cover_thresh = 0.1
# cfg.det_east_nms_thresh = 0.2
- # #params for text recognizer
- # cfg.rec_algorithm = "CRNN"
- # cfg.rec_model_dir = "./inference/ch_det_mv3_crnn/"
-
- # cfg.rec_image_shape = "3, 32, 320"
- # cfg.rec_char_type = 'ch'
- # cfg.rec_batch_num = 30
- # cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
- # cfg.use_space_char = True
-
cfg.use_zero_copy_run = False
cfg.use_pdserving = False
diff --git a/deploy/hubserving/ocr_system/params.py b/deploy/hubserving/ocr_system/params.py
old mode 100644
new mode 100755
index 1f6a07bcc0167e90564edab9c4719b9192233b4c..a0e1960b2857630780f6b34773d7760279f862a2
--- a/deploy/hubserving/ocr_system/params.py
+++ b/deploy/hubserving/ocr_system/params.py
@@ -13,7 +13,7 @@ def read_params():
#params for text detector
cfg.det_algorithm = "DB"
- cfg.det_model_dir = "./inference/ch_ppocr_mobile_v1.1_det_infer/"
+ cfg.det_model_dir = "./inference/ch_ppocr_mobile_v2.0_det_infer/"
cfg.det_limit_side_len = 960
cfg.det_limit_type = 'max'
@@ -29,7 +29,7 @@ def read_params():
#params for text recognizer
cfg.rec_algorithm = "CRNN"
- cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v1.1_rec_infer/"
+ cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v2.0_rec_infer/"
cfg.rec_image_shape = "3, 32, 320"
cfg.rec_char_type = 'ch'
@@ -41,7 +41,7 @@ def read_params():
#params for text classifier
cfg.use_angle_cls = True
- cfg.cls_model_dir = "./inference/ch_ppocr_mobile_v1.1_cls_infer/"
+ cfg.cls_model_dir = "./inference/ch_ppocr_mobile_v2.0_cls_infer/"
cfg.cls_image_shape = "3, 48, 192"
cfg.label_list = ['0', '180']
cfg.cls_batch_num = 30
@@ -49,5 +49,6 @@ def read_params():
cfg.use_zero_copy_run = False
cfg.use_pdserving = False
+ cfg.drop_score = 0.5
return cfg
diff --git a/deploy/hubserving/readme.md b/deploy/hubserving/readme.md
old mode 100644
new mode 100755
index f64bd372569f12ea52214e3e89927df0c859a17f..d86d47041007f614d48c9b0e5adebc8739029aac
--- a/deploy/hubserving/readme.md
+++ b/deploy/hubserving/readme.md
@@ -33,11 +33,11 @@ pip3 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
```
### 2. 下载推理模型
-安装服务模块前,需要准备推理模型并放到正确路径。默认使用的是v1.1版的超轻量模型,默认模型路径为:
+安装服务模块前,需要准备推理模型并放到正确路径。默认使用的是v2.0版的超轻量模型,默认模型路径为:
```
-检测模型:./inference/ch_ppocr_mobile_v1.1_det_infer/
-识别模型:./inference/ch_ppocr_mobile_v1.1_rec_infer/
-方向分类器:./inference/ch_ppocr_mobile_v1.1_cls_infer/
+检测模型:./inference/ch_ppocr_mobile_v2.0_det_infer/
+识别模型:./inference/ch_ppocr_mobile_v2.0_rec_infer/
+方向分类器:./inference/ch_ppocr_mobile_v2.0_cls_infer/
```
**模型路径可在`params.py`中查看和修改。** 更多模型可以从PaddleOCR提供的[模型库](../../doc/doc_ch/models_list.md)下载,也可以替换成自己训练转换好的模型。
diff --git a/deploy/hubserving/readme_en.md b/deploy/hubserving/readme_en.md
old mode 100644
new mode 100755
index c6cf53413bc3eac45f933fead66356d1491cc60c..b2ffdf0b7af638281933bfd84f9304d9ec9867cf
--- a/deploy/hubserving/readme_en.md
+++ b/deploy/hubserving/readme_en.md
@@ -34,11 +34,11 @@ pip3 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
```
### 2. Download inference model
-Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the ultra lightweight model of v1.1 is used, and the default model path is:
+Before installing the service module, you need to prepare the inference models and put them in the correct paths. By default, the v2.0 ultra-lightweight models are used, and the default model paths are:
```
-detection model: ./inference/ch_ppocr_mobile_v1.1_det_infer/
-recognition model: ./inference/ch_ppocr_mobile_v1.1_rec_infer/
-text direction classifier: ./inference/ch_ppocr_mobile_v1.1_cls_infer/
+detection model: ./inference/ch_ppocr_mobile_v2.0_det_infer/
+recognition model: ./inference/ch_ppocr_mobile_v2.0_rec_infer/
+text direction classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
```
**The model path can be found and modified in `params.py`.** More models provided by PaddleOCR can be obtained from the [model library](../../doc/doc_en/models_list_en.md). You can also use models trained by yourself.
diff --git a/deploy/pdserving/det_local_server.py b/deploy/pdserving/det_local_server.py
deleted file mode 100644
index eb7948daadd018810997bba78367e86aa3398e31..0000000000000000000000000000000000000000
--- a/deploy/pdserving/det_local_server.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle_serving_client import Client
-import cv2
-import sys
-import numpy as np
-import os
-from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, ResizeByFactor
-from paddle_serving_app.reader import Div, Normalize, Transpose
-from paddle_serving_app.reader import DBPostProcess, FilterBoxes
-if sys.argv[1] == 'gpu':
- from paddle_serving_server_gpu.web_service import WebService
-elif sys.argv[1] == 'cpu':
- from paddle_serving_server.web_service import WebService
-import time
-import re
-import base64
-
-
-class OCRService(WebService):
- def init_det(self):
- self.det_preprocess = Sequential([
- ResizeByFactor(32, 960), Div(255),
- Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
- (2, 0, 1))
- ])
- self.filter_func = FilterBoxes(10, 10)
- self.post_func = DBPostProcess({
- "thresh": 0.3,
- "box_thresh": 0.5,
- "max_candidates": 1000,
- "unclip_ratio": 1.5,
- "min_size": 3
- })
-
- def preprocess(self, feed=[], fetch=[]):
- data = base64.b64decode(feed[0]["image"].encode('utf8'))
- data = np.fromstring(data, np.uint8)
- im = cv2.imdecode(data, cv2.IMREAD_COLOR)
- self.ori_h, self.ori_w, _ = im.shape
- det_img = self.det_preprocess(im)
- _, self.new_h, self.new_w = det_img.shape
- return {"image": det_img[np.newaxis, :].copy()}, ["concat_1.tmp_0"]
-
- def postprocess(self, feed={}, fetch=[], fetch_map=None):
- det_out = fetch_map["concat_1.tmp_0"]
- ratio_list = [
- float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
- ]
- dt_boxes_list = self.post_func(det_out, [ratio_list])
- dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
- return {"dt_boxes": dt_boxes.tolist()}
-
-
-ocr_service = OCRService(name="ocr")
-ocr_service.load_model_config("ocr_det_model")
-ocr_service.init_det()
-if sys.argv[1] == 'gpu':
- ocr_service.set_gpus("0")
- ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0)
- ocr_service.run_debugger_service(gpu=True)
-elif sys.argv[1] == 'cpu':
- ocr_service.prepare_server(workdir="workdir", port=9292)
- ocr_service.run_debugger_service()
-ocr_service.init_det()
-ocr_service.run_web_service()
diff --git a/deploy/pdserving/det_web_server.py b/deploy/pdserving/det_web_server.py
deleted file mode 100644
index 14be74130dcb413c31a3e76c150d74f65575f451..0000000000000000000000000000000000000000
--- a/deploy/pdserving/det_web_server.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle_serving_client import Client
-import cv2
-import sys
-import numpy as np
-import os
-from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, ResizeByFactor
-from paddle_serving_app.reader import Div, Normalize, Transpose
-from paddle_serving_app.reader import DBPostProcess, FilterBoxes
-if sys.argv[1] == 'gpu':
- from paddle_serving_server_gpu.web_service import WebService
-elif sys.argv[1] == 'cpu':
- from paddle_serving_server.web_service import WebService
-import time
-import re
-import base64
-
-
-class OCRService(WebService):
- def init_det(self):
- self.det_preprocess = Sequential([
- ResizeByFactor(32, 960), Div(255),
- Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
- (2, 0, 1))
- ])
- self.filter_func = FilterBoxes(10, 10)
- self.post_func = DBPostProcess({
- "thresh": 0.3,
- "box_thresh": 0.5,
- "max_candidates": 1000,
- "unclip_ratio": 1.5,
- "min_size": 3
- })
-
- def preprocess(self, feed=[], fetch=[]):
- data = base64.b64decode(feed[0]["image"].encode('utf8'))
- data = np.fromstring(data, np.uint8)
- im = cv2.imdecode(data, cv2.IMREAD_COLOR)
- self.ori_h, self.ori_w, _ = im.shape
- det_img = self.det_preprocess(im)
- _, self.new_h, self.new_w = det_img.shape
- print(det_img)
- return {"image": det_img}, ["concat_1.tmp_0"]
-
- def postprocess(self, feed={}, fetch=[], fetch_map=None):
- det_out = fetch_map["concat_1.tmp_0"]
- ratio_list = [
- float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
- ]
- dt_boxes_list = self.post_func(det_out, [ratio_list])
- dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
- return {"dt_boxes": dt_boxes.tolist()}
-
-
-ocr_service = OCRService(name="ocr")
-ocr_service.load_model_config("ocr_det_model")
-if sys.argv[1] == 'gpu':
- ocr_service.set_gpus("0")
- ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0)
-elif sys.argv[1] == 'cpu':
- ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
-ocr_service.init_det()
-ocr_service.run_rpc_service()
-ocr_service.run_web_service()
diff --git a/deploy/pdserving/ocr_local_server.py b/deploy/pdserving/ocr_local_server.py
deleted file mode 100644
index de5b3d13f12afd4a84c5d46625682c42f418d6bb..0000000000000000000000000000000000000000
--- a/deploy/pdserving/ocr_local_server.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle_serving_client import Client
-from paddle_serving_app.reader import OCRReader
-import cv2
-import sys
-import numpy as np
-import os
-from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
-from paddle_serving_app.reader import Div, Normalize, Transpose
-from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
-if sys.argv[1] == 'gpu':
- from paddle_serving_server_gpu.web_service import WebService
-elif sys.argv[1] == 'cpu':
- from paddle_serving_server.web_service import WebService
-from paddle_serving_app.local_predict import Debugger
-import time
-import re
-import base64
-
-
-class OCRService(WebService):
- def init_det_debugger(self, det_model_config):
- self.det_preprocess = Sequential([
- ResizeByFactor(32, 960), Div(255),
- Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
- (2, 0, 1))
- ])
- self.det_client = Debugger()
- if sys.argv[1] == 'gpu':
- self.det_client.load_model_config(
- det_model_config, gpu=True, profile=False)
- elif sys.argv[1] == 'cpu':
- self.det_client.load_model_config(
- det_model_config, gpu=False, profile=False)
- self.ocr_reader = OCRReader()
-
- def preprocess(self, feed=[], fetch=[]):
- data = base64.b64decode(feed[0]["image"].encode('utf8'))
- data = np.fromstring(data, np.uint8)
- im = cv2.imdecode(data, cv2.IMREAD_COLOR)
- ori_h, ori_w, _ = im.shape
- det_img = self.det_preprocess(im)
- _, new_h, new_w = det_img.shape
- det_img = det_img[np.newaxis, :]
- det_img = det_img.copy()
- det_out = self.det_client.predict(
- feed={"image": det_img}, fetch=["concat_1.tmp_0"])
- filter_func = FilterBoxes(10, 10)
- post_func = DBPostProcess({
- "thresh": 0.3,
- "box_thresh": 0.5,
- "max_candidates": 1000,
- "unclip_ratio": 1.5,
- "min_size": 3
- })
- sorted_boxes = SortedBoxes()
- ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w]
- dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list])
- dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w])
- dt_boxes = sorted_boxes(dt_boxes)
- get_rotate_crop_image = GetRotateCropImage()
- img_list = []
- max_wh_ratio = 0
- for i, dtbox in enumerate(dt_boxes):
- boximg = get_rotate_crop_image(im, dt_boxes[i])
- img_list.append(boximg)
- h, w = boximg.shape[0:2]
- wh_ratio = w * 1.0 / h
- max_wh_ratio = max(max_wh_ratio, wh_ratio)
- if len(img_list) == 0:
- return [], []
- _, w, h = self.ocr_reader.resize_norm_img(img_list[0],
- max_wh_ratio).shape
- imgs = np.zeros((len(img_list), 3, w, h)).astype('float32')
- for id, img in enumerate(img_list):
- norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
- imgs[id] = norm_img
- feed = {"image": imgs.copy()}
- fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
- return feed, fetch
-
- def postprocess(self, feed={}, fetch=[], fetch_map=None):
- rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
- res_lst = []
- for res in rec_res:
- res_lst.append(res[0])
- res = {"res": res_lst}
- return res
-
-
-ocr_service = OCRService(name="ocr")
-ocr_service.load_model_config("ocr_rec_model")
-ocr_service.init_det_debugger(det_model_config="ocr_det_model")
-if sys.argv[1] == 'gpu':
- ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0)
- ocr_service.run_debugger_service(gpu=True)
-elif sys.argv[1] == 'cpu':
- ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
- ocr_service.run_debugger_service()
-ocr_service.run_web_service()
diff --git a/deploy/pdserving/ocr_web_client.py b/deploy/pdserving/ocr_web_client.py
deleted file mode 100644
index e2a92eb8ee4aa62059be184dd7e67237ed460f13..0000000000000000000000000000000000000000
--- a/deploy/pdserving/ocr_web_client.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -*- coding: utf-8 -*-
-
-import requests
-import json
-import cv2
-import base64
-import os, sys
-import time
-
-def cv2_to_base64(image):
- #data = cv2.imencode('.jpg', image)[1]
- return base64.b64encode(image).decode(
- 'utf8') #data.tostring()).decode('utf8')
-
-headers = {"Content-type": "application/json"}
-url = "http://127.0.0.1:9292/ocr/prediction"
-test_img_dir = "../../doc/imgs/"
-for img_file in os.listdir(test_img_dir):
- with open(os.path.join(test_img_dir, img_file), 'rb') as file:
- image_data1 = file.read()
- image = cv2_to_base64(image_data1)
- data = {"feed": [{"image": image}], "fetch": ["res"]}
- r = requests.post(url=url, headers=headers, data=json.dumps(data))
- print(r.json())
diff --git a/deploy/pdserving/ocr_web_server.py b/deploy/pdserving/ocr_web_server.py
deleted file mode 100644
index 6c0de44661958a6425f57039261969551ff552c5..0000000000000000000000000000000000000000
--- a/deploy/pdserving/ocr_web_server.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle_serving_client import Client
-from paddle_serving_app.reader import OCRReader
-import cv2
-import sys
-import numpy as np
-import os
-from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
-from paddle_serving_app.reader import Div, Normalize, Transpose
-from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
-if sys.argv[1] == 'gpu':
- from paddle_serving_server_gpu.web_service import WebService
-elif sys.argv[1] == 'cpu':
- from paddle_serving_server.web_service import WebService
-import time
-import re
-import base64
-
-
-class OCRService(WebService):
- def init_det_client(self, det_port, det_client_config):
- self.det_preprocess = Sequential([
- ResizeByFactor(32, 960), Div(255),
- Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
- (2, 0, 1))
- ])
- self.det_client = Client()
- self.det_client.load_client_config(det_client_config)
- self.det_client.connect(["127.0.0.1:{}".format(det_port)])
- self.ocr_reader = OCRReader()
-
- def preprocess(self, feed=[], fetch=[]):
- data = base64.b64decode(feed[0]["image"].encode('utf8'))
- data = np.fromstring(data, np.uint8)
- im = cv2.imdecode(data, cv2.IMREAD_COLOR)
- ori_h, ori_w, _ = im.shape
- det_img = self.det_preprocess(im)
- det_out = self.det_client.predict(
- feed={"image": det_img}, fetch=["concat_1.tmp_0"])
- _, new_h, new_w = det_img.shape
- filter_func = FilterBoxes(10, 10)
- post_func = DBPostProcess({
- "thresh": 0.3,
- "box_thresh": 0.5,
- "max_candidates": 1000,
- "unclip_ratio": 1.5,
- "min_size": 3
- })
- sorted_boxes = SortedBoxes()
- ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w]
- dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list])
- dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w])
- dt_boxes = sorted_boxes(dt_boxes)
- get_rotate_crop_image = GetRotateCropImage()
- feed_list = []
- img_list = []
- max_wh_ratio = 0
- for i, dtbox in enumerate(dt_boxes):
- boximg = get_rotate_crop_image(im, dt_boxes[i])
- img_list.append(boximg)
- h, w = boximg.shape[0:2]
- wh_ratio = w * 1.0 / h
- max_wh_ratio = max(max_wh_ratio, wh_ratio)
- for img in img_list:
- norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
- feed = {"image": norm_img}
- feed_list.append(feed)
- fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
- return feed_list, fetch
-
- def postprocess(self, feed={}, fetch=[], fetch_map=None):
- rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
- res_lst = []
- for res in rec_res:
- res_lst.append(res[0])
- res = {"res": res_lst}
- return res
-
-
-ocr_service = OCRService(name="ocr")
-ocr_service.load_model_config("ocr_rec_model")
-if sys.argv[1] == 'gpu':
- ocr_service.set_gpus("0")
- ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0)
-elif sys.argv[1] == 'cpu':
- ocr_service.prepare_server(workdir="workdir", port=9292)
-ocr_service.init_det_client(
- det_port=9293,
- det_client_config="ocr_det_client/serving_client_conf.prototxt")
-ocr_service.run_rpc_service()
-ocr_service.run_web_service()
diff --git a/deploy/pdserving/readme.md b/deploy/pdserving/readme.md
deleted file mode 100644
index f9ad80b896be0be29e3a7bb17e4aa119af81d5c4..0000000000000000000000000000000000000000
--- a/deploy/pdserving/readme.md
+++ /dev/null
@@ -1,132 +0,0 @@
-# Paddle Serving 服务部署(Beta)
-
-本教程将介绍基于[Paddle Serving](https://github.com/PaddlePaddle/Serving)部署PaddleOCR在线预测服务的详细步骤。
-
-## 快速启动服务
-
-### 1. 准备环境
-我们先安装Paddle Serving相关组件
-我们推荐用户使用GPU来做Paddle Serving的OCR服务部署
-
-**CUDA版本:9.0**
-
-**CUDNN版本:7.0**
-
-**操作系统版本:CentOS 6以上**
-
-**Python3操作指南:**
-```
-#以下提供beta版本的paddle serving whl包,欢迎试用,正式版会在8月中正式上线
-#GPU用户下载server包使用这个链接
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_server_gpu-0.3.2-py3-none-any.whl
-python -m pip install paddle_serving_server_gpu-0.3.2-py3-none-any.whl
-#CPU版本使用这个链接
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_server-0.3.2-py3-none-any.whl
-python -m pip install paddle_serving_server-0.3.2-py3-none-any.whl
-#客户端和App包使用以下链接(CPU,GPU通用)
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_client-0.3.2-cp36-none-any.whl
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_app-0.1.2-py3-none-any.whl
-python -m pip install paddle_serving_app-0.1.2-py3-none-any.whl paddle_serving_client-0.3.2-cp36-none-any.whl
-```
-
-**Python2操作指南:**
-```
-#以下提供beta版本的paddle serving whl包,欢迎试用,正式版会在8月中正式上线
-#GPU用户下载server包使用这个链接
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_server_gpu-0.3.2-py2-none-any.whl
-python -m pip install paddle_serving_server_gpu-0.3.2-py2-none-any.whl
-#CPU版本使用这个链接
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_server-0.3.2-py2-none-any.whl
-python -m pip install paddle_serving_server-0.3.2-py2-none-any.whl
-
-#客户端和App包使用以下链接(CPU,GPU通用)
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_app-0.1.2-py2-none-any.whl
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/others/paddle_serving_client-0.3.2-cp27-none-any.whl
-python -m pip install paddle_serving_app-0.1.2-py2-none-any.whl paddle_serving_client-0.3.2-cp27-none-any.whl
-```
-
-### 2. 模型转换
-可以使用`paddle_serving_app`提供的模型,执行下列命令
-```
-python -m paddle_serving_app.package --get_model ocr_rec
-tar -xzvf ocr_rec.tar.gz
-python -m paddle_serving_app.package --get_model ocr_det
-tar -xzvf ocr_det.tar.gz
-```
-执行上述命令会下载`db_crnn_mobile`的模型,如果想要下载规模更大的`db_crnn_server`模型,可以在下载预测模型并解压之后。参考[如何从Paddle保存的预测模型转为Paddle Serving格式可部署的模型](https://github.com/PaddlePaddle/Serving/blob/develop/doc/INFERENCE_TO_SERVING_CN.md)。
-
-我们以`ch_rec_r34_vd_crnn`模型作为例子,下载链接在:
-
-```
-wget --no-check-certificate https://paddleocr.bj.bcebos.com/ch_models/ch_rec_r34_vd_crnn_infer.tar
-tar xf ch_rec_r34_vd_crnn_infer.tar
-```
-因此我们按照Serving模型转换教程,运行下列python文件。
-```
-from paddle_serving_client.io import inference_model_to_serving
-inference_model_dir = "ch_rec_r34_vd_crnn"
-serving_client_dir = "serving_client_dir"
-serving_server_dir = "serving_server_dir"
-feed_var_names, fetch_var_names = inference_model_to_serving(
- inference_model_dir, serving_client_dir, serving_server_dir, model_filename="model", params_filename="params")
-```
-最终会在`serving_client_dir`和`serving_server_dir`生成客户端和服务端的模型配置。
-
-### 3. 启动服务
-启动服务可以根据实际需求选择启动`标准版`或者`快速版`,两种方式的对比如下表:
-
-|版本|特点|适用场景|
-|-|-|-|
-|标准版|稳定性高,分布式部署|适用于吞吐量大,需要跨机房部署的情况|
-|快速版|部署方便,预测速度快|适用于对预测速度要求高,迭代速度快的场景|
-
-#### 方式1. 启动标准版服务
-
-```
-# cpu,gpu启动二选一,以下是cpu启动
-python -m paddle_serving_server.serve --model ocr_det_model --port 9293
-python ocr_web_server.py cpu
-# gpu启动
-python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
-python ocr_web_server.py gpu
-```
-
-#### 方式2. 启动快速版服务
-
-```
-# cpu,gpu启动二选一,以下是cpu启动
-python ocr_local_server.py cpu
-# gpu启动
-python ocr_local_server.py gpu
-```
-
-## 发送预测请求
-
-```
-python ocr_web_client.py
-```
-
-## 返回结果格式说明
-
-返回结果是json格式
-```
-{u'result': {u'res': [u'\u571f\u5730\u6574\u6cbb\u4e0e\u571f\u58e4\u4fee\u590d\u7814\u7a76\u4e2d\u5fc3', u'\u534e\u5357\u519c\u4e1a\u5927\u5b661\u7d20\u56fe']}}
-```
-我们也可以打印结果json串中`res`字段的每一句话
-```
-土地整治与土壤修复研究中心
-华南农业大学1素图
-```
-
-## 自定义修改服务逻辑
-
-在`ocr_web_server.py`或是`ocr_local_server.py`当中的`preprocess`函数里面做了检测服务和识别服务的前处理,`postprocess`函数里面做了识别的后处理服务,可以在相应的函数中做修改。调用了`paddle_serving_app`库提供的常见CV模型的前处理/后处理库。
-
-如果想要单独启动Paddle Serving的检测服务和识别服务,参见下列表格, 执行对应的脚本即可,并且在命令行参数注明用的CPU或是GPU来提供服务。
-
-| 模型 | 标准版 | 快速版 |
-| ---- | ----------------- | ------------------- |
-| 检测 | det_web_server.py | det_local_server.py |
-| 识别 | rec_web_server.py | rec_local_server.py |
-
-更多信息参见[Paddle Serving](https://github.com/PaddlePaddle/Serving)
diff --git a/deploy/pdserving/rec_local_server.py b/deploy/pdserving/rec_local_server.py
deleted file mode 100644
index ba021c1cd5054071eb115b3e6e9c64cb572ff871..0000000000000000000000000000000000000000
--- a/deploy/pdserving/rec_local_server.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle_serving_client import Client
-from paddle_serving_app.reader import OCRReader
-import cv2
-import sys
-import numpy as np
-import os
-from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
-from paddle_serving_app.reader import Div, Normalize, Transpose
-from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
-if sys.argv[1] == 'gpu':
- from paddle_serving_server_gpu.web_service import WebService
-elif sys.argv[1] == 'cpu':
- from paddle_serving_server.web_service import WebService
-import time
-import re
-import base64
-
-
-class OCRService(WebService):
- def init_rec(self):
- self.ocr_reader = OCRReader()
-
- def preprocess(self, feed=[], fetch=[]):
- img_list = []
- for feed_data in feed:
- data = base64.b64decode(feed_data["image"].encode('utf8'))
- data = np.fromstring(data, np.uint8)
- im = cv2.imdecode(data, cv2.IMREAD_COLOR)
- img_list.append(im)
- max_wh_ratio = 0
- for i, boximg in enumerate(img_list):
- h, w = boximg.shape[0:2]
- wh_ratio = w * 1.0 / h
- max_wh_ratio = max(max_wh_ratio, wh_ratio)
- _, w, h = self.ocr_reader.resize_norm_img(img_list[0],
- max_wh_ratio).shape
- imgs = np.zeros((len(img_list), 3, w, h)).astype('float32')
- for i, img in enumerate(img_list):
- norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
- imgs[i] = norm_img
- feed = {"image": imgs.copy()}
- fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
- return feed, fetch
-
- def postprocess(self, feed={}, fetch=[], fetch_map=None):
- rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
- res_lst = []
- for res in rec_res:
- res_lst.append(res[0])
- res = {"res": res_lst}
- return res
-
-
-ocr_service = OCRService(name="ocr")
-ocr_service.load_model_config("ocr_rec_model")
-ocr_service.init_rec()
-if sys.argv[1] == 'gpu':
- ocr_service.set_gpus("0")
- ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0)
- ocr_service.run_debugger_service(gpu=True)
-elif sys.argv[1] == 'cpu':
- ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
- ocr_service.run_debugger_service()
-ocr_service.run_web_service()
diff --git a/deploy/pdserving/rec_web_server.py b/deploy/pdserving/rec_web_server.py
deleted file mode 100644
index 0f4e9f6d264ed602f387bfaf0303cd59af7823fa..0000000000000000000000000000000000000000
--- a/deploy/pdserving/rec_web_server.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle_serving_client import Client
-from paddle_serving_app.reader import OCRReader
-import cv2
-import sys
-import numpy as np
-import os
-from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
-from paddle_serving_app.reader import Div, Normalize, Transpose
-from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
-if sys.argv[1] == 'gpu':
- from paddle_serving_server_gpu.web_service import WebService
-elif sys.argv[1] == 'cpu':
- from paddle_serving_server.web_service import WebService
-import time
-import re
-import base64
-
-
-class OCRService(WebService):
- def init_rec(self):
- self.ocr_reader = OCRReader()
-
- def preprocess(self, feed=[], fetch=[]):
- # TODO: to handle batch rec images
- img_list = []
- for feed_data in feed:
- data = base64.b64decode(feed_data["image"].encode('utf8'))
- data = np.fromstring(data, np.uint8)
- im = cv2.imdecode(data, cv2.IMREAD_COLOR)
- img_list.append(im)
- feed_list = []
- max_wh_ratio = 0
- for i, boximg in enumerate(img_list):
- h, w = boximg.shape[0:2]
- wh_ratio = w * 1.0 / h
- max_wh_ratio = max(max_wh_ratio, wh_ratio)
- for img in img_list:
- norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
- feed = {"image": norm_img}
- feed_list.append(feed)
- fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
- return feed_list, fetch
-
- def postprocess(self, feed={}, fetch=[], fetch_map=None):
- rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
- res_lst = []
- for res in rec_res:
- res_lst.append(res[0])
- res = {"res": res_lst}
- return res
-
-
-ocr_service = OCRService(name="ocr")
-ocr_service.load_model_config("ocr_rec_model")
-ocr_service.init_rec()
-if sys.argv[1] == 'gpu':
- ocr_service.set_gpus("0")
- ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0)
-elif sys.argv[1] == 'cpu':
- ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
-ocr_service.run_rpc_service()
-ocr_service.run_web_service()
diff --git a/doc/doc_ch/models_list.md b/doc/doc_ch/models_list.md
index b281e1e736f6c3747c2ae07188dc6f87abfc67a8..285c9899d867c0365c98be8ca9844aa555389356 100644
--- a/doc/doc_ch/models_list.md
+++ b/doc/doc_ch/models_list.md
@@ -68,7 +68,6 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
|ch_ppocr_mobile_v2.0_cls|原始模型|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |
-## OCR模型列表(V1.1,2020年9月22日更新)
-
-[1.1系列模型地址](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/models_list.md)
+## OCR模型列表(V2.0,2020年12月15日更新)
+[2.0系列模型地址](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/models_list.md)
diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md
index 87d60c5504d28c3cae660ebfd3765bb6893f163e..d67179e026f6dc5b0f2baaea482f6b8cee337dc5 100644
--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -7,7 +7,7 @@
- [字典](#字典)
- [支持空格](#支持空格)
-- [二、启动训练](#文本检测模型推理)
+- [二、启动训练](#启动训练)
- [1. 数据增强](#数据增强)
- [2. 训练](#训练)
- [3. 小语种](#小语种)
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
index 532ebd90cf149813acc9ad929840e1611766f652..7888bd96476e21aa3c12edba283742db425c0219 100644
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -1,7 +1,7 @@
## Algorithm introduction
-This tutorial lists the text detection algorithms and text recognition algorithms supported by PaddleOCR, as well as the models and metrics of each algorithm on **English public datasets**. It is mainly used for algorithm introduction and algorithm performance comparison. For more models on other datasets including Chinese, please refer to [PP-OCR v1.1 models list](./models_list_en.md).
+This tutorial lists the text detection algorithms and text recognition algorithms supported by PaddleOCR, as well as the models and metrics of each algorithm on **English public datasets**. It is mainly used for algorithm introduction and algorithm performance comparison. For more models on other datasets including Chinese, please refer to [PP-OCR v2.0 models list](./models_list_en.md).
- [1. Text Detection Algorithm](#TEXTDETECTIONALGORITHM)
diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md
index 63d8c598bbe4e3b37ae47804e595438ee79905c8..f92820232e140ec255ca1fe2a62e85280263dc67 100644
--- a/doc/doc_en/models_list_en.md
+++ b/doc/doc_en/models_list_en.md
@@ -1,4 +1,4 @@
-## OCR model list(V1.1, updated on 2020.12.12)
+## OCR model list (V2.0, updated on 2020.12.12)
- [1. Text Detection Model](#Detection)
- [2. Text Recognition Model](#Recognition)
@@ -66,6 +66,6 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine
|ch_ppocr_mobile_v2.0_cls|Original model|[cls_mv3.yml](../../configs/cls/cls_mv3.yml)|1.38M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |
-## OCR model list (V1.1,updated on 2020.9.22)
+## OCR model list (V2.0, updated on 2020.12.15)
-[1.1 series model address](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/models_list.md)
+[2.0 series model address](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/models_list.md)
diff --git a/ppocr/modeling/transforms/tps.py b/ppocr/modeling/transforms/tps.py
index 50c1740ee4a3c687405c4d28818543c043e53227..74bec7416bb1fd970ad00aecfdafc4173827a145 100644
--- a/ppocr/modeling/transforms/tps.py
+++ b/ppocr/modeling/transforms/tps.py
@@ -128,7 +128,7 @@ class LocalizationNetwork(nn.Layer):
i = 0
for block in self.block_list:
x = block(x)
- x = x.reshape([B, -1])
+ x = x.squeeze(axis=2).squeeze(axis=2)
x = self.fc1(x)
x = F.relu(x)
@@ -176,14 +176,15 @@ class GridGenerator(nn.Layer):
Return:
batch_P_prime: the grid for the grid_sampler
"""
- C = self.build_C()
- P = self.build_P(I_r_size)
- inv_delta_C = self.build_inv_delta_C(C).astype('float32')
- P_hat = self.build_P_hat(C, P).astype('float32')
+ C = self.build_C_paddle()
+ P = self.build_P_paddle(I_r_size)
+
+ inv_delta_C_tensor = self.build_inv_delta_C_paddle(C).astype('float32')
+ # inv_delta_C_tensor = paddle.zeros((23,23)).astype('float32')
+ P_hat_tensor = self.build_P_hat_paddle(
+ C, paddle.to_tensor(P)).astype('float32')
- inv_delta_C_tensor = paddle.to_tensor(inv_delta_C)
inv_delta_C_tensor.stop_gradient = True
- P_hat_tensor = paddle.to_tensor(P_hat)
P_hat_tensor.stop_gradient = True
batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
@@ -196,71 +197,80 @@ class GridGenerator(nn.Layer):
batch_P_prime = paddle.matmul(P_hat_tensor, batch_T)
return batch_P_prime
- def build_C(self):
+ def build_C_paddle(self):
""" Return coordinates of fiducial points in I_r; C """
F = self.F
- ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
- ctrl_pts_y_top = -1 * np.ones(int(F / 2))
- ctrl_pts_y_bottom = np.ones(int(F / 2))
- ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
- ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
- C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
+ ctrl_pts_x = paddle.linspace(-1.0, 1.0, int(F / 2))
+ ctrl_pts_y_top = -1 * paddle.ones([int(F / 2)])
+ ctrl_pts_y_bottom = paddle.ones([int(F / 2)])
+ ctrl_pts_top = paddle.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
+ ctrl_pts_bottom = paddle.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
+ C = paddle.concat([ctrl_pts_top, ctrl_pts_bottom], axis=0)
return C # F x 2
- def build_P(self, I_r_size):
- I_r_width, I_r_height = I_r_size
- I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0) \
- / I_r_width # self.I_r_width
- I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0) \
- / I_r_height # self.I_r_height
+ def build_P_paddle(self, I_r_size):
+ I_r_height, I_r_width = I_r_size
+ I_r_grid_x = (
+ paddle.arange(-I_r_width, I_r_width, 2).astype('float32') + 1.0
+ ) / I_r_width # self.I_r_width
+ I_r_grid_y = (
+ paddle.arange(-I_r_height, I_r_height, 2).astype('float32') + 1.0
+ ) / I_r_height # self.I_r_height
# P: self.I_r_width x self.I_r_height x 2
- P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
+ P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
+ P = paddle.transpose(P, perm=[1, 0, 2])
# n (= self.I_r_width x self.I_r_height) x 2
return P.reshape([-1, 2])
- def build_inv_delta_C(self, C):
+ def build_inv_delta_C_paddle(self, C):
""" Return inv_delta_C which is needed to calculate T """
F = self.F
- hat_C = np.zeros((F, F), dtype=float) # F x F
+ hat_C = paddle.zeros((F, F), dtype='float32') # F x F
for i in range(0, F):
for j in range(i, F):
- r = np.linalg.norm(C[i] - C[j])
- hat_C[i, j] = r
- hat_C[j, i] = r
- np.fill_diagonal(hat_C, 1)
- hat_C = (hat_C**2) * np.log(hat_C)
- # print(C.shape, hat_C.shape)
- delta_C = np.concatenate( # F+3 x F+3
+ if i == j:
+ hat_C[i, j] = 1
+ else:
+ r = paddle.norm(C[i] - C[j])
+ hat_C[i, j] = r
+ hat_C[j, i] = r
+ hat_C = (hat_C**2) * paddle.log(hat_C)
+ delta_C = paddle.concat( # F+3 x F+3
[
- np.concatenate(
- [np.ones((F, 1)), C, hat_C], axis=1), # F x F+3
- np.concatenate(
- [np.zeros((2, 3)), np.transpose(C)], axis=1), # 2 x F+3
- np.concatenate(
- [np.zeros((1, 3)), np.ones((1, F))], axis=1) # 1 x F+3
+ paddle.concat(
+ [paddle.ones((F, 1)), C, hat_C], axis=1), # F x F+3
+ paddle.concat(
+ [paddle.zeros((2, 3)), paddle.transpose(
+ C, perm=[1, 0])],
+ axis=1), # 2 x F+3
+ paddle.concat(
+ [paddle.zeros((1, 3)), paddle.ones((1, F))],
+ axis=1) # 1 x F+3
],
axis=0)
- inv_delta_C = np.linalg.inv(delta_C)
+ inv_delta_C = paddle.inverse(delta_C)
return inv_delta_C # F+3 x F+3
- def build_P_hat(self, C, P):
+ def build_P_hat_paddle(self, C, P):
F = self.F
eps = self.eps
n = P.shape[0] # n (= self.I_r_width x self.I_r_height)
# P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
- P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
- C_tile = np.expand_dims(C, axis=0) # 1 x F x 2
+ P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1))
+ C_tile = paddle.unsqueeze(C, axis=0) # 1 x F x 2
P_diff = P_tile - C_tile # n x F x 2
# rbf_norm: n x F
- rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
+ rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False)
+
# rbf: n x F
- rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
- P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
+ rbf = paddle.multiply(
+ paddle.square(rbf_norm), paddle.log(rbf_norm + eps))
+ P_hat = paddle.concat([paddle.ones((n, 1)), P, rbf], axis=1)
return P_hat # n x F+3
def get_expand_tensor(self, batch_C_prime):
- B = batch_C_prime.shape[0]
- batch_C_prime = batch_C_prime.reshape([B, -1])
+ B, H, C = batch_C_prime.shape
+ batch_C_prime = batch_C_prime.reshape([B, H * C])
batch_C_ex_part_tensor = self.fc(batch_C_prime)
batch_C_ex_part_tensor = batch_C_ex_part_tensor.reshape([-1, 3, 2])
return batch_C_ex_part_tensor
@@ -277,10 +287,8 @@ class TPS(nn.Layer):
def forward(self, image):
image.stop_gradient = False
- I_r_size = [image.shape[3], image.shape[2]]
-
batch_C_prime = self.loc_net(image)
- batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
+ batch_P_prime = self.grid_generator(batch_C_prime, image.shape[2:])
batch_P_prime = batch_P_prime.reshape(
[-1, image.shape[2], image.shape[3], 2])
batch_I_r = F.grid_sample(x=image, grid=batch_P_prime)
diff --git a/tools/test_hubserving.py b/tools/test_hubserving.py
old mode 100644
new mode 100755
index f28ff39e441e9f0d8a4c6e1081827daf8aff9792..0548726417699855a3905fa1a3fb679d69c85fc8
--- a/tools/test_hubserving.py
+++ b/tools/test_hubserving.py
@@ -17,8 +17,9 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
+from ppocr.utils.logging import get_logger
+logger = get_logger()
+
import cv2
import numpy as np
import time