diff --git a/python/examples/ocr/README.md b/python/examples/ocr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..04c4fd3eaa304e55d980a2cf4fc34dda50f5009c --- /dev/null +++ b/python/examples/ocr/README.md @@ -0,0 +1,21 @@ +# OCR + +## Get Model +``` +python -m paddle_serving_app.package --get_model ocr_rec +tar -xzvf ocr_rec.tar.gz +``` + +## RPC Service + +### Start Service + +``` +python -m paddle_serving_server.serve --model ocr_rec_model --port 9292 +``` + +### Client Prediction + +``` +python test_ocr_rec_client.py +``` diff --git a/python/examples/ocr/test_ocr_rec_client.py b/python/examples/ocr/test_ocr_rec_client.py new file mode 100644 index 0000000000000000000000000000000000000000..b61256d03202374ada5b0d50a075fef156eca2ea --- /dev/null +++ b/python/examples/ocr/test_ocr_rec_client.py @@ -0,0 +1,31 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle_serving_client import Client +from paddle_serving_app.reader import OCRReader +import cv2 + +client = Client() +client.load_client_config("ocr_rec_client/serving_client_conf.prototxt") +client.connect(["127.0.0.1:9292"]) + +image_file_list = ["./test_rec.jpg"] +img = cv2.imread(image_file_list[0]) +ocr_reader = OCRReader() +feed = {"image": ocr_reader.preprocess([img])} +fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] +fetch_map = client.predict(feed=feed, fetch=fetch) +rec_res = ocr_reader.postprocess(fetch_map) +print(image_file_list[0]) +print(rec_res[0][0]) diff --git a/python/examples/ocr/test_rec.jpg b/python/examples/ocr/test_rec.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2c34cd33eac5766a072fde041fa6c9b1d612f1db Binary files /dev/null and b/python/examples/ocr/test_rec.jpg differ diff --git a/python/paddle_serving_app/models/model_list.py b/python/paddle_serving_app/models/model_list.py index 28c759b9c72338bb60971ba030df49378fe75161..cf0ca3bf5765d65065e541462eb919ccc5c4b978 100644 --- a/python/paddle_serving_app/models/model_list.py +++ b/python/paddle_serving_app/models/model_list.py @@ -31,10 +31,12 @@ class ServingModels(object): self.model_dict["ImageClassification"] = [ "resnet_v2_50_imagenet", "mobilenet_v2_imagenet" ] + self.model_dict["OCR"] = ["ocr_rec"] image_class_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageClassification/" image_seg_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageSegmentation/" object_detection_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ObjectDetection/" + ocr_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/" senta_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/" semantic_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticRepresentation/" wordseg_url = "https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/" @@ -52,6 +54,7 @@ class ServingModels(object): pack_url(self.model_dict, "ObjectDetection", object_detection_url) pack_url(self.model_dict, "ImageSegmentation", image_seg_url) pack_url(self.model_dict, "ImageClassification", image_class_url) + pack_url(self.model_dict, "OCR", ocr_url) def get_model_list(self): return self.model_dict diff --git a/python/paddle_serving_app/reader/__init__.py b/python/paddle_serving_app/reader/__init__.py index 0eee878284e2028657a660acd38a21934bb5ccd7..b2b5e75ac430ecf897e34ec7afc994c9ccf8ee66 100644 --- a/python/paddle_serving_app/reader/__init__.py +++ b/python/paddle_serving_app/reader/__init__.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. from .chinese_bert_reader import ChineseBertReader -from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, RCNNPostprocess, SegPostprocess, PadStride +from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize +from .image_reader import CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB +from .image_reader import RCNNPostprocess, SegPostprocess, PadStride from .lac_reader import LACReader from .senta_reader import SentaReader from .imdb_reader import IMDBDataset +from .ocr_reader import OCRReader diff --git a/python/paddle_serving_app/reader/ocr_reader.py b/python/paddle_serving_app/reader/ocr_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..e5dc88482bd5e0a7a26873fd5cb60c43dc5104c9 --- /dev/null +++ b/python/paddle_serving_app/reader/ocr_reader.py @@ -0,0 +1,203 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cv2 +import copy +import numpy as np +import math +import re +import sys +import argparse +from paddle_serving_app.reader import Sequential, Resize, Transpose, Div, Normalize + + +class CharacterOps(object): + """ Convert between text-label and text-index """ + + def __init__(self, config): + self.character_type = config['character_type'] + self.loss_type = config['loss_type'] + if self.character_type == "en": + self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" + dict_character = list(self.character_str) + elif self.character_type == "ch": + character_dict_path = config['character_dict_path'] + self.character_str = "" + with open(character_dict_path, "rb") as fin: + lines = fin.readlines() + for line in lines: + line = line.decode('utf-8').strip("\n").strip("\r\n") + self.character_str += line + dict_character = list(self.character_str) + elif self.character_type == "en_sensitive": + # same with ASTER setting (use 94 char). + self.character_str = string.printable[:-6] + dict_character = list(self.character_str) + else: + self.character_str = None + assert self.character_str is not None, \ + "Nonsupport type of the character: {}".format(self.character_str) + self.beg_str = "sos" + self.end_str = "eos" + if self.loss_type == "attention": + dict_character = [self.beg_str, self.end_str] + dict_character + self.dict = {} + for i, char in enumerate(dict_character): + self.dict[char] = i + self.character = dict_character + + def encode(self, text): + """convert text-label into text-index. + input: + text: text labels of each image. [batch_size] + + output: + text: concatenated text index for CTCLoss. + [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] + length: length of each text. [batch_size] + """ + if self.character_type == "en": + text = text.lower() + + text_list = [] + for char in text: + if char not in self.dict: + continue + text_list.append(self.dict[char]) + text = np.array(text_list) + return text + + def decode(self, text_index, is_remove_duplicate=False): + """ convert text-index into text-label. """ + char_list = [] + char_num = self.get_char_num() + + if self.loss_type == "attention": + beg_idx = self.get_beg_end_flag_idx("beg") + end_idx = self.get_beg_end_flag_idx("end") + ignored_tokens = [beg_idx, end_idx] + else: + ignored_tokens = [char_num] + + for idx in range(len(text_index)): + if text_index[idx] in ignored_tokens: + continue + if is_remove_duplicate: + if idx > 0 and text_index[idx - 1] == text_index[idx]: + continue + char_list.append(self.character[text_index[idx]]) + text = ''.join(char_list) + return text + + def get_char_num(self): + return len(self.character) + + def get_beg_end_flag_idx(self, beg_or_end): + if self.loss_type == "attention": + if beg_or_end == "beg": + idx = np.array(self.dict[self.beg_str]) + elif beg_or_end == "end": + idx = np.array(self.dict[self.end_str]) + else: + assert False, "Unsupport type %s in get_beg_end_flag_idx"\ + % beg_or_end + return idx + else: + err = "error in get_beg_end_flag_idx when using the loss %s"\ + % (self.loss_type) + assert False, err + + +class OCRReader(object): + def __init__(self): + args = self.parse_args() + image_shape = [int(v) for v in args.rec_image_shape.split(",")] + self.rec_image_shape = image_shape + self.character_type = args.rec_char_type + self.rec_batch_num = args.rec_batch_num + char_ops_params = {} + char_ops_params["character_type"] = args.rec_char_type + char_ops_params["character_dict_path"] = args.rec_char_dict_path + char_ops_params['loss_type'] = 'ctc' + self.char_ops = CharacterOps(char_ops_params) + + def parse_args(self): + parser = argparse.ArgumentParser() + parser.add_argument("--rec_algorithm", type=str, default='CRNN') + parser.add_argument("--rec_model_dir", type=str) + parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") + parser.add_argument("--rec_char_type", type=str, default='ch') + parser.add_argument("--rec_batch_num", type=int, default=1) + parser.add_argument( + "--rec_char_dict_path", type=str, default="./ppocr_keys_v1.txt") + return parser.parse_args() + + def resize_norm_img(self, img, max_wh_ratio): + imgC, imgH, imgW = self.rec_image_shape + if self.character_type == "ch": + imgW = int(32 * max_wh_ratio) + h = img.shape[0] + w = img.shape[1] + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + + seq = Sequential([ + Resize(imgH, resized_w), Transpose((2, 0, 1)), Div(255), + Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5], True) + ]) + resized_image = seq(img) + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + + return padding_im + + def preprocess(self, img_list): + img_num = len(img_list) + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(img_num): + h, w = img_list[ino].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(img_num): + norm_img = self.resize_norm_img(img_list[ino], max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + norm_img_batch = np.concatenate(norm_img_batch) + norm_img_batch = norm_img_batch.copy() + + return norm_img_batch[0] + + def postprocess(self, outputs): + rec_res = [] + rec_idx_lod = outputs["ctc_greedy_decoder_0.tmp_0.lod"] + predict_lod = outputs["softmax_0.tmp_0.lod"] + rec_idx_batch = outputs["ctc_greedy_decoder_0.tmp_0"] + for rno in range(len(rec_idx_lod) - 1): + beg = rec_idx_lod[rno] + end = rec_idx_lod[rno + 1] + rec_idx_tmp = rec_idx_batch[beg:end, 0] + preds_text = self.char_ops.decode(rec_idx_tmp) + beg = predict_lod[rno] + end = predict_lod[rno + 1] + probs = outputs["softmax_0.tmp_0"][beg:end, :] + ind = np.argmax(probs, axis=1) + blank = probs.shape[1] + valid_ind = np.where(ind != (blank - 1))[0] + score = np.mean(probs[valid_ind, ind[valid_ind]]) + rec_res.append([preds_text, score]) + return rec_res