# From fb9be2013df4c74b78717acc376f0be89d79b496 Mon Sep 17 00:00:00 2001
# From: WenmuZhou <572459439@qq.com>
# Date: Tue, 9 Aug 2022 09:52:45 +0000
# Subject: [PATCH] rm layout files
# ---
#  ppstructure/layout/picodet_postprocess.py | 227 ----------------------
#  ppstructure/layout/predict_layout.py      | 155 ---------------
#  2 files changed, 382 deletions(-)
#  delete mode 100644 ppstructure/layout/picodet_postprocess.py
#  delete mode 100644 ppstructure/layout/predict_layout.py
#
# diff --git a/ppstructure/layout/picodet_postprocess.py b/ppstructure/layout/picodet_postprocess.py
# deleted file mode 100644
# index 7df13f82..00000000
# --- a/ppstructure/layout/picodet_postprocess.py
# +++ /dev/null
# @@ -1,227 +0,0 @@
#
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import numpy as np
from scipy.special import softmax


def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Run greedy (hard) non-maximum suppression on scored boxes.

    Args:
        box_scores (N, 5): boxes in corner form (x1, y1, x2, y2) with the
            score in the last column.
        iou_threshold: a remaining box is dropped when its IoU with the
            box just picked is greater than this value.
        top_k: stop after this many boxes were picked. If top_k <= 0,
            keep all surviving boxes.
        candidate_size: only the `candidate_size` highest-scoring boxes
            are considered.

    Returns:
        (K, 5) array holding the kept rows of `box_scores`, ordered from
        the highest score to the lowest.
    """
    scores = box_scores[:, -1]
    boxes = box_scores[:, :-1]
    picked = []
    # argsort is ascending, so the best candidates sit at the tail.
    indexes = np.argsort(scores)[-candidate_size:]
    while len(indexes) > 0:
        current = indexes[-1]
        picked.append(current)
        if 0 < top_k == len(picked) or len(indexes) == 1:
            break
        current_box = boxes[current, :]
        indexes = indexes[:-1]
        iou = iou_of(boxes[indexes, :], np.expand_dims(current_box, axis=0))
        indexes = indexes[iou <= iou_threshold]

    # An explicit integer dtype keeps the fancy index valid when nothing
    # was picked (empty input).
    return box_scores[np.asarray(picked, dtype=np.intp), :]


def iou_of(boxes0, boxes1, eps=1e-5):
    """Return intersection-over-union (Jaccard index) of boxes.

    Args:
        boxes0 (N, 4): boxes in corner form.
        boxes1 (N or 1, 4): boxes in corner form, broadcast against boxes0.
        eps: a small number to avoid 0 as denominator.

    Returns:
        iou (N): IoU values.
    """
    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])

    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
    return overlap_area / (area0 + area1 - overlap_area + eps)


def area_of(left_top, right_bottom):
    """Compute the areas of rectangles given two corners.

    Args:
        left_top (N, 2): left top corner.
        right_bottom (N, 2): right bottom corner.

    Returns:
        area (N): the area; 0 when the corners do not describe a valid
        rectangle (negative extents are clipped to 0).
    """
    hw = np.clip(right_bottom - left_top, 0.0, None)
    return hw[..., 0] * hw[..., 1]


class PicoDetPostProcess(object):
    """Decode raw per-stride head outputs into per-image detections.

    Args:
        input_shape: (height, width) of the network input; used to build
            the grid of anchor centers for every stride.
        ori_shape: per-image (height, width) used to clip decoded boxes,
            indexed by batch id.
        scale_factor: per-image pair of scale factors, indexed by batch id.
            Boxes are divided by the reversed pair, i.e. x coordinates by
            scale_factor[i][1] and y coordinates by scale_factor[i][0].
        strides: stride of each output level, in the same order as the
            `scores` / `raw_boxes` lists passed to `__call__`.
        score_threshold: per-class scores at or below this are discarded.
        nms_threshold: IoU threshold handed to `hard_nms`.
        nms_top_k: number of highest-scoring points kept per level before NMS.
        keep_top_k: maximum number of boxes kept per class by NMS.
    """

    # Columns of one output row: class id, score, x1, y1, x2, y2.
    _NUM_OUT_COLS = 6

    def __init__(self,
                 input_shape,
                 ori_shape,
                 scale_factor,
                 strides=(8, 16, 32, 64),
                 score_threshold=0.4,
                 nms_threshold=0.5,
                 nms_top_k=1000,
                 keep_top_k=100):
        self.ori_shape = ori_shape
        self.input_shape = input_shape
        self.scale_factor = scale_factor
        # Copy so instances never share (or mutate) the caller's sequence.
        self.strides = list(strides)
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k

    def warp_boxes(self, boxes, ori_shape):
        """Normalize corner order and clip boxes to the image extent.

        Args:
            boxes (N, 4): boxes as (x1, y1, x2, y2).
            ori_shape: (height, width) used as the clipping limits.

        Returns:
            (N, 4) float32 array of clipped boxes with x1 <= x2 and
            y1 <= y2. An empty `boxes` is returned unchanged.
        """
        if not len(boxes):
            return boxes
        width, height = ori_shape[1], ori_shape[0]
        boxes = np.asarray(boxes, dtype=np.float64)
        xs = boxes[:, [0, 2]]
        ys = boxes[:, [1, 3]]
        warped = np.stack(
            [xs.min(1), ys.min(1), xs.max(1), ys.max(1)], axis=1)
        warped[:, [0, 2]] = warped[:, [0, 2]].clip(0, width)
        warped[:, [1, 3]] = warped[:, [1, 3]].clip(0, height)
        return warped.astype(np.float32)

    def _decode_level(self, stride, box_distribute, score, reg_max):
        """Decode one output level of one image into boxes and scores."""
        # Feature-map size is rounded up, so a 12.5-cell extent gives 13 cells.
        fm_h = math.ceil(self.input_shape[0] / stride)
        fm_w = math.ceil(self.input_shape[1] / stride)
        ww, hh = np.meshgrid(np.arange(fm_w), np.arange(fm_h))
        ct_row = (hh.flatten() + 0.5) * stride
        ct_col = (ww.flatten() + 0.5) * stride
        center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)

        # Box distribution to distance: expectation over the reg_max + 1 bins.
        reg_range = np.arange(reg_max + 1)
        box_distance = softmax(
            box_distribute.reshape((-1, reg_max + 1)), axis=1)
        box_distance = np.sum(box_distance * reg_range[None, :], axis=1)
        box_distance = box_distance.reshape((-1, 4)) * stride

        # Keep only the nms_top_k points with the highest best-class score.
        topk_idx = np.argsort(score.max(axis=1))[::-1][:self.nms_top_k]
        center = center[topk_idx]
        score = score[topk_idx]
        box_distance = box_distance[topk_idx]

        # Distances are (left, top, right, bottom) offsets from the center.
        decode_box = center + np.array([-1, -1, 1, 1]) * box_distance
        return decode_box, score

    def _nms_per_class(self, bboxes, confidences):
        """Threshold and NMS every class; return (box_prob arrays, labels)."""
        picked_box_probs = []
        picked_labels = []
        for class_index in range(confidences.shape[1]):
            probs = confidences[:, class_index]
            mask = probs > self.score_threshold
            probs = probs[mask]
            if probs.shape[0] == 0:
                continue
            box_probs = np.concatenate(
                [bboxes[mask, :], probs.reshape(-1, 1)], axis=1)
            box_probs = hard_nms(
                box_probs,
                iou_threshold=self.nms_threshold,
                top_k=self.keep_top_k)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.shape[0])
        return picked_box_probs, picked_labels

    def __call__(self, scores, raw_boxes):
        """Post-process a batch of raw head outputs.

        Args:
            scores: list with one array per stride; each is indexed as
                [batch, point, class].
            raw_boxes: list with one array per stride; each is indexed as
                [batch, point, 4 * (reg_max + 1)].

        Returns:
            out_boxes_list (M, 6): detections of all images stacked, each
                row being (class id, score, x1, y1, x2, y2).
            out_boxes_num (batch,): int32 number of rows per image.
        """
        batch_size = raw_boxes[0].shape[0]
        reg_max = raw_boxes[0].shape[-1] // 4 - 1
        out_boxes_num = []
        out_boxes_list = []
        for batch_id in range(batch_size):
            decoded = [
                self._decode_level(stride, box_distribute[batch_id],
                                   score[batch_id], reg_max)
                for stride, box_distribute, score in zip(
                    self.strides, raw_boxes, scores)
            ]
            bboxes = np.concatenate([d[0] for d in decoded], axis=0)
            confidences = np.concatenate([d[1] for d in decoded], axis=0)

            picked_box_probs, picked_labels = self._nms_per_class(
                bboxes, confidences)

            if not picked_labels:
                # Same column count as real detections, so batches mixing
                # empty and non-empty images still concatenate.
                out_boxes_list.append(np.empty((0, self._NUM_OUT_COLS)))
                out_boxes_num.append(0)
                continue

            picked_box_probs = np.concatenate(picked_box_probs)

            # Clip in network-input space, then undo the resize.
            picked_box_probs[:, :4] = self.warp_boxes(
                picked_box_probs[:, :4], self.ori_shape[batch_id])
            im_scale = np.concatenate([
                self.scale_factor[batch_id][::-1],
                self.scale_factor[batch_id][::-1]
            ])
            picked_box_probs[:, :4] /= im_scale

            labels = np.asarray(
                picked_labels, dtype=picked_box_probs.dtype).reshape(-1, 1)
            out_boxes_list.append(
                np.concatenate(
                    [
                        labels, picked_box_probs[:, 4:5],
                        picked_box_probs[:, :4]
                    ],
                    axis=1))
            out_boxes_num.append(len(picked_labels))

        if out_boxes_list:
            out_boxes_list = np.concatenate(out_boxes_list, axis=0)
        else:
            out_boxes_list = np.empty((0, self._NUM_OUT_COLS))
        out_boxes_num = np.asarray(out_boxes_num).astype(np.int32)
        return out_boxes_list, out_boxes_num


# diff --git a/ppstructure/layout/predict_layout.py b/ppstructure/layout/predict_layout.py
# deleted file mode 100644
# index 2fb4b462..00000000
# --- a/ppstructure/layout/predict_layout.py
# +++ /dev/null
# @@ -1,155 +0,0 @@
#
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))

os.environ["FLAGS_allocator_strategy"] = 'auto_growth'

import cv2
import numpy as np
import time

import tools.infer.utility as utility
from ppocr.data import create_operators, transform
from ppocr.postprocess import build_post_process
from ppocr.utils.logging import get_logger
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppstructure.utility import parse_args
from picodet_postprocess import PicoDetPostProcess

logger = get_logger()


class LayoutPredictor(object):
    """Run the layout detection model on one image and label its regions."""

    # Network input size handed to both the Resize op and the post-processor.
    # Assumed to be (height, width), which is how PicoDetPostProcess reads
    # its input_shape -- TODO confirm against the Resize operator.
    INPUT_SHAPE = (800, 608)
    # Detections scoring at or below this are dropped from the result.
    SCORE_THRESHOLD = 0.5
    # The model's class id c is looked up as ID2LABEL[c + 1].
    ID2LABEL = {1: 'text', 2: 'title', 3: 'list', 4: 'table', 5: 'figure'}

    def __init__(self, args):
        """Build the preprocessing pipeline and the inference predictor.

        Args:
            args: parsed command-line arguments forwarded to
                `utility.create_predictor`.
        """
        pre_process_list = [{
            'Resize': {
                'size': list(self.INPUT_SHAPE)
            }
        }, {
            'NormalizeImage': {
                'std': [0.229, 0.224, 0.225],
                'mean': [0.485, 0.456, 0.406],
                'scale': '1./255.',
                'order': 'hwc'
            }
        }, {
            'ToCHWImage': None
        }, {
            'KeepKeys': {
                'keep_keys': ['image']
            }
        }]

        self.preprocess_op = create_operators(pre_process_list)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'layout', logger)

    def __call__(self, img):
        """Detect layout regions in `img`.

        Args:
            img: image array whose first two dimensions are height and width.

        Returns:
            (preds, elapse): `preds` is a list of
            {'bbox': array(x1, y1, x2, y2), 'label': str} dicts and `elapse`
            the inference plus post-processing time in seconds. Returns
            (None, 0) when preprocessing yields no image.
        """
        # Remember the source size before `img` is rebound to the network
        # input; it is needed to map boxes back onto the original image.
        ori_h, ori_w = img.shape[:2]

        data = transform({'image': img}, self.preprocess_op)
        if data is None or data[0] is None:
            return None, 0

        img = np.expand_dims(data[0], axis=0).copy()

        starttime = time.time()

        self.input_tensor.copy_from_cpu(img)
        self.predictor.run()

        # The first half of the outputs are read as score maps and the
        # second half as the matching box distributions, one per stride.
        output_names = self.predictor.get_output_names()
        num_outs = len(output_names) // 2
        np_score_list = [
            self.predictor.get_output_handle(output_names[out_idx])
            .copy_to_cpu() for out_idx in range(num_outs)
        ]
        np_boxes_list = [
            self.predictor.get_output_handle(output_names[out_idx + num_outs])
            .copy_to_cpu() for out_idx in range(num_outs)
        ]

        in_h, in_w = self.INPUT_SHAPE
        # Resize ratio of this image as (y scale, x scale). The previous
        # constants 1.010101 / 0.99346405 equal 800/792 and 608/612, i.e.
        # they were only right for a 792x612 input.
        scale_factor = np.array([[in_h / float(ori_h), in_w / float(ori_w)]])
        postprocessor = PicoDetPostProcess(
            (in_h, in_w), [[float(in_h), float(in_w)]],
            scale_factor,
            strides=[8, 16, 32, 64],
            nms_threshold=0.5)
        np_boxes, _ = postprocessor(np_score_list, np_boxes_list)

        # Rows are (class id, score, x1, y1, x2, y2).
        expect_boxes = (np_boxes[:, 1] > self.SCORE_THRESHOLD) & (
            np_boxes[:, 0] > -1)
        np_boxes = np_boxes[expect_boxes, :]

        preds = [{
            'bbox': dt[2:],
            'label': self.ID2LABEL[int(dt[0]) + 1]
        } for dt in np_boxes]

        elapse = time.time() - starttime
        return preds, elapse


def main(args):
    """Run layout prediction on every image under `args.image_dir`."""
    image_file_list = get_image_file_list(args.image_dir)
    layout_predictor = LayoutPredictor(args)
    count = 0
    total_time = 0

    for image_file in image_file_list:
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        layout_res, elapse = layout_predictor(img)

        logger.info("result: {}".format(layout_res))

        # The first processed image is treated as warm-up and left out of
        # the accumulated time.
        if count > 0:
            total_time += elapse
        count += 1
        logger.info("Predict time of {}: {}".format(image_file, elapse))

    if count > 1:
        logger.info("Avg predict time (first image excluded): {}".format(
            total_time / (count - 1)))


if __name__ == "__main__":
    main(parse_args())

# --
# GitLab