diff --git a/configs/table/SLANet.yml b/configs/table/SLANet.yml index 4858c71c28710c2f68faac6397d4f60fba7a4e95..2264eb14d229ebe314f65bf75bb2f12efc95a9c9 100644 --- a/configs/table/SLANet.yml +++ b/configs/table/SLANet.yml @@ -86,7 +86,8 @@ Train: loc_reg_num: *loc_reg_num max_text_length: *max_text_length - TableBoxEncode: - box_format: *box_format + in_box_format: *box_format + out_box_format: *box_format - ResizeTableImage: max_len: 488 - NormalizeImage: @@ -121,7 +122,8 @@ Eval: loc_reg_num: *loc_reg_num max_text_length: *max_text_length - TableBoxEncode: - box_format: *box_format + in_box_format: *box_format + out_box_format: *box_format - ResizeTableImage: max_len: 488 - NormalizeImage: diff --git a/configs/table/table_master.yml b/configs/table/table_master.yml index 1f50d20bfa4b2f3b28a6375580529e25f24aaa2d..df437f7c95523c5fe12f7166d011b4ad8473628b 100755 --- a/configs/table/table_master.yml +++ b/configs/table/table_master.yml @@ -90,7 +90,8 @@ Train: - PaddingTableImage: size: [480, 480] - TableBoxEncode: - box_format: *box_format + in_box_format: *box_format + out_box_format: *box_format - NormalizeImage: scale: 1./255. mean: [0.5, 0.5, 0.5] @@ -126,7 +127,8 @@ Eval: - PaddingTableImage: size: [480, 480] - TableBoxEncode: - box_format: *box_format + in_box_format: *box_format + out_box_format: *box_format - NormalizeImage: scale: 1./255. 
# ppocr/data/imaug/label_ops.py -- TableBoxEncode as it stands after the patch.
class TableBoxEncode(object):
    """Convert table-cell boxes to the requested layout and normalise them.

    Supported layout names are ``'xyxy'`` (two corners), ``'xyxyxyxy'``
    (four corner points) and ``'xywh'`` (centre x, centre y, width, height).
    Only conversions *to* ``'xywh'`` (from ``'xyxy'`` or ``'xyxyxyxy'``) are
    implemented; every other combination leaves the coordinates unconverted
    and only normalises them.

    Args:
        in_box_format: layout of ``data['bboxes']`` on input.
        out_box_format: layout to write back to ``data['bboxes']``.
        **kwargs: accepted and ignored.

    Raises:
        AssertionError: if either format name is not a supported layout.
    """

    def __init__(self, in_box_format='xyxy', out_box_format='xyxy', **kwargs):
        supported_formats = ['xywh', 'xyxy', 'xyxyxyxy']
        # Validate both arguments; the former check referenced the removed
        # ``box_format`` name and failed with NameError on every construction.
        assert in_box_format in supported_formats, \
            'in_box_format must be one of {}, got {!r}'.format(
                supported_formats, in_box_format)
        assert out_box_format in supported_formats, \
            'out_box_format must be one of {}, got {!r}'.format(
                supported_formats, out_box_format)
        self.in_box_format = in_box_format
        self.out_box_format = out_box_format

    def __call__(self, data):
        """Convert and normalise ``data['bboxes']``.

        Reads ``data['image']`` (only its first two shape entries, taken as
        height and width) and ``data['bboxes']`` (a 2-D array with one box
        per row). Writes the result back to ``data['bboxes']`` and returns
        ``data``.
        """
        img_height, img_width = data['image'].shape[:2]
        bboxes = data['bboxes']
        if self.in_box_format != self.out_box_format:
            if self.out_box_format == 'xywh':
                if self.in_box_format == 'xyxyxyxy':
                    bboxes = self.xyxyxyxy2xywh(bboxes)
                elif self.in_box_format == 'xyxy':
                    bboxes = self.xyxy2xywh(bboxes)

        # Even columns hold x-like values and odd columns y-like values in
        # all three layouts, so one strided division normalises any of them.
        # The division is in place: when no conversion ran, the caller's
        # array is modified.
        bboxes[:, 0::2] /= img_width
        bboxes[:, 1::2] /= img_height
        data['bboxes'] = bboxes
        return data

    def xyxyxyxy2xywh(self, boxes):
        """Convert four-point boxes to (x, y, w, h) of their bounding rectangle.

        Args:
            boxes: array of shape (N, 8) laid out as x1, y1, ..., x4, y4.

        Returns:
            A new float array of shape (N, 4). (x, y) is the centre of the
            axis-aligned rectangle enclosing the four points and (w, h) its
            width and height.
        """
        boxes = np.asarray(boxes)
        xs = boxes[:, 0::2]
        ys = boxes[:, 1::2]
        # Reduce along axis 1 so every box gets its own extrema; without an
        # axis the reduction is a single scalar over all boxes.
        x_min, x_max = xs.min(axis=1), xs.max(axis=1)
        y_min, y_max = ys.min(axis=1), ys.max(axis=1)

        new_bboxes = np.zeros([len(boxes), 4])
        # xyxy2xywh emits centre-based (x, y) for the same 'xywh' output
        # format, so this converter uses the same convention.
        new_bboxes[:, 0] = (x_min + x_max) / 2  # x center
        new_bboxes[:, 1] = (y_min + y_max) / 2  # y center
        new_bboxes[:, 2] = x_max - x_min  # w
        new_bboxes[:, 3] = y_max - y_min  # h
        return new_bboxes

    def xyxy2xywh(self, bboxes):
        new_bboxes = np.empty_like(bboxes)
        new_bboxes[:, 0] = (bboxes[:, 0] + bboxes[:, 2]) / 2  # x center
        new_bboxes[:, 1] = (bboxes[:, 1] + bboxes[:, 3]) / 2  # y center