class E2ELabelEncode_test(BaseRecLabelEncode):
    """Encode end-to-end (detection + recognition) labels for evaluation.

    Parses the JSON annotation stored in ``data['label']`` and writes the
    polygons, ignore flags and padded character-index sequences back into
    ``data`` under the keys ``'polys'``, ``'tags'`` and ``'texts'``.

    NOTE(review): ``self.dict``, ``self.encode`` and ``self.max_text_len``
    are inherited from ``BaseRecLabelEncode``, which is not visible here;
    their exact semantics should be confirmed against that class.
    """

    def __init__(self,
                 max_text_length,
                 character_dict_path=None,
                 character_type='EN',
                 use_space_char=False,
                 **kwargs):
        # Extra keyword arguments are accepted and ignored.
        super(E2ELabelEncode_test,
              self).__init__(max_text_length, character_dict_path,
                             character_type, use_space_char)

    def __call__(self, data):
        """Convert one sample's JSON label into arrays.

        Args:
            data: dict holding a JSON string under ``'label'``. The JSON is
                a list of objects, each with a ``'points'`` and a
                ``'transcription'`` entry.

        Returns:
            The same ``data`` dict with ``'polys'``, ``'tags'`` and
            ``'texts'`` added, or ``None`` when ``self.encode`` returns
            ``None`` for any transcription.
        """
        import json

        # The index one past the last dictionary entry is used as padding.
        padnum = len(self.dict)
        annotations = json.loads(data['label'])

        boxes, txts, txt_tags = [], [], []
        for annotation in annotations:
            txt = annotation['transcription']
            boxes.append(annotation['points'])
            txts.append(txt)
            # '*' and '###' transcriptions are flagged in the tags array.
            txt_tags.append(txt in ['*', '###'])

        data['polys'] = np.array(boxes, dtype=np.float32)
        # Builtin ``bool`` instead of ``np.bool``: the alias was deprecated
        # in NumPy 1.20 and removed in 1.24.
        data['tags'] = np.array(txt_tags, dtype=bool)

        temp_texts = []
        for text in txts:
            encoded = self.encode(text.lower())
            if encoded is None:
                # presumably an empty/over-long/unencodable text; verify
                # against BaseRecLabelEncode.encode
                return None
            # Right-pad every sequence to max_text_len with padnum.
            encoded = encoded + [padnum] * (self.max_text_len - len(encoded))
            temp_texts.append(encoded)
        data['texts'] = np.array(temp_texts)
        return data
def __call__(self, preds, batch, **kwargs):
    """Score one prediction against its ground truth and store the result.

    In mode ``'A'`` the ground truth is taken from ``batch``: ``batch[2]``
    is read as the polygons, ``batch[3][0]`` as the encoded texts of the
    first sample and ``batch[4]`` as the ignore tags. Because ``preds`` and
    the decoded texts are each wrapped in a one-element list, at most one
    result is produced per call.

    In any other mode the image id is read from ``batch[5][0]`` and the
    ground truth is looked up by ``get_socre_B`` using ``self.gt_mat_dir``.

    Args:
        preds: mapping with ``'points'`` and ``'texts'`` entries.
        batch: indexable batch as described above.
        **kwargs: accepted and ignored.
    """
    if self.mode != 'A':
        image_id = batch[5][0]
        e2e_info_list = []
        for det_poly, det_text in zip(preds['points'], preds['texts']):
            e2e_info_list.append({'points': det_poly, 'texts': det_text})
        self.results.append(
            get_socre_B(self.gt_mat_dir, image_id, e2e_info_list))
        return

    polys_per_sample = batch[2]
    encoded_texts = batch[3][0]
    tags_per_sample = batch[4]

    # Decode index sequences back to strings; indices at or beyond
    # max_index are dropped.
    decoded_texts = [
        "".join(self.label_list[idx] for idx in encoded
                if idx < self.max_index) for encoded in encoded_texts
    ]

    samples = zip([preds], polys_per_sample, [decoded_texts],
                  tags_per_sample)
    for pred, gt_polys, gt_texts, gt_tags in samples:
        # Ground-truth entries use the key 'text'; detections use 'texts'.
        gt_info_list = []
        for gt_poly, gt_text, gt_tag in zip(gt_polys, gt_texts, gt_tags):
            gt_info_list.append({
                'points': gt_poly,
                'text': gt_text,
                'ignore': gt_tag
            })

        e2e_info_list = []
        for det_poly, det_text in zip(pred['points'], pred['texts']):
            e2e_info_list.append({'points': det_poly, 'texts': det_text})

        self.results.append(get_socre_A(gt_info_list, e2e_info_list))
@@ -17,7 +17,144 @@ import scipy.io as io from ppocr.utils.e2e_metric.polygon_fast import iod, area_of_intersection, area -def get_socre(gt_dir, img_id, pred_dict): +def get_socre_A(gt_dir, pred_dict): + allInputs = 1 + + def input_reading_mod(pred_dict): + """This helper reads input from txt files""" + det = [] + n = len(pred_dict) + for i in range(n): + points = pred_dict[i]['points'] + text = pred_dict[i]['texts'] + point = ",".join(map(str, points.reshape(-1, ))) + det.append([point, text]) + return det + + def gt_reading_mod(gt_dict): + """This helper reads groundtruths from mat files""" + gt = [] + n = len(gt_dict) + for i in range(n): + points = gt_dict[i]['points'].tolist() + h = len(points) + text = gt_dict[i]['text'] + xx = [ + np.array( + ['x:'], dtype=' 1): + gt_x = list(map(int, np.squeeze(gt[1]))) + gt_y = list(map(int, np.squeeze(gt[3]))) + for det_id, detection in enumerate(detections): + detection_orig = detection + detection = [float(x) for x in detection[0].split(',')] + detection = list(map(int, detection)) + det_x = detection[0::2] + det_y = detection[1::2] + det_gt_iou = iod(det_x, det_y, gt_x, gt_y) + if det_gt_iou > threshold: + detections[det_id] = [] + + detections[:] = [item for item in detections if item != []] + return detections + + def sigma_calculation(det_x, det_y, gt_x, gt_y): + """ + sigma = inter_area / gt_area + """ + return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / + area(gt_x, gt_y)), 2) + + def tau_calculation(det_x, det_y, gt_x, gt_y): + if area(det_x, det_y) == 0.0: + return 0 + return np.round((area_of_intersection(det_x, det_y, gt_x, gt_y) / + area(det_x, det_y)), 2) + + ##############################Initialization################################### + # global_sigma = [] + # global_tau = [] + # global_pred_str = [] + # global_gt_str = [] + ############################################################################### + + for input_id in range(allInputs): + if (input_id != '.DS_Store') and (input_id != 
'Pascal_result.txt') and ( + input_id != 'Pascal_result_curved.txt') and (input_id != 'Pascal_result_non_curved.txt') and ( + input_id != 'Deteval_result.txt') and (input_id != 'Deteval_result_curved.txt') \ + and (input_id != 'Deteval_result_non_curved.txt'): + detections = input_reading_mod(pred_dict) + groundtruths = gt_reading_mod(gt_dir) + detections = detection_filtering( + detections, + groundtruths) # filters detections overlapping with DC area + dc_id = [] + for i in range(len(groundtruths)): + if groundtruths[i][5] == '#': + dc_id.append(i) + cnt = 0 + for a in dc_id: + num = a - cnt + del groundtruths[num] + cnt += 1 + + local_sigma_table = np.zeros((len(groundtruths), len(detections))) + local_tau_table = np.zeros((len(groundtruths), len(detections))) + local_pred_str = {} + local_gt_str = {} + + for gt_id, gt in enumerate(groundtruths): + if len(detections) > 0: + for det_id, detection in enumerate(detections): + detection_orig = detection + detection = [float(x) for x in detection[0].split(',')] + detection = list(map(int, detection)) + pred_seq_str = detection_orig[1].strip() + det_x = detection[0::2] + det_y = detection[1::2] + gt_x = list(map(int, np.squeeze(gt[1]))) + gt_y = list(map(int, np.squeeze(gt[3]))) + gt_seq_str = str(gt[4].tolist()[0]) + + local_sigma_table[gt_id, det_id] = sigma_calculation( + det_x, det_y, gt_x, gt_y) + local_tau_table[gt_id, det_id] = tau_calculation( + det_x, det_y, gt_x, gt_y) + local_pred_str[det_id] = pred_seq_str + local_gt_str[gt_id] = gt_seq_str + + global_sigma = local_sigma_table + global_tau = local_tau_table + global_pred_str = local_pred_str + global_gt_str = local_gt_str + + single_data = {} + single_data['sigma'] = global_sigma + single_data['global_tau'] = global_tau + single_data['global_pred_str'] = global_pred_str + single_data['global_gt_str'] = global_gt_str + return single_data + + +def get_socre_B(gt_dir, img_id, pred_dict): allInputs = 1 def input_reading_mod(pred_dict):