Switch edit-distance computation to rapidfuzz.

Summary of what this patch changes:
  * ppocr/metrics/rec_metric.py and
    ppstructure/vqa/tools/eval_with_label_end2end.py import Levenshtein
    from rapidfuzz.distance instead of the top-level Levenshtein module.
    RecMetric now calls Levenshtein.normalized_distance(pred, target)
    in place of distance / max(len(pred), len(target), 1).
  * ppstructure/table/table_metric/table_metric.py drops "import distance"
    and the three identical maximum()/normalized_distance() helper pairs in
    CustomConfig, CustomConfig_del_short and CustomConfig_del_block; each
    rename() calls Levenshtein.normalized_distance directly.
  * requirements.txt lists rapidfuzz where it listed python-Levenshtein.

NOTE(review): the removed table_metric helpers divided by
max(map(len, sequences)) with no zero guard, while rec_metric clamped the
denominator to 1. Presumably rapidfuzz returns 0.0 when both inputs are
empty; confirm against its documentation.
NOTE(review): rapidfuzz is added without a version bound although the code
needs the rapidfuzz.distance module; consider a lower bound. TODO confirm
which release introduced it.
NOTE(review): line breaks, blank context lines and indentation below were
reconstructed from the hunk headers. If a hunk does not apply cleanly,
regenerate the patch from the blob ids in the index lines.

diff --git a/ppocr/metrics/rec_metric.py b/ppocr/metrics/rec_metric.py
index 515b9372e38a7213cde29fdc9834ed6df45a0a80..6a13129eddc419c4bde70cd2c5a0c018035d63cd 100644
--- a/ppocr/metrics/rec_metric.py
+++ b/ppocr/metrics/rec_metric.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import Levenshtein
+from rapidfuzz.distance import Levenshtein
 import string
 
 
@@ -45,8 +45,7 @@ class RecMetric(object):
             if self.is_filter:
                 pred = self._normalize_text(pred)
                 target = self._normalize_text(target)
-            norm_edit_dis += Levenshtein.distance(pred, target) / max(
-                len(pred), len(target), 1)
+            norm_edit_dis += Levenshtein.normalized_distance(pred, target)
             if pred == target:
                 correct_num += 1
             all_num += 1
diff --git a/ppstructure/table/table_metric/table_metric.py b/ppstructure/table/table_metric/table_metric.py
index 9aca98ad785d4614a803fa5a277a6e4a27b3b078..923a9c0071d083de72a2a896d6f62037373d4e73 100755
--- a/ppstructure/table/table_metric/table_metric.py
+++ b/ppstructure/table/table_metric/table_metric.py
@@ -9,7 +9,7 @@
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # Apache 2.0 License for more details.
 
-import distance
+from rapidfuzz.distance import Levenshtein
 from apted import APTED, Config
 from apted.helpers import Tree
 from lxml import etree, html
@@ -39,17 +39,6 @@ class TableTree(Tree):
 
 
 class CustomConfig(Config):
-    @staticmethod
-    def maximum(*sequences):
-        """Get maximum possible value
-        """
-        return max(map(len, sequences))
-
-    def normalized_distance(self, *sequences):
-        """Get distance from 0 to 1
-        """
-        return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-
     def rename(self, node1, node2):
         """Compares attributes of trees"""
         #print(node1.tag)
@@ -58,23 +47,12 @@ class CustomConfig(Config):
         if node1.tag == 'td':
             if node1.content or node2.content:
                 #print(node1.content, )
-                return self.normalized_distance(node1.content, node2.content)
+                return Levenshtein.normalized_distance(node1.content, node2.content)
         return 0.
 
 
 
 class CustomConfig_del_short(Config):
-    @staticmethod
-    def maximum(*sequences):
-        """Get maximum possible value
-        """
-        return max(map(len, sequences))
-
-    def normalized_distance(self, *sequences):
-        """Get distance from 0 to 1
-        """
-        return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-
     def rename(self, node1, node2):
         """Compares attributes of trees"""
         if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
@@ -90,21 +68,10 @@ class CustomConfig_del_short(Config):
                     node1_content = ['####']
                 if len(node2_content) < 3:
                     node2_content = ['####']
-                return self.normalized_distance(node1_content, node2_content)
+                return Levenshtein.normalized_distance(node1_content, node2_content)
         return 0.
 
 class CustomConfig_del_block(Config):
-    @staticmethod
-    def maximum(*sequences):
-        """Get maximum possible value
-        """
-        return max(map(len, sequences))
-
-    def normalized_distance(self, *sequences):
-        """Get distance from 0 to 1
-        """
-        return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-
     def rename(self, node1, node2):
         """Compares attributes of trees"""
         if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
@@ -120,7 +87,7 @@ class CustomConfig_del_block(Config):
                 while ' ' in node2_content:
                     print(node2_content.index(' '))
                     node2_content.pop(node2_content.index(' '))
-                return self.normalized_distance(node1_content, node2_content)
+                return Levenshtein.normalized_distance(node1_content, node2_content)
         return 0.
 
 class TEDS(object):
diff --git a/ppstructure/vqa/tools/eval_with_label_end2end.py b/ppstructure/vqa/tools/eval_with_label_end2end.py
index b13ffb568fd9610fee5d5a246c501ed5b90de91a..b0fd84363f450dfb7e4ef18e53adc17ef088cf18 100644
--- a/ppstructure/vqa/tools/eval_with_label_end2end.py
+++ b/ppstructure/vqa/tools/eval_with_label_end2end.py
@@ -20,7 +20,7 @@ from shapely.geometry import Polygon
 import numpy as np
 from collections import defaultdict
 import operator
-import Levenshtein
+from rapidfuzz.distance import Levenshtein
 import argparse
 import json
 import copy
diff --git a/requirements.txt b/requirements.txt
index b15176db3eb42c381c1612f404fd15c6b020b3dc..976d29192abbbf89b8ee6064c0b4ec48d43ad268 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ lmdb
 tqdm
 numpy
 visualdl
-python-Levenshtein
+rapidfuzz
 opencv-contrib-python==4.4.0.46
 cython
 lxml