未验证 提交 4c29b070 编写于 作者: E Evezerest 提交者: GitHub

Merge pull request #6585 from maxbachmann/release/2.5

replace GPL licensed components
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import Levenshtein from rapidfuzz.distance import Levenshtein
import string import string
...@@ -45,8 +45,7 @@ class RecMetric(object): ...@@ -45,8 +45,7 @@ class RecMetric(object):
if self.is_filter: if self.is_filter:
pred = self._normalize_text(pred) pred = self._normalize_text(pred)
target = self._normalize_text(target) target = self._normalize_text(target)
norm_edit_dis += Levenshtein.distance(pred, target) / max( norm_edit_dis += Levenshtein.normalized_distance(pred, target)
len(pred), len(target), 1)
if pred == target: if pred == target:
correct_num += 1 correct_num += 1
all_num += 1 all_num += 1
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Apache 2.0 License for more details. # Apache 2.0 License for more details.
import distance from rapidfuzz.distance import Levenshtein
from apted import APTED, Config from apted import APTED, Config
from apted.helpers import Tree from apted.helpers import Tree
from lxml import etree, html from lxml import etree, html
...@@ -39,17 +39,6 @@ class TableTree(Tree): ...@@ -39,17 +39,6 @@ class TableTree(Tree):
class CustomConfig(Config): class CustomConfig(Config):
@staticmethod
def maximum(*sequences):
"""Get maximum possible value
"""
return max(map(len, sequences))
def normalized_distance(self, *sequences):
"""Get distance from 0 to 1
"""
return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
def rename(self, node1, node2): def rename(self, node1, node2):
"""Compares attributes of trees""" """Compares attributes of trees"""
#print(node1.tag) #print(node1.tag)
...@@ -58,23 +47,12 @@ class CustomConfig(Config): ...@@ -58,23 +47,12 @@ class CustomConfig(Config):
if node1.tag == 'td': if node1.tag == 'td':
if node1.content or node2.content: if node1.content or node2.content:
#print(node1.content, ) #print(node1.content, )
return self.normalized_distance(node1.content, node2.content) return Levenshtein.normalized_distance(node1.content, node2.content)
return 0. return 0.
class CustomConfig_del_short(Config): class CustomConfig_del_short(Config):
@staticmethod
def maximum(*sequences):
"""Get maximum possible value
"""
return max(map(len, sequences))
def normalized_distance(self, *sequences):
"""Get distance from 0 to 1
"""
return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
def rename(self, node1, node2): def rename(self, node1, node2):
"""Compares attributes of trees""" """Compares attributes of trees"""
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan): if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
...@@ -90,21 +68,10 @@ class CustomConfig_del_short(Config): ...@@ -90,21 +68,10 @@ class CustomConfig_del_short(Config):
node1_content = ['####'] node1_content = ['####']
if len(node2_content) < 3: if len(node2_content) < 3:
node2_content = ['####'] node2_content = ['####']
return self.normalized_distance(node1_content, node2_content) return Levenshtein.normalized_distance(node1_content, node2_content)
return 0. return 0.
class CustomConfig_del_block(Config): class CustomConfig_del_block(Config):
@staticmethod
def maximum(*sequences):
"""Get maximum possible value
"""
return max(map(len, sequences))
def normalized_distance(self, *sequences):
"""Get distance from 0 to 1
"""
return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
def rename(self, node1, node2): def rename(self, node1, node2):
"""Compares attributes of trees""" """Compares attributes of trees"""
if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan): if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
...@@ -120,7 +87,7 @@ class CustomConfig_del_block(Config): ...@@ -120,7 +87,7 @@ class CustomConfig_del_block(Config):
while ' ' in node2_content: while ' ' in node2_content:
print(node2_content.index(' ')) print(node2_content.index(' '))
node2_content.pop(node2_content.index(' ')) node2_content.pop(node2_content.index(' '))
return self.normalized_distance(node1_content, node2_content) return Levenshtein.normalized_distance(node1_content, node2_content)
return 0. return 0.
class TEDS(object): class TEDS(object):
......
...@@ -20,7 +20,7 @@ from shapely.geometry import Polygon ...@@ -20,7 +20,7 @@ from shapely.geometry import Polygon
import numpy as np import numpy as np
from collections import defaultdict from collections import defaultdict
import operator import operator
import Levenshtein from rapidfuzz.distance import Levenshtein
import argparse import argparse
import json import json
import copy import copy
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册