提交 ec307398 编写于 作者: H Hai Liang Wang

Fix invalid cosine value

上级 e141ffe9
......@@ -108,6 +108,12 @@ class Test(unittest.TestCase):
r = synonyms.compare(sen1, sen2, seg=True)
print("%s vs %s" % (sen1, sen2), r)
sen1 = "你们好呀"
sen2 = "大家好"
r = synonyms.compare(sen1, sen2, seg=False)
print("%s vs %s" % (sen1, sen2), r)
def test_nearby(self):
synonyms.display("奥运") # synonyms.display calls synonyms.nearby
synonyms.display("北新桥") # synonyms.display calls synonyms.nearby
......@@ -118,5 +124,5 @@ def test():
if __name__ == '__main__':
FLAGS([__file__, '--verbosity', '-2'])
FLAGS([__file__, '--verbosity', '1'])
test()
......@@ -13,7 +13,7 @@ Welcome
setup(
name='synonyms',
version='3.3.9',
version='3.3.10',
description='Chinese Synonyms for Natural Language Processing and Understanding',
long_description=LONGDOC,
author='Hai Liang Wang, Hu Ying Xi',
......
......@@ -20,7 +20,7 @@ from __future__ import division
__copyright__ = "Copyright (c) 2017 . All Rights Reserved"
__author__ = "Hu Ying Xi<>, Hai Liang Wang<hailiang.hl.wang@gmail.com>"
__date__ = "2017-09-27"
__version__ = "3.3.9"
__version__ = "3.3.10"
import os
import sys
......@@ -53,6 +53,7 @@ from .utils import any2utf8
from .utils import any2unicode
from .utils import sigmoid
from .utils import cosine
from .utils import is_digit
import jieba
from .jieba import posseg as _tokenizer
......@@ -226,20 +227,14 @@ def _similarity_distance(s1, s2):
'''
compute similarity with distance measurement
'''
# g = 0.0
g = 0.0
try:
g = cosine(_flat_sum_array(_get_wv(s1)), _flat_sum_array(_get_wv(s2)))
g_ = cosine(_flat_sum_array(_get_wv(s1)), _flat_sum_array(_get_wv(s2)))
if is_digit(g_): g = g_
except: pass
try:
g_nan_num = np.isnan(g).sum()
if g_nan_num == 100:
g = 0.0
except:
pass
u = _nearby_levenshtein_distance(s1, s2)
# print("g: %s, u: %s" % (g, u))
logging.debug("g: %s, u: %s" % (g, u))
if u >= 0.99:
r = 1.0
elif u > 0.9:
......
......@@ -252,6 +252,12 @@ def call_on_class_only(*args, **kwargs):
"""Raise exception when load methods are called on instance"""
raise AttributeError('This method should be called on a class object.')
def is_digit(obj):
    '''
    Tell whether ``obj`` is a numeric value.

    Returns True when ``obj`` is an instance of the ``numbers.Complex``
    abstract base class, which also covers every ``numbers.Real`` and
    ``numbers.Integral`` value (ints, floats, complex numbers, and bools,
    since ``bool`` subclasses ``int``). Strings, ``None`` and containers
    yield False.

    Note that NaN and infinity are ordinary ``float`` instances and are
    therefore reported as numbers too.
    '''
    # Integral and Real both derive from Complex in the numeric tower,
    # so a single check against Complex accepts all three categories.
    is_numeric = isinstance(obj, numbers.Complex)
    return is_numeric
def is_zhs(str):
'''
Check if str is Chinese Word
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册