提交 6698fe64 编写于 作者: Y Yibing Liu

modify language model scoring

上级 1d7ade10
...@@ -52,13 +52,19 @@ class Scorer(object): ...@@ -52,13 +52,19 @@ class Scorer(object):
""" """
def __init__(self, alpha, beta, model_path): def __init__(self, alpha, beta, model_path):
self._alpha = alpha self._alpha = alpha
self._beta = beta self._beta = beta
self._language_model = kenlm.LanguageModel(model_path) self._language_model = kenlm.LanguageModel(model_path)
def language_model_score(self, sentence, bos=True, eos=False): def language_model_score(self, sentence, bos=True, eos=False):
log_prob = self._language_model.score(sentence, bos, eos) words = sentence.strip().split(' ')
length = len(words)
if length == 1:
log_prob = self._language_model.score(sentence, bos, eos)
else:
prefix_sent = ' '.join(words[0:length - 1])
log_prob = self._language_model.score(sentence, bos, eos) \
- self._language_model.score(prefix_sent, bos, eos)
return np.power(10, log_prob) return np.power(10, log_prob)
def word_count(self, sentence): def word_count(self, sentence):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册