From 6698fe640de1ff024f756330a00569a4df7de6d3 Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Wed, 7 Jun 2017 14:57:04 +0800
Subject: [PATCH] modify language model scoring

---
 deep_speech_2/decoder.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/deep_speech_2/decoder.py b/deep_speech_2/decoder.py
index e16d1054..458cd9ad 100755
--- a/deep_speech_2/decoder.py
+++ b/deep_speech_2/decoder.py
@@ -52,13 +52,19 @@ class Scorer(object):
     """
 
     def __init__(self, alpha, beta, model_path):
-
         self._alpha = alpha
         self._beta = beta
         self._language_model = kenlm.LanguageModel(model_path)
 
     def language_model_score(self, sentence, bos=True, eos=False):
-        log_prob = self._language_model.score(sentence, bos, eos)
+        words = sentence.strip().split(' ')
+        length = len(words)
+        if length == 1:
+            log_prob = self._language_model.score(sentence, bos, eos)
+        else:
+            prefix_sent = ' '.join(words[0:length - 1])
+            log_prob = self._language_model.score(sentence, bos, eos) \
+                - self._language_model.score(prefix_sent, bos, eos)
         return np.power(10, log_prob)
 
     def word_count(self, sentence):
-- 
GitLab