diff --git a/paddlespeech/server/engine/asr/online/ctc_search.py b/paddlespeech/server/engine/asr/online/ctc_search.py index ad9647ef993588ce8d8c6d669b4634f8c9a159b6..06adb9cccc99505388184a32cbf3c72c9bfd9e12 100644 --- a/paddlespeech/server/engine/asr/online/ctc_search.py +++ b/paddlespeech/server/engine/asr/online/ctc_search.py @@ -83,8 +83,8 @@ class CTCPrefixBeamSearch: # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score)) # 0. blank_ending_score, # 1. none_blank_ending_score, - # 2. viterbi_blank ending, - # 3. viterbi_non_blank, + # 2. viterbi_blank ending score, + # 3. viterbi_non_blank score, # 4. current_token_prob, # 5. times_viterbi_blank, times_b # 6. times_titerbi_non_blank, times_nb @@ -110,63 +110,63 @@ class CTCPrefixBeamSearch: times_nb) in self.cur_hyps: last = prefix[-1] if len(prefix) > 0 else None if s == blank_id: # blank - n_pb, n_pnb, n_v_s, n_v_ns, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[ + n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[ prefix] n_pb = log_add([n_pb, pb + ps, pnb + ps]) pre_times = times_b if v_b_s > v_nb_s else times_nb n_times_b = copy.deepcopy(pre_times) viterbi_score = v_b_s if v_b_s > v_nb_s else v_nb_s - n_v_s = viterbi_score + ps - next_hyps[prefix] = (n_pb, n_pnb, n_v_s, n_v_ns, + n_v_b = viterbi_score + ps + next_hyps[prefix] = (n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb) elif s == last: # Update *ss -> *s; # case1: *a + a => *a - n_pb, n_pnb, n_v_s, n_v_ns, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[ + n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[ prefix] n_pnb = log_add([n_pnb, pnb + ps]) - if n_v_ns < v_nb_s + ps: - n_v_ns = v_nb_s + ps + if n_v_nb < v_nb_s + ps: + n_v_nb = v_nb_s + ps if n_cur_token_prob < ps: n_cur_token_prob = ps n_times_nb = copy.deepcopy(times_nb) n_times_nb[ -1] = self.abs_time_step # 注意,这里要重新使用绝对时间 - next_hyps[prefix] = (n_pb, n_pnb, n_v_s, n_v_ns, + next_hyps[prefix] = (n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb) # Update *s-s -> *ss, - is for blank # Case 2: *aε + a => *aa n_prefix = prefix + (s, ) - n_pb, n_pnb, n_v_s, n_v_ns, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[ + n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[ n_prefix] - if n_v_ns < v_b_s + ps: - n_v_ns = v_b_s + ps + if n_v_nb < v_b_s + ps: + n_v_nb = v_b_s + ps n_cur_token_prob = ps n_times_nb = copy.deepcopy(times_b) n_times_nb.append(self.abs_time_step) n_pnb = log_add([n_pnb, pb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb, n_v_s, n_v_ns, + next_hyps[n_prefix] = (n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb) else: # Case 3: *a + b => *ab, *aε + b => *ab n_prefix = prefix + (s, ) - n_pb, n_pnb, n_v_s, n_v_ns, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[ + n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[ n_prefix] viterbi_score = v_b_s if v_b_s > v_nb_s else v_nb_s pre_times = times_b if v_b_s > v_nb_s else times_nb - if n_v_ns < viterbi_score + ps: - n_v_ns = viterbi_score + ps + if n_v_nb < viterbi_score + ps: + n_v_nb = viterbi_score + ps n_cur_token_prob = ps n_times_nb = copy.deepcopy(pre_times) n_times_nb.append(self.abs_time_step) n_pnb = log_add([n_pnb, pb + ps, pnb + ps]) - next_hyps[n_prefix] = (n_pb, n_pnb, n_v_s, n_v_ns, + next_hyps[n_prefix] = (n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb)