decoder.py 2.6 KB
Newer Older
1 2 3 4 5 6
"""
    CTC-like decoder utilitis.
"""

from itertools import groupby
import numpy as np
Y
Yibing Liu 已提交
7
from ctc_beam_search_decoder import *
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39


def ctc_best_path_decode(probs_seq, vocabulary):
    """
    Best path decoding, also called argmax decoding or greedy decoding.
    Path consisting of the most probable tokens are further post-processed to
    remove consecutive repetitions and all blanks.

    :param probs_seq: 2-D list of probabilities over the vocabulary for each
                      character. Each element is a list of float probabilities
                      for one character.
    :type probs_seq: list
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :return: Decoding result string.
    :rtype: baseline
    """
    # dimension verification
    for probs in probs_seq:
        if not len(probs) == len(vocabulary) + 1:
            raise ValueError("probs_seq dimension mismatchedd with vocabulary")
    # argmax to get the best index for each time step
    max_index_list = list(np.array(probs_seq).argmax(axis=1))
    # remove consecutive duplicate indexes
    index_list = [index_group[0] for index_group in groupby(max_index_list)]
    # remove blank indexes
    blank_index = len(vocabulary)
    index_list = [index for index in index_list if index != blank_index]
    # convert index list to string
    return ''.join([vocabulary[index] for index in index_list])


Y
Yibing Liu 已提交
40 41 42 43 44
def ctc_decode(probs_seq,
               vocabulary,
               method,
               beam_size=None,
               num_results_per_sample=None):
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
    """
    CTC-like sequence decoding from a sequence of likelihood probablilites. 

    :param probs_seq: 2-D list of probabilities over the vocabulary for each
                      character. Each element is a list of float probabilities
                      for one character.
    :type probs_seq: list
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param method: Decoding method name, with options: "best_path".
    :type method: basestring
    :return: Decoding result string.
    :rtype: baseline
    """
    for prob_list in probs_seq:
        if not len(prob_list) == len(vocabulary) + 1:
            raise ValueError("probs dimension mismatchedd with vocabulary")
    if method == "best_path":
        return ctc_best_path_decode(probs_seq, vocabulary)
Y
Yibing Liu 已提交
64 65 66 67 68 69 70
    elif method == "beam_search":
        return ctc_beam_search_decoder(
            input_probs_matrix=probs_seq,
            vocabulary=vocabulary,
            beam_size=beam_size,
            blank_id=len(vocabulary),
            num_results_per_sample=num_results_per_sample)
71
    else:
Y
Yibing Liu 已提交
72
        raise ValueError("Decoding method [%s] is not supported." % method)