from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from itertools import groupby

import numpy as np


def ctc_greedy_decoder(probs_seq, vocabulary):
    """CTC greedy (best path) decoder.

    The path made of the most probable token at each time step is
    post-processed by collapsing consecutive repetitions and then removing
    all blanks. The blank token is the extra, last index of each probability
    row, i.e. index ``len(vocabulary)``.

    :param probs_seq: 2-D list of probabilities over the vocabulary for each
                      time step. Each element is a list of
                      ``len(vocabulary) + 1`` float probabilities (vocabulary
                      entries followed by the blank).
    :type probs_seq: list
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :return: Decoding result string; empty when ``probs_seq`` is empty.
    :rtype: str
    :raises ValueError: If any row of ``probs_seq`` does not have exactly
                        ``len(vocabulary) + 1`` entries.
    """
    blank_index = len(vocabulary)
    expected_width = blank_index + 1

    # dimension verification
    for probs in probs_seq:
        if len(probs) != expected_width:
            raise ValueError("probs_seq dimension mismatchedd with vocabulary")

    # Nothing to decode. Checked with len() rather than truthiness so that a
    # NumPy array argument is also accepted; without this guard the argmax
    # below fails because an empty sequence yields a 1-D array with no axis 1.
    if len(probs_seq) == 0:
        return ''

    # argmax to get the best index for each time step
    max_index_list = list(np.array(probs_seq).argmax(axis=1))

    # remove consecutive duplicate indexes (groupby yields (key, group) pairs)
    index_list = [index for index, _ in groupby(max_index_list)]

    # remove blank indexes and convert the remaining indexes to a string
    return ''.join(
        [vocabulary[index] for index in index_list if index != blank_index])