diff --git a/decoders/swig/ctc_decoders.cpp b/decoders/swig/ctc_decoders.cpp index e86bfe0f2c554e56f54283db79444985f863fb41..86598eee6e0513d74111a3702586549e59ef1464 100644 --- a/decoders/swig/ctc_decoders.cpp +++ b/decoders/swig/ctc_decoders.cpp @@ -18,7 +18,7 @@ std::string ctc_greedy_decoder( const std::vector &vocabulary) { // dimension check size_t num_time_steps = probs_seq.size(); - for (size_t i = 0; i < num_time_steps; i++) { + for (size_t i = 0; i < num_time_steps; ++i) { VALID_CHECK_EQ(probs_seq[i].size(), vocabulary.size() + 1, "The shape of probs_seq does not match with " @@ -28,7 +28,7 @@ std::string ctc_greedy_decoder( size_t blank_id = vocabulary.size(); std::vector max_idx_vec; - for (size_t i = 0; i < num_time_steps; i++) { + for (size_t i = 0; i < num_time_steps; ++i) { double max_prob = 0.0; size_t max_idx = 0; for (size_t j = 0; j < probs_seq[i].size(); j++) { @@ -41,14 +41,14 @@ std::string ctc_greedy_decoder( } std::vector idx_vec; - for (size_t i = 0; i < max_idx_vec.size(); i++) { + for (size_t i = 0; i < max_idx_vec.size(); ++i) { if ((i == 0) || ((i > 0) && max_idx_vec[i] != max_idx_vec[i - 1])) { idx_vec.push_back(max_idx_vec[i]); } } std::string best_path_result; - for (size_t i = 0; i < idx_vec.size(); i++) { + for (size_t i = 0; i < idx_vec.size(); ++i) { if (idx_vec[i] != blank_id) { best_path_result += vocabulary[idx_vec[i]]; } @@ -65,7 +65,7 @@ std::vector> ctc_beam_search_decoder( Scorer *ext_scorer) { // dimension check size_t num_time_steps = probs_seq.size(); - for (size_t i = 0; i < num_time_steps; i++) { + for (size_t i = 0; i < num_time_steps; ++i) { VALID_CHECK_EQ(probs_seq[i].size(), vocabulary.size() + 1, "The shape of probs_seq does not match with " @@ -111,7 +111,7 @@ std::vector> ctc_beam_search_decoder( for (size_t time_step = 0; time_step < num_time_steps; time_step++) { std::vector prob = probs_seq[time_step]; std::vector> prob_idx; - for (size_t i = 0; i < prob.size(); i++) { + for (size_t i = 0; i < prob.size(); ++i) { prob_idx.push_back(std::pair(i, prob[i])); } @@ -134,7 +134,7 @@ std::vector> ctc_beam_search_decoder( if (cutoff_prob < 1.0) { double cum_prob = 0.0; cutoff_len = 0; - for (size_t i = 0; i < prob_idx.size(); i++) { + for (size_t i = 0; i < prob_idx.size(); ++i) { cum_prob += prob_idx[i].second; cutoff_len += 1; if (cum_prob >= cutoff_prob) break; @@ -145,7 +145,7 @@ std::vector> ctc_beam_search_decoder( prob_idx.begin(), prob_idx.begin() + cutoff_len); } std::vector> log_prob_idx; - for (size_t i = 0; i < cutoff_len; i++) { + for (size_t i = 0; i < cutoff_len; ++i) { log_prob_idx.push_back(std::pair( prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN))); } @@ -155,7 +155,7 @@ std::vector> ctc_beam_search_decoder( auto c = log_prob_idx[index].first; float log_prob_c = log_prob_idx[index].second; - for (size_t i = 0; i < prefixes.size() && i < beam_size; i++) { + for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) { auto prefix = prefixes[i]; if (full_beam && log_prob_c + prefix->score < min_cutoff) { @@ -222,14 +222,14 @@ std::vector> ctc_beam_search_decoder( prefixes.end(), prefix_compare); - for (size_t i = beam_size; i < prefixes.size(); i++) { + for (size_t i = beam_size; i < prefixes.size(); ++i) { prefixes[i]->remove(); } } } // end of loop over time // compute aproximate ctc score as the return score - for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) { + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { double approx_ctc = prefixes[i]->score; if (ext_scorer != nullptr) { @@ -249,14 +249,14 @@ std::vector> ctc_beam_search_decoder( // allow for the post processing std::vector space_prefixes; if (space_prefixes.empty()) { - for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) { + for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) { space_prefixes.push_back(prefixes[i]); } } std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare); std::vector> output_vecs; - for (size_t i = 0; i < beam_size && i < space_prefixes.size(); i++) { + for (size_t i = 0; i < beam_size && i < space_prefixes.size(); ++i) { std::vector output; space_prefixes[i]->get_path_vec(output); // convert index to string @@ -301,7 +301,7 @@ ctc_beam_search_decoder_batch( // enqueue the tasks of decoding std::vector>>> res; - for (size_t i = 0; i < batch_size; i++) { + for (size_t i = 0; i < batch_size; ++i) { res.emplace_back(pool.enqueue(ctc_beam_search_decoder, probs_split[i], beam_size, @@ -313,7 +313,7 @@ ctc_beam_search_decoder_batch( // get decoding results std::vector>> batch_results; - for (size_t i = 0; i < batch_size; i++) { + for (size_t i = 0; i < batch_size; ++i) { batch_results.emplace_back(res[i].get()); } return batch_results; diff --git a/models/aishell/download_model.sh b/models/aishell/download_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..4368ee55af8c062c2ac5d7e1bcc56d086a186887 --- /dev/null +++ b/models/aishell/download_model.sh @@ -0,0 +1,19 @@ +#! /usr/bin/bash + +source ../../utils/utility.sh + +URL='http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274' +MD5=28521a58552885a81cf92a1e9b133a71 +TARGET=./aishell_model.tar.gz + + +echo "Download Aishell model ..." +download $URL $MD5 $TARGET +if [ $? -ne 0 ]; then + echo "Fail to download Aishell model!" + exit 1 +fi +tar -zxvf $TARGET + + +exit 0 diff --git a/models/librispeech/download_model.sh b/models/librispeech/download_model.sh index 4408f6c1c7f648a8b3688959a782dfaaee628711..b5fcd7d8c133ea27d1f10d90b8d09e15821a220e 100644 --- a/models/librispeech/download_model.sh +++ b/models/librispeech/download_model.sh @@ -2,9 +2,8 @@ source ../../utils/utility.sh -# TODO: add urls -URL='to-be-added' -MD5=5b4af224b26c1dc4dd972b7d32f2f52a +URL='http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae' +MD5=ea5024a457a91179472f6dfee60e053d TARGET=./librispeech_model.tar.gz diff --git a/models/lm/download_lm_ch.sh b/models/lm/download_lm_ch.sh new file mode 100644 index 0000000000000000000000000000000000000000..7f1c47a27641cb07e4ab638b2949e667abcc473d --- /dev/null +++ b/models/lm/download_lm_ch.sh @@ -0,0 +1,18 @@ +#! /usr/bin/bash + +source ../../utils/utility.sh + +URL=http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e +MD5="29e02312deb2e59b3c8686c7966d4fe3" +TARGET=./zh_giga.no_cna_cmn.prune01244.klm + + +echo "Download language model ..." +download $URL $MD5 $TARGET +if [ $? -ne 0 ]; then + echo "Fail to download the language model!" + exit 1 +fi + + +exit 0 diff --git a/requirements.txt b/requirements.txt index 131f75ff47e003f3b44f4a62f1431cf13d4f44a4..e104f633c792ee9be4f2864e99aea64924a1e2fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,3 @@ scipy==0.13.1 resampy==0.1.5 SoundFile==0.9.0.post1 python_speech_features -https://github.com/luotao1/kenlm/archive/master.zip diff --git a/utils/utility.sh b/utils/utility.sh index aa0ec002bca0597bd61728180e2fb1ccdd02e5d6..f242b7640b6ac2af42aaa450261ddfb43bf7357e 100644 --- a/utils/utility.sh +++ b/utils/utility.sh @@ -11,10 +11,9 @@ download() { fi fi - wget -c $URL -P `dirname "$TARGET"` + wget -c $URL -O "$TARGET" md5_result=`md5sum $TARGET | awk -F[' '] '{print $1}'` if [ ! $MD5 == $md5_result ]; then - echo "Fail to download the language model!" return 1 fi }