diff --git a/.gitignore b/.gitignore index 639472001a719aca5cb93e851ef1f628fc3cae9b..7328b329420c880e71b0f99879e5aa64de885632 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,12 @@ tools/Miniconda3-latest-Linux-x86_64.sh tools/activate_python.sh tools/miniconda.sh tools/CRF++-0.58/ +tools/liblbfgs-1.10/ +tools/srilm/ +tools/env.sh +tools/openfst-1.8.1/ +tools/libsndfile/ +tools/python-soundfile/ speechx/fc_patch/ diff --git a/speechx/examples/build_wfst/path.sh b/speechx/examples/build_wfst/path.sh new file mode 100644 index 0000000000000000000000000000000000000000..e4008cd2ce9514dbdfdfb84d601249fb32d3af51 --- /dev/null +++ b/speechx/examples/build_wfst/path.sh @@ -0,0 +1,27 @@ +# This contains the locations of binaries built required for running the examples. + +SPEECHX_ROOT=$PWD/../../../ +MAIN_ROOT=$SPEECHX_ROOT/../ +SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples + +SPEECHX_TOOLS=$SPEECHX_ROOT/tools +TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin + +[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; } + +export LC_ALL=C + +export PATH=$PATH:$TOOLS_BIN + +# srilm +export LIBLBFGS=${MAIN_ROOT}/tools/liblbfgs-1.10 +export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${LIBLBFGS}/lib/.libs +export SRILM=${MAIN_ROOT}/tools/srilm +export PATH=${PATH}:${SRILM}/bin:${SRILM}/bin/i686-m64 + +# Kaldi +export KALDI_ROOT=${MAIN_ROOT}/tools/kaldi +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present, can not using Kaldi!" +[ -f $KALDI_ROOT/tools/config/common_path.sh ] && . 
$KALDI_ROOT/tools/config/common_path.sh diff --git a/speechx/examples/build_wfst/run.sh b/speechx/examples/build_wfst/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..bba14c59457fbc610cd288ca7bf72e8dbab9f835 --- /dev/null +++ b/speechx/examples/build_wfst/run.sh @@ -0,0 +1,64 @@ +#!/bin/bash +set -eo pipefail + +. path.sh + +stage=-1 +stop_stage=100 +corpus=aishell +lmtype=srilm + +lexicon= # aishell/resource_aishell/lexicon.txt +text= # aishell/data_aishell/transcript/aishell_transcript_v0.8.txt + +source parse_options.sh + +if ! which ngram-count > /dev/null; then + pushd $MAIN_ROOT/tools + make srilm.done + popd +fi + +if ! which fstprint > /dev/null; then + pushd $MAIN_ROOT/tools + make kaldi.done + popd +fi + +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + # 7.1 Prepare dict + unit_file=data/vocab.txt + mkdir -p data/local/dict + cp $unit_file data/local/dict/units.txt + utils/fst/prepare_dict.py \ + --unit_file $unit_file \ + --in_lexicon ${lexicon} \ + --out_lexicon data/local/dict/lexicon.txt +fi + +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + # 7.2 Train lm + lm=data/local/lm + mkdir -p data/train + mkdir -p $lm + utils/manifest_key_value.py \ + --manifest_path data/manifest.train \ + --output_path data/train + utils/filter_scp.pl data/train/text \ + $text > $lm/text + if [ $lmtype == 'srilm' ];then + local/aishell_train_lms.sh + else + utils/ngram_train.sh --order 3 $lm/text $lm/lm.arpa + fi +fi + +if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + # 7.3 Build decoding TLG + utils/fst/compile_lexicon_token_fst.sh \ + data/local/dict data/local/tmp data/local/lang + utils/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1; +fi + +echo "Aishell build TLG done." 
+exit 0 diff --git a/speechx/examples/ds2_ol/aishell/README.md b/speechx/examples/ds2_ol/aishell/README.md index eec67c3b24decf1bd89543d86a85b7e62075cadd..1ee73a338f599ac3938f06e9dbbcae039b5f69a1 100644 --- a/speechx/examples/ds2_ol/aishell/README.md +++ b/speechx/examples/ds2_ol/aishell/README.md @@ -10,12 +10,18 @@ Other -> 0.00 % N=0 C=0 S=0 D=0 I=0 ## CTC Prefix Beam Search w LM +LM: zh_giga.no_cna_cmn.prune01244.klm ``` - +Overall -> 7.86 % N=104768 C=96865 S=7573 D=330 I=327 +Mandarin -> 7.86 % N=104768 C=96865 S=7573 D=330 I=327 +Other -> 0.00 % N=0 C=0 S=0 D=0 I=0 ``` ## CTC WFST +LM: aishell train ``` - +Overall -> 11.14 % N=103017 C=93363 S=9583 D=71 I=1819 +Mandarin -> 11.14 % N=103017 C=93363 S=9583 D=71 I=1818 +Other -> 0.00 % N=0 C=0 S=0 D=0 I=1 ``` \ No newline at end of file diff --git a/speechx/examples/ds2_ol/aishell/path.sh b/speechx/examples/ds2_ol/aishell/path.sh index 8e26e6e7eef1421827c92bda7b4b5679677de8c9..0a300f362b8d46c6045e51298ca52fbe18db6f60 100644 --- a/speechx/examples/ds2_ol/aishell/path.sh +++ b/speechx/examples/ds2_ol/aishell/path.sh @@ -11,4 +11,4 @@ TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin export LC_AL=C SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/decoder:$SPEECHX_EXAMPLES/ds2_ol/feat -export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN +export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN \ No newline at end of file diff --git a/speechx/examples/ds2_ol/aishell/run.sh b/speechx/examples/ds2_ol/aishell/run.sh index 3a1c19ee4ce79407b4e80aac2dd77f671166f476..6a59ca9b8c08b58c47ea9a00acf813f88f490bca 100755 --- a/speechx/examples/ds2_ol/aishell/run.sh +++ b/speechx/examples/ds2_ol/aishell/run.sh @@ -5,7 +5,10 @@ set -e . path.sh nj=40 +stage=0 +stop_stage=100 +. utils/parse_options.sh # 1. compile if [ ! -d ${SPEECHX_EXAMPLES} ]; then @@ -26,102 +29,112 @@ vocb_dir=$ckpt_dir/data/lang_char/ mkdir -p exp exp=$PWD/exp -aishell_wav_scp=aishell_test.scp -if [ ! 
-d $data/test ]; then - pushd $data - wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip - unzip aishell_test.zip - popd - - realpath $data/test/*/*.wav > $data/wavlist - awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id - paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp -fi - - -if [ ! -d $ckpt_dir ]; then - mkdir -p $ckpt_dir - wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz - tar xzfv $model_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir -fi - -lm=$data/zh_giga.no_cna_cmn.prune01244.klm -if [ ! -f $lm ]; then - pushd $data - wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm - popd +aishell_wav_scp=aishell_test.scp +lm=$data/zh_giga.no_cna_cmn.prune01244.klm +if [ $stage -le 0 ] && [ $stop_stage -ge 0 ];then + if [ ! -d $data/test ]; then + pushd $data + wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip + unzip aishell_test.zip + popd + + realpath $data/test/*/*.wav > $data/wavlist + awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id + paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp + fi + + + if [ ! -d $ckpt_dir ]; then + mkdir -p $ckpt_dir + wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz + tar xzfv $ckpt_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir + fi + + if [ ! -f $lm ]; then + pushd $data + wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm + popd + fi fi # 3. make feature +text=$data/test/text label_file=./aishell_result wer=./aishell_wer export GLOG_logtostderr=1 -# 3. 
gen linear feat -cmvn=$PWD/cmvn.ark -cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn +if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then + # 3. gen linear feat + cmvn=$data/cmvn.ark + cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn -./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj + ./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj -utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat.log \ -linear-spectrogram-wo-db-norm-ol \ - --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \ - --feature_wspecifier=ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp \ - --cmvn_file=$cmvn \ - --streaming_chunk=0.36 - -text=$data/test/text + utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat.log \ + linear-spectrogram-wo-db-norm-ol \ + --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \ + --feature_wspecifier=ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp \ + --cmvn_file=$cmvn \ + --streaming_chunk=0.36 +fi -# 4. recognizer -utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.wolm.log \ - ctc-prefix-beam-search-decoder-ol \ - --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ - --model_path=$model_dir/avg_1.jit.pdmodel \ - --param_path=$model_dir/avg_1.jit.pdiparams \ - --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ - --dict_file=$vocb_dir/vocab.txt \ - --result_wspecifier=ark,t:$data/split${nj}/JOB/result - -cat $data/split${nj}/*/result > ${label_file} -utils/compute-wer.py --char=1 --v=1 ${label_file} $text > ${wer} - -# 4. 
decode with lm -utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.lm.log \ - ctc-prefix-beam-search-decoder-ol \ - --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ - --model_path=$model_dir/avg_1.jit.pdmodel \ - --param_path=$model_dir/avg_1.jit.pdiparams \ - --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ - --dict_file=$vocb_dir/vocab.txt \ - --lm_path=$lm \ - --result_wspecifier=ark,t:$data/split${nj}/JOB/result_lm - - -cat $data/split${nj}/*/result_lm > ${label_file}_lm -utils/compute-wer.py --char=1 --v=1 ${label_file}_lm $text > ${wer}_lm - - -graph_dir=./aishell_graph -if [ ! -d $ ]; then - wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip - unzip -d aishell_graph.zip +if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then + # recognizer + utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.wolm.log \ + ctc-prefix-beam-search-decoder-ol \ + --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ + --model_path=$model_dir/avg_1.jit.pdmodel \ + --param_path=$model_dir/avg_1.jit.pdiparams \ + --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ + --dict_file=$vocb_dir/vocab.txt \ + --result_wspecifier=ark,t:$data/split${nj}/JOB/result + + cat $data/split${nj}/*/result > $exp/${label_file} + utils/compute-wer.py --char=1 --v=1 $exp/${label_file} $text > $exp/${wer} fi +if [ $stage -le 3 ] && [ $stop_stage -ge 3 ];then + # decode with lm + utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.lm.log \ + ctc-prefix-beam-search-decoder-ol \ + --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ + --model_path=$model_dir/avg_1.jit.pdmodel \ + --param_path=$model_dir/avg_1.jit.pdiparams \ + --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ + --dict_file=$vocb_dir/vocab.txt \ + --lm_path=$lm \ + --result_wspecifier=ark,t:$data/split${nj}/JOB/result_lm + + cat $data/split${nj}/*/result_lm > $exp/${label_file}_lm + utils/compute-wer.py --char=1 --v=1 
$exp/${label_file}_lm $text > $exp/${wer}_lm +fi -# 5. test TLG decoder -utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.wfst.log \ - wfst-decoder-ol \ - --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ - --model_path=$model_dir/avg_1.jit.pdmodel \ - --param_path=$model_dir/avg_1.jit.pdiparams \ - --word_symbol_table=$graph_dir/words.txt \ - --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ - --graph_path=$graph_dir/TLG.fst --max_active=7500 \ - --acoustic_scale=1.2 \ - --result_wspecifier=ark,t:$data/split${nj}/JOB/result_tlg +wfst=$data/wfst/ +mkdir -p $wfst +if [ ! -f $wfst/aishell_graph.zip ]; then + pushd $wfst + wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip + unzip aishell_graph.zip + popd +fi -cat $data/split${nj}/*/result_tlg > ${label_file}_tlg -utils/compute-wer.py --char=1 --v=1 ${label_file}_tlg $text > ${wer}_tlg \ No newline at end of file +graph_dir=$wfst/aishell_graph +if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then + # TLG decoder + utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.wfst.log \ + wfst-decoder-ol \ + --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ + --model_path=$model_dir/avg_1.jit.pdmodel \ + --param_path=$model_dir/avg_1.jit.pdiparams \ + --word_symbol_table=$graph_dir/words.txt \ + --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ + --graph_path=$graph_dir/TLG.fst --max_active=7500 \ + --acoustic_scale=1.2 \ + --result_wspecifier=ark,t:$data/split${nj}/JOB/result_tlg + + cat $data/split${nj}/*/result_tlg > $exp/${label_file}_tlg + utils/compute-wer.py --char=1 --v=1 $exp/${label_file}_tlg $text > $exp/${wer}_tlg +fi \ No newline at end of file diff --git a/speechx/examples/ngram/local/aishell_train_lms.sh b/speechx/examples/ngram/local/aishell_train_lms.sh new file mode 100644 index 0000000000000000000000000000000000000000..d9f87aca920c6b7244fe1f16a8c1cd481e138457 --- /dev/null +++ 
b/speechx/examples/ngram/local/aishell_train_lms.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# To be run from one directory above this script. +. ./path.sh + +text=data/local/lm/text +lexicon=data/local/dict/lexicon.txt + +for f in "$text" "$lexicon"; do + [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1; +done + +# Check SRILM tools +if ! which ngram-count > /dev/null; then + echo "srilm tools are not found, please download it and install it from: " + echo "http://www.speech.sri.com/projects/srilm/download.html" + echo "Then add the tools to your PATH" + exit 1 +fi + +# This script takes no arguments. It assumes you have already run +# aishell_data_prep.sh. +# It takes as input the files +# data/local/lm/text +# data/local/dict/lexicon.txt +dir=data/local/lm +mkdir -p $dir + +cleantext=$dir/text.no_oov + +cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } } + {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \ + > $cleantext || exit 1; + +cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \ + sort -nr > $dir/word.counts || exit 1; + +# Get counts from acoustic training transcripts, and add one-count +# for each word in the lexicon (but not silence, we don't want it +# in the LM-- we'll add it optionally later). 
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \ + cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \ + sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1; + +cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist + +heldout_sent=10000 # Don't change this if you want result to be comparable with + # kaldi_lm results +mkdir -p $dir +cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \ + head -$heldout_sent > $dir/heldout +cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \ + tail -n +$heldout_sent > $dir/train + +ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \ + -map-unk "<UNK>" -kndiscount -interpolate -lm $dir/lm.arpa +ngram -lm $dir/lm.arpa -ppl $dir/heldout \ No newline at end of file diff --git a/speechx/examples/ngram/path.sh b/speechx/examples/ngram/path.sh new file mode 100644 index 0000000000000000000000000000000000000000..f926ccd28943e2e1d4ddc78738279d64db657f38 --- /dev/null +++ b/speechx/examples/ngram/path.sh @@ -0,0 +1,20 @@ +# This contains the locations of binaries built required for running the examples. + +SPEECHX_ROOT=$PWD/../../../ +MAIN_ROOT=$SPEECHX_ROOT/../ +SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples + +SPEECHX_TOOLS=$SPEECHX_ROOT/tools +TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin + +[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; } + +export LC_ALL=C + +export PATH=$PATH:$TOOLS_BIN + +# srilm +export LIBLBFGS=${MAIN_ROOT}/tools/liblbfgs-1.10 +export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${LIBLBFGS}/lib/.libs +export SRILM=${MAIN_ROOT}/tools/srilm +export PATH=${PATH}:${SRILM}/bin:${SRILM}/bin/i686-m64 \ No newline at end of file diff --git a/speechx/examples/ngram/run.sh b/speechx/examples/ngram/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..462a89550f888d02e05017b19e01f0ce23a78681 --- /dev/null +++ b/speechx/examples/ngram/run.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -eo pipefail + +. 
path.sh + +stage=-1 +stop_stage=100 +corpus=aishell + +unit=data/vocab.txt # vocab +lexicon= # aishell/resource_aishell/lexicon.txt +text= # aishell/data_aishell/transcript/aishell_transcript_v0.8.txt + +. utils/parse_options.sh + +data=$PWD/data +mkdir -p $data + +if [ ! -f $unit ]; then + echo "$0: No such file $unit" + exit 1; +fi + +if ! which ngram-count > /dev/null; then + pushd $MAIN_ROOT/tools + make srilm.done + popd +fi + +if ! which fstaddselfloops > /dev/null; then + pushd $MAIN_ROOT/tools + make kaldi.done + popd +fi + +mkdir -p data/local/dict +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + # 7.1 Prepare dict + cp $unit data/local/dict/units.txt + utils/fst/prepare_dict.py \ + --unit_file $unit \ + --in_lexicon ${lexicon} \ + --out_lexicon data/local/dict/lexicon.txt +fi + +lm=data/local/lm +mkdir -p data/train +mkdir -p $lm +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + # 7.2 Train lm + utils/manifest_key_value.py \ + --manifest_path data/manifest.train \ + --output_path data/train + utils/filter_scp.pl data/train/text \ + $text > $lm/text + + local/aishell_train_lms.sh +fi + +echo "build LM done." +exit 0 diff --git a/speechx/examples/ngram/utils b/speechx/examples/ngram/utils new file mode 120000 index 0000000000000000000000000000000000000000..256f914abcaa47d966c44878b88a300437f110fb --- /dev/null +++ b/speechx/examples/ngram/utils @@ -0,0 +1 @@ +../../../utils/ \ No newline at end of file diff --git a/speechx/tools/install_srilm.sh b/speechx/tools/install_srilm.sh deleted file mode 100755 index 813109dbb80ea0e22791e6f32034544073df91e6..0000000000000000000000000000000000000000 --- a/speechx/tools/install_srilm.sh +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env bash - -current_path=`pwd` -current_dir=`basename "$current_path"` - -if [ "tools" != "$current_dir" ]; then - echo "You should run this script in tools/ directory!!" - exit 1 -fi - -if [ ! 
-d liblbfgs-1.10 ]; then - echo Installing libLBFGS library to support MaxEnt LMs - bash extras/install_liblbfgs.sh || exit 1 -fi - -! command -v gawk > /dev/null && \ - echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1; - -if [ $# -ne 3 ]; then - echo "SRILM download requires some information about you" - echo - echo "Usage: $0 " - exit 1 -fi - -srilm_url="http://www.speech.sri.com/projects/srilm/srilm_download.php" -post_data="WWW_file=srilm-1.7.3.tar.gz&WWW_name=$1&WWW_org=$2&WWW_email=$3" - -if ! wget --post-data "$post_data" -O ./srilm.tar.gz "$srilm_url"; then - echo 'There was a problem downloading the file.' - echo 'Check you internet connection and try again.' - exit 1 -fi - -mkdir -p srilm -cd srilm - - -if [ -f ../srilm.tgz ]; then - tar -xvzf ../srilm.tgz # Old SRILM format -elif [ -f ../srilm.tar.gz ]; then - tar -xvzf ../srilm.tar.gz # Changed format type from tgz to tar.gz -fi - -major=`gawk -F. '{ print $1 }' RELEASE` -minor=`gawk -F. '{ print $2 }' RELEASE` -micro=`gawk -F. '{ print $3 }' RELEASE` - -if [ $major -le 1 ] && [ $minor -le 7 ] && [ $micro -le 1 ]; then - echo "Detected version 1.7.1 or earlier. Applying patch." - patch -p0 < ../extras/srilm.patch -fi - -# set the SRILM variable in the top-level Makefile to this directory. -cp Makefile tmpf - -cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \ - > Makefile || exit 1 -rm tmpf - -mtype=`sbin/machine-type` - -echo HAVE_LIBLBFGS=1 >> common/Makefile.machine.$mtype -grep ADDITIONAL_INCLUDES common/Makefile.machine.$mtype | \ - sed 's|$| -I$(SRILM)/../liblbfgs-1.10/include|' \ - >> common/Makefile.machine.$mtype - -grep ADDITIONAL_LDFLAGS common/Makefile.machine.$mtype | \ - sed 's|$| -L$(SRILM)/../liblbfgs-1.10/lib/ -Wl,-rpath -Wl,$(SRILM)/../liblbfgs-1.10/lib/|' \ - >> common/Makefile.machine.$mtype - -make || exit - -cd .. -( - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. 
Undefining..." && \ - unset SRILM - - [ -f ./env.sh ] && . ./env.sh - - [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM config is already in env.sh" && exit - - wd=`pwd` - wd=`readlink -f $wd || pwd` - - echo "export SRILM=$wd/srilm" - dirs="\${PATH}" - for directory in $(cd srilm && find bin -type d ) ; do - dirs="$dirs:\${SRILM}/$directory" - done - echo "export PATH=$dirs" -) >> env.sh - -echo >&2 "Installation of SRILM finished successfully" -echo >&2 "Please source the tools/env.sh in your path.sh to enable it" diff --git a/tools/Makefile b/tools/Makefile index 285f85c86caf7a2f112fa96175514d888518dcdb..a5a4485da79795ed8f2abec1a24abb2f6d9a98b0 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -25,7 +25,7 @@ clean: apt.done: apt update -y - apt install -y bc flac jq vim tig tree pkg-config libsndfile1 libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev + apt install -y bc flac jq vim tig tree sox pkg-config libsndfile1 libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev echo "check_certificate = off" >> ~/.wgetrc touch apt.done @@ -50,7 +50,7 @@ openblas.done: bash extras/install_openblas.sh touch openblas.done -kaldi.done: openblas.done +kaldi.done: apt.done openblas.done bash extras/install_kaldi.sh touch kaldi.done @@ -58,6 +58,11 @@ sctk.done: ./extras/install_sclite.sh touch sctk.done +srilm.done: + ./extras/install_liblbfgs.sh + extras/install_srilm.sh + touch srilm.done + ###################### dev: python conda_packages.done sctk.done @@ -96,4 +101,4 @@ conda_packages.done: bc.done cmake.done flac.done ffmpeg.done sox.done sndfile.d else conda_packages.done: endif - touch conda_packages.done \ No newline at end of file + touch conda_packages.done diff --git a/tools/extras/install_openfst.sh b/tools/extras/install_openfst.sh index 54ddef6a7119f285fbec4512fd6d113381268e9b..5e97bc81fb4662f96154f8c561d73aa20a36a837 100755 --- a/tools/extras/install_openfst.sh +++ b/tools/extras/install_openfst.sh @@ -7,8 +7,9 @@ set -x # openfst 
openfst=openfst-1.8.1 shared=true +WGET="wget -c --no-check-certificate" -test -e ${openfst}.tar.gz || wget http://www.openfst.org/twiki/pub/FST/FstDownload/${openfst}.tar.gz +test -e ${openfst}.tar.gz || $WGET http://www.openfst.org/twiki/pub/FST/FstDownload/${openfst}.tar.gz test -d ${openfst} || tar -xvf ${openfst}.tar.gz && chown -R root:root ${openfst} diff --git a/utils/espnet_json_to_manifest.py b/utils/espnet_json_to_manifest.py old mode 100644 new mode 100755 diff --git a/utils/generate_infer_yaml.py b/utils/generate_infer_yaml.py old mode 100644 new mode 100755 diff --git a/utils/link_wav.py b/utils/link_wav.py old mode 100644 new mode 100755 diff --git a/utils/manifest_key_value.py b/utils/manifest_key_value.py index fb3d3aaaf47948428cd5eaf4a9ae6b0fe82b93e1..0ab3ae08fc4801cc0e07fda34af0ba2cf04fc8d2 100755 --- a/utils/manifest_key_value.py +++ b/utils/manifest_key_value.py @@ -26,23 +26,38 @@ def main(args): with wav_scp.open('w') as fwav, dur_scp.open('w') as fdur, text_scp.open( 'w') as ftxt: for line_json in manifest_jsons: + # utt:str + # utt2spk:str + # input: [{name:str, shape:[dur_in_sec, feat_dim], feat:str, filetype:str}, ] + # output: [{name:str, shape:[tokenlen, vocab_dim], text:str, token:str, tokenid:str}, ] utt = line_json['utt'] - feat = line_json['feat'] + utt2spk = line_json['utt2spk'] + + # input + assert(len(line_json['input']) == 1), "only support one input now" + input_json = line_json['input'][0] + feat = input_json['feat'] + feat_shape = input_json['shape'] + file_type = input_json['filetype'] + file_ext = Path(feat).suffix # .wav - text = line_json['text'] - feat_shape = line_json['feat_shape'] dur = feat_shape[0] feat_dim = feat_shape[1] - if 'token' in line_json: - tokens = line_json['token'] - tokenids = line_json['token_id'] - token_shape = line_json['token_shape'] - token_len = token_shape[0] - vocab_dim = token_shape[1] if file_ext == '.wav': fwav.write(f"{utt} {feat}\n") fdur.write(f"{utt} {dur}\n") + + # output + 
assert(len(line_json['output']) == 1), "only support one output now" + output_json = line_json['output'][0] + text = output_json['text'] + if 'token' in output_json: + tokens = output_json['token'] + tokenids = output_json['tokenid'] + token_shape = output_json['shape'] + token_len = token_shape[0] + vocab_dim = token_shape[1] ftxt.write(f"{utt} {text}\n") count += 1