ds2 ol aishell egs

1e420790 · Hui Zhang · c7b987c5 · 1e420790 · 1e420790 · 1e420790
6 changed file
--- a/speechx/examples/README.md
+++ b/speechx/examples/README.md
-# Examples
+# Examples for SpeechX
+* dev - for speechx developers; used for testing.
+* ngram - used to build an NGram ARPA LM.
+* ds2_ol - ds2 streaming test on the `aishell-1` test dataset.
+ The entrypoint is `ds2_ol/aishell/run.sh`
-* glog - glog usage
-* feat - mfcc, linear 
-* nnet - ds2 nn
-* decoder - online decoder to work as offline
 ## How to run
 `run.sh` is the entry point.
-Example to play `decoder`:
+Example to play `ds2_ol`:
 ```
-pushd decoder
+pushd ds2_ol/aishell
 bash run.sh
 ```
+## Display Model with [Netron](https://github.com/lutzroeder/netron)
+```
+pip install netron
+netron exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel  --port 8022 --host 10.21.55.20
+```
--- a/speechx/examples/ds2_ol/aishell/.gitignore
+++ b/speechx/examples/ds2_ol/aishell/.gitignore
+data
+exp
--- a/speechx/examples/ds2_ol/aishell/path.sh
+++ b/speechx/examples/ds2_ol/aishell/path.sh
@@ -10,5 +10,5 @@ TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
 export LC_AL=C
-SPEECHX_BIN=$SPEECHX_EXAMPLES/decoder:$SPEECHX_EXAMPLES/feat
+SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/decoder:$SPEECHX_EXAMPLES/ds2_ol/feat
 export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
--- a/speechx/examples/ds2_ol/aishell/run.sh
+++ b/speechx/examples/ds2_ol/aishell/run.sh
@@ -4,6 +4,9 @@ set -e
 . path.sh
+nj=40
 # 1. compile
 if [ ! -d ${SPEECHX_EXAMPLES} ]; then
    pushd ${SPEECHX_ROOT} 
@@ -19,52 +22,51 @@ ckpt_dir=$data/model
 model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
 vocb_dir=$ckpt_dir/data/lang_char/
-lm=$data/zh_giga.no_cna_cmn.prune01244.klm
 # output
 mkdir -p exp
 exp=$PWD/exp
 aishell_wav_scp=aishell_test.scp
 if [ ! -d $data/test ]; then
+    pushd $data
    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
-    unzip -d $data aishell_test.zip
+    unzip  aishell_test.zip
+    popd
    realpath $data/test/*/*.wav > $data/wavlist
    awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
    paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
 fi
-model_dir=$PWD/aishell_ds2_online_model
-if [ ! -d $model_dir ]; then
+if [ ! -d $ckpt_dir ]; then
-    mkdir -p $model_dir 
+    mkdir -p $ckpt_dir
-    wget -P $model_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+    wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
-    tar xzfv $model_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $model_dir
+    tar xzfv $ckpt_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir
 fi
+lm=$data/zh_giga.no_cna_cmn.prune01244.klm
 if [ ! -f $lm ]; then
    pushd $data
    wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
    popd
 fi
 # 3. make feature
-aishell_online_model=$model_dir/exp/deepspeech2_online/checkpoints
 label_file=./aishell_result
 wer=./aishell_wer
-nj=40
 export GLOG_logtostderr=1
-#./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
-data=$PWD/data
 # 3. gen linear feat
 cmvn=$PWD/cmvn.ark
-cmvn_json2binary_main --json_file=$model_dir/data/mean_std.json --cmvn_write_path=$cmvn
+cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn
+./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat_log \
-linear_spectrogram_without_db_norm_main \
+linear-spectrogram-wo-db-norm-ol \
    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
    --feature_wspecifier=ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp \
    --cmvn_file=$cmvn \
@@ -74,10 +76,10 @@ text=$data/test/text
 # 4. recognizer
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log \
-  offline_decoder_sliding_chunk_main \
+  ctc-prefix-beam-search-decoder-ol \
    --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
-    --model_path=$aishell_online_model/avg_1.jit.pdmodel \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
-    --param_path=$aishell_online_model/avg_1.jit.pdiparams \
+    --param_path=$model_dir/avg_1.jit.pdiparams \
    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
    --dict_file=$vocb_dir/vocab.txt \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/result
@@ -87,10 +89,10 @@ utils/compute-wer.py --char=1 --v=1 ${label_file} $text > ${wer}
 # 4. decode with lm
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_lm \
-  offline_decoder_sliding_chunk_main \
+  ctc-prefix-beam-search-decoder-ol \
    --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
-    --model_path=$aishell_online_model/avg_1.jit.pdmodel \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
-    --param_path=$aishell_online_model/avg_1.jit.pdiparams \
+    --param_path=$model_dir/avg_1.jit.pdiparams \
    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
    --dict_file=$vocb_dir/vocab.txt \
    --lm_path=$lm \
@@ -110,10 +112,10 @@ fi
 # 5. test TLG decoder
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_tlg \
-  offline_wfst_decoder_main \
+  wfst-decoder-ol \
    --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
-    --model_path=$aishell_online_model/avg_1.jit.pdmodel \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
-    --param_path=$aishell_online_model/avg_1.jit.pdiparams \
+    --param_path=$model_dir/avg_1.jit.pdiparams \
    --word_symbol_table=$graph_dir/words.txt \
    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
     --graph_path=$graph_dir/TLG.fst --max_active=7500 \

--- a/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
+++ b/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
@@ -32,41 +32,50 @@ int main(int argc, char* argv[]) {
    google::InitGoogleLogging(argv[0]);
    LOG(INFO) << "cmvn josn path: " << FLAGS_json_file;
-    padded_string json = padded_string::load(FLAGS_json_file);
-    ondemand::parser parser;
+    try {
-    ondemand::document doc = parser.iterate(json);
+        padded_string json = padded_string::load(FLAGS_json_file);
-    ondemand::value val = doc;
-    ondemand::array mean_stat = val["mean_stat"];
+        ondemand::parser parser;
-    std::vector<kaldi::BaseFloat> mean_stat_vec;
+        ondemand::document doc = parser.iterate(json);
-    for (double x : mean_stat) {
+        ondemand::value val = doc;
-        mean_stat_vec.push_back(x);
-    }
-    // LOG(INFO) << mean_stat; this line will casue
-    // simdjson::simdjson_error("Objects and arrays can only be iterated when
-    // they are first encountered")
-    ondemand::array var_stat = val["var_stat"];
+        ondemand::array mean_stat = val["mean_stat"];
-    std::vector<kaldi::BaseFloat> var_stat_vec;
+        std::vector<kaldi::BaseFloat> mean_stat_vec;
-    for (double x : var_stat) {
+        for (double x : mean_stat) {
-        var_stat_vec.push_back(x);
+            mean_stat_vec.push_back(x);
-    }
+        }
+        // LOG(INFO) << mean_stat; this line will cause
+        // simdjson::simdjson_error("Objects and arrays can only be iterated
+        // when
+        // they are first encountered")
+        ondemand::array var_stat = val["var_stat"];
+        std::vector<kaldi::BaseFloat> var_stat_vec;
+        for (double x : var_stat) {
+            var_stat_vec.push_back(x);
+        }
-    kaldi::int32 frame_num = uint64_t(val["frame_num"]);
+        kaldi::int32 frame_num = uint64_t(val["frame_num"]);
-    LOG(INFO) << "nframe: " << frame_num;
+        LOG(INFO) << "nframe: " << frame_num;
-    size_t mean_size = mean_stat_vec.size();
+        size_t mean_size = mean_stat_vec.size();
-    kaldi::Matrix<double> cmvn_stats(2, mean_size + 1);
+        kaldi::Matrix<double> cmvn_stats(2, mean_size + 1);
-    for (size_t idx = 0; idx < mean_size; ++idx) {
+        for (size_t idx = 0; idx < mean_size; ++idx) {
-        cmvn_stats(0, idx) = mean_stat_vec[idx];
+            cmvn_stats(0, idx) = mean_stat_vec[idx];
-        cmvn_stats(1, idx) = var_stat_vec[idx];
+            cmvn_stats(1, idx) = var_stat_vec[idx];
+        }
+        cmvn_stats(0, mean_size) = frame_num;
+        LOG(INFO) << cmvn_stats;
+        kaldi::WriteKaldiObject(
+            cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
+        LOG(INFO) << "cmvn stats have write into: " << FLAGS_cmvn_write_path;
+        LOG(INFO) << "Binary: " << FLAGS_binary;
+    } catch (simdjson::simdjson_error& err) {
+        LOG(ERR) << err.what();
    }
-    cmvn_stats(0, mean_size) = frame_num;
-    LOG(INFO) << cmvn_stats;
-    kaldi::WriteKaldiObject(cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
-    LOG(INFO) << "cmvn stats have write into: " << FLAGS_cmvn_write_path;
-    LOG(INFO) << "Binary: " << FLAGS_binary;
    return 0;
 }
\ No newline at end of file
--- a/speechx/speechx/utils/simdjson.h
+++ b/speechx/speechx/utils/simdjson.h
@@ -2412,7 +2412,7 @@ enum error_code {
    CAPACITY,     ///< This parser can't support a document that big
    MEMALLOC,     ///< Error allocating memory, most likely out of memory
    TAPE_ERROR,   ///< Something went wrong while writing to the tape (stage 2),
-                  ///this is a generic error
+                  /// this is a generic error
    DEPTH_ERROR,  ///< Your document exceeds the user-specified depth limitation
    STRING_ERROR,   ///< Problem while parsing a string
    T_ATOM_ERROR,   ///< Problem while parsing an atom starting with the letter
@@ -2438,9 +2438,9 @@ enum error_code {
    UNEXPECTED_ERROR,            ///< indicative of a bug in simdjson
    PARSER_IN_USE,               ///< parser is already in use.
    OUT_OF_ORDER_ITERATION,      ///< tried to iterate an array or object out of
-                                 ///order
+                                 /// order
    INSUFFICIENT_PADDING,        ///< The JSON doesn't have enough padding for
-                                 ///simdjson to safely parse it.
+                                 /// simdjson to safely parse it.
    INCOMPLETE_ARRAY_OR_OBJECT,  ///< The document ends early.
    SCALAR_DOCUMENT_AS_VALUE,    ///< A scalar document is treated as a value.
    OUT_OF_BOUNDS,  ///< Attempted to access location outside of document.