all decode method test scripts; result readme

ba1d7dc6 · Hui Zhang · 146a60d9 · ba1d7dc6 · ba1d7dc6 · ba1d7dc6
7 changed files
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -450,7 +450,7 @@ class U2Tester(U2Trainer):
        logger.info(msg)
        # test meta results
-        err_meta_path = os.path.splitext(self.args.checkpoint_path)[0] + '.err'
+        err_meta_path = os.path.splitext(self.args.result_file)[0] + '.err'
        err_type_str = "{}".format(error_rate_type)
        with open(err_meta_path, 'w') as f:
            data = json.dumps({
@@ -471,6 +471,8 @@ class U2Tester(U2Trainer):
                errors_sum,
                "ref_len":
                len_refs,
+                "decode_method":
+                self.config.decoding.decoding_method,
            })
            f.write(data + '\n')

--- a/examples/aishell/s1/README.md
+++ b/examples/aishell/s1/README.md
+# Aishell
+## Conformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | attention | - | 0.059858 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_greedy_search | - | 0.062311 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | - | 0.062196 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test | attention_rescoring | - | 0.054694 |
+## Transformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| transformer | conf/transformer.yaml | spec_aug + shift | test | attention | - | - |
--- a/examples/aishell/s1/local/test.sh
+++ b/examples/aishell/s1/local/test.sh
@@ -21,17 +21,39 @@ ckpt_prefix=$2
 #    exit 1
 #fi
-python3 -u ${BIN_DIR}/test.py \
--device ${device} \
--nproc 1 \
--config ${config_path} \
--result_file ${ckpt_prefix}.rsl \
--checkpoint_path ${ckpt_prefix}
-if [ $? -ne 0 ]; then
-    echo "Failed in evaluation!"
-    exit 1
-fi
+for type in attention ctc_greedy_search; do
+    echo "decoding ${type}"
+    batch_size=64
+    python3 -u ${BIN_DIR}/test.py \
+    --device ${device} \
+    --nproc 1 \
+    --config ${config_path} \
+    --result_file ${ckpt_prefix}.${type}.rsl \
+    --checkpoint_path ${ckpt_prefix} \
+    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+    if [ $? -ne 0 ]; then
+        echo "Failed in evaluation!"
+        exit 1
+    fi
+done
+for type in ctc_prefix_beam_search attention_rescoring; do
+    echo "decoding ${type}"
+    batch_size=1
+    python3 -u ${BIN_DIR}/test.py \
+    --device ${device} \
+    --nproc 1 \
+    --config ${config_path} \
+    --result_file ${ckpt_prefix}.${type}.rsl \
+    --checkpoint_path ${ckpt_prefix} \
+    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+    if [ $? -ne 0 ]; then
+        echo "Failed in evaluation!"
+        exit 1
+    fi
+done
 exit 0
--- a/examples/librispeech/README.md
+++ b/examples/librispeech/README.md
 # ASR
 * s0 is for deepspeech2
-* s1 is for U2
+* s1 is for transformer/conformer/U2
--- a/examples/librispeech/s1/README.md
+++ b/examples/librispeech/s1/README.md
+# LibriSpeech
+## Conformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-all | attention | test-all 6.35 | 0.057117 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.35 | 0.030162 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | test-all 6.35 | 0.037910 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | test-all 6.35 | 0.037761 |
+| conformer | conf/conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | test-all 6.35 | 0.032115 |
+## Transformer
+| Model | Config | Augmentation | Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- |
+| transformer | conf/transformer.yaml | spec_aug + shift | test-all | attention | test-all 6.98 | 0.066500 |
+| transformer | conf/transformer.yaml | spec_aug + shift | test-clean | attention | test-all 6.98 | 0.036 |
--- a/examples/librispeech/s1/conf/conformer.yaml
+++ b/examples/librispeech/s1/conf/conformer.yaml
@@ -14,7 +14,7 @@ data:
  min_output_len: 0.0 # tokens
  max_output_len: 400.0 # tokens
  min_output_input_ratio: 0.05
-  max_output_input_ratio: 10.0
+  max_output_input_ratio: .inf
  raw_wav: True  # use raw_wav or kaldi feature
  specgram_type: fbank #linear, mfcc, fbank
  feat_dim: 80
@@ -77,7 +77,7 @@ model:
 training:
  n_epoch: 120
  accum_grad: 8
-  global_grad_clip: 5.0
+  global_grad_clip: 3.0
  optim: adam
  optim_conf:
    lr: 0.004

--- a/examples/librispeech/s1/local/test.sh
+++ b/examples/librispeech/s1/local/test.sh
@@ -28,7 +28,7 @@ for type in attention ctc_greedy_search; do
    --device ${device} \
    --nproc 1 \
    --config ${config_path} \
-    --result_file ${ckpt_prefix}.rsl \
+    --result_file ${ckpt_prefix}.${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
@@ -45,7 +45,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
    --device ${device} \
    --nproc 1 \
    --config ${config_path} \
-    --result_file ${ckpt_prefix}.rsl \
+    --result_file ${ckpt_prefix}.${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}