From 341038b6264df4d1c9733edefcecf2af7c901fff Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 30 Aug 2021 03:08:01 +0000
Subject: [PATCH] ds2 offline cer 6p4287

---
 deepspeech/models/ds2/conv.py             |  7 ------
 deepspeech/modules/subsampling.py         | 14 ++++++------
 examples/aishell/s0/README.md             |  2 +-
 examples/aishell/s0/conf/deepspeech2.yaml |  2 +-
 examples/aishell/s0/local/train.sh        |  2 +-
 utils/avg.sh                              | 26 ++++++++++++++++-------
 utils/tarball.sh                          |  9 ++++----
 7 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/deepspeech/models/ds2/conv.py b/deepspeech/models/ds2/conv.py
index 8bf48b2c..ce962a44 100644
--- a/deepspeech/models/ds2/conv.py
+++ b/deepspeech/models/ds2/conv.py
@@ -41,13 +41,6 @@ def conv_output_size(I, F, P, S):
     return (I - F + 2 * P - S) // S
 
 
-# receptive field calculator
-# https://fomoro.com/research/article/receptive-field-calculator
-# https://stanford.edu/~shervine/teaching/cs-230/cheatsheet-convolutional-neural-networks#hyperparameters
-# https://distill.pub/2019/computing-receptive-fields/
-# Rl-1 = Sl * Rl + (Kl - Sl) 
-
-
 class ConvBn(nn.Layer):
     """Convolution layer with batch normalization.
 
diff --git a/deepspeech/modules/subsampling.py b/deepspeech/modules/subsampling.py
index 40fa7b00..3bed62f3 100644
--- a/deepspeech/modules/subsampling.py
+++ b/deepspeech/modules/subsampling.py
@@ -108,8 +108,8 @@ class Conv2dSubsampling4(BaseSubsampling):
             nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim))
         self.subsampling_rate = 4
         # The right context for every conv layer is computed by:
-        # (kernel_size - 1) / 2 * stride  * frame_rate_of_this_layer
-        # 6 = (3 - 1) / 2 * 2 * 1 + (3 - 1) / 2 * 2 * 2
+        # (kernel_size - 1) * frame_rate_of_this_layer
+        # 6 = (3 - 1) * 1 + (3 - 1) * 2
         self.right_context = 6
 
     def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
@@ -160,10 +160,10 @@ class Conv2dSubsampling6(BaseSubsampling):
         # when Padding == 0, O = (I - F - S) // S
         self.linear = nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), odim)
         # The right context for every conv layer is computed by:
-        # (kernel_size - 1) / 2 * stride  * frame_rate_of_this_layer
-        # 14 = (3 - 1) / 2 * 2 * 1 + (5 - 1) / 2 * 3 * 2
+        # (kernel_size - 1) * frame_rate_of_this_layer
+        # 10 = (3 - 1) * 1 + (5 - 1) * 2
         self.subsampling_rate = 6
-        self.right_context = 14
+        self.right_context = 10
 
     def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
                 ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
@@ -214,8 +214,8 @@ class Conv2dSubsampling8(BaseSubsampling):
                                 odim)
         self.subsampling_rate = 8
         # The right context for every conv layer is computed by:
-        # (kernel_size - 1) / 2 * stride  * frame_rate_of_this_layer
-        # 14 = (3 - 1) / 2 * 2 * 1 + (3 - 1) / 2 * 2 * 2 + (3 - 1) / 2 * 2 * 4
+        # (kernel_size - 1) * frame_rate_of_this_layer
+        # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4
         self.right_context = 14
 
     def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
diff --git a/examples/aishell/s0/README.md b/examples/aishell/s0/README.md
index 537496a6..e5ebfcba 100644
--- a/examples/aishell/s0/README.md
+++ b/examples/aishell/s0/README.md
@@ -10,7 +10,7 @@
 
 | Model | Params | Release | Config | Test set | Loss | CER |  
 | --- | --- | --- | --- | --- | --- | --- |  
-| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382 |  
+| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug | test | 5.71956205368042 | 0.064287 |  
 | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 |  
 | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 |
 | DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 |  
diff --git a/examples/aishell/s0/conf/deepspeech2.yaml b/examples/aishell/s0/conf/deepspeech2.yaml
index c4ff246f..7f0a1462 100644
--- a/examples/aishell/s0/conf/deepspeech2.yaml
+++ b/examples/aishell/s0/conf/deepspeech2.yaml
@@ -42,7 +42,7 @@ model:
   share_rnn_weights: False
 
 training:
-  n_epoch: 50
+  n_epoch: 80
   lr: 2e-3
   lr_decay: 0.83
   weight_decay: 1e-06
diff --git a/examples/aishell/s0/local/train.sh b/examples/aishell/s0/local/train.sh
index d42e51fa..3438a735 100755
--- a/examples/aishell/s0/local/train.sh
+++ b/examples/aishell/s0/local/train.sh
@@ -19,7 +19,7 @@ fi
 
 mkdir -p exp
 
-seed=1024
+seed=10086
 if [ ${seed} ]; then
     export FLAGS_cudnn_deterministic=True
 fi
diff --git a/utils/avg.sh b/utils/avg.sh
index c8a6ddfe..399c9574 100755
--- a/utils/avg.sh
+++ b/utils/avg.sh
@@ -1,19 +1,29 @@
 #! /usr/bin/env bash
 
-if [ $# != 2 ]; then
-    echo "usage: ${0} ckpt_dir avg_num"
+if [ $# != 3 ]; then
+    echo "usage: ${0} ckpt_dir [best|latest] avg_num"
     exit -1
 fi
 
 ckpt_dir=${1}
-average_num=${2}
+avg_mode=${2} # best,latest
+average_num=${3}
 decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams
 
-avg_model.py \
---dst_model ${decode_checkpoint} \
---ckpt_dir ${ckpt_dir}  \
---num ${average_num} \
---val_best
+if [ "${avg_mode}" == best ];then
+    # best: pass --val_best to avg_model.py
+    avg_model.py \
+    --dst_model ${decode_checkpoint} \
+    --ckpt_dir ${ckpt_dir}  \
+    --num ${average_num} \
+    --val_best
+else
+    # latest (any mode other than "best"): omit --val_best
+    avg_model.py \
+    --dst_model ${decode_checkpoint} \
+    --ckpt_dir ${ckpt_dir}  \
+    --num ${average_num}
+fi
 
 if [ $? -ne 0 ]; then
     echo "Failed in avg ckpt!"
diff --git a/utils/tarball.sh b/utils/tarball.sh
index 5f7c21a3..ac8bdb39 100755
--- a/utils/tarball.sh
+++ b/utils/tarball.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-if [ $# != 4 ];then
-    echo "usage: $0 ckpt_prefix model_config mean_std vocab"
+if [ $# != 5 ];then
+    echo "usage: $0 ckpt_prefix model_config mean_std vocab pack_name"
     exit -1
 fi
 
@@ -9,6 +9,7 @@ ckpt_prefix=$1
 model_config=$2
 mean_std=$3
 vocab=$4
+pack_name=$5
 
 output=release
 
@@ -27,6 +28,6 @@ cp ${ckpt_prefix}.*  ${output}
 # model config, mean std, vocab
 cp ${model_config} ${mean_std} ${vocab} ${output}
 
-tar zcvf release.tar.gz ${output}
+tar zcvf ${pack_name}.release.tar.gz ${output}
 
-echo "tarball done!"
+echo "tarball: ${pack_name}.release.tar.gz done!"
-- 
GitLab