Commit 472cf70e authored by Hui Zhang

refactor egs; add utils; add tools; rm notebook;add speechnn; more docs;

Parent 5ef4a34e
unset GREP_OPTIONS
# https://zhuanlan.zhihu.com/p/33050965
alias nvs='nvidia-smi'
alias his='history'
alias jobs='jobs -l'
alias ports='netstat -tulanp'
alias wget='wget -c'
## Colorize the grep command output for ease of use (good for log files)##
alias grep='grep --color=auto'
alias egrep='egrep --color=auto'
alias fgrep='fgrep --color=auto'
@@ -42,6 +42,10 @@ ignore =
     # these ignores are from flake8-comprehensions; please fix!
     C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
+per-file-ignores =
+    */__init__.py: F401
 # Specify the list of error codes you wish Flake8 to report.
 select =
     E,
...
@@ -10,8 +10,15 @@
 .ipynb_checkpoints
 *.npz
 *.done
+*.whl
 tools/venv
 tools/kenlm
 tools/sox-14.4.2
 tools/soxbindings
+tools/montreal-forced-aligner/
+tools/Montreal-Forced-Aligner/
+tools/sctk
+tools/sctk-20159b5/
+*output/
@@ -87,3 +87,9 @@ pull_request_rules:
     actions:
       label:
         add: ["Docker"]
+  - name: "auto add label=Deployment"
+    conditions:
+      - files~=^speechnn/
+    actions:
+      label:
+        add: ["Deployment"]
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "academic-surname",
"metadata": {},
"outputs": [],
"source": [
"import paddle\n",
"from paddle import nn"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fundamental-treasure",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x/tools/venv-dev/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
" and should_run_async(code)\n"
]
}
],
"source": [
"L = nn.Linear(256, 2048)\n",
"L2 = nn.Linear(2048, 256)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "consolidated-elephant",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import torch\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "moderate-noise",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"float64\n",
"Tensor(shape=[2, 51, 256], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[[-1.54171216, -2.61531472, -1.79881978, ..., -0.31395876, 0.56513089, -0.44516513],\n",
" [-0.79492962, 1.91157901, 0.66567147, ..., 0.54825783, -1.01471853, -0.84924090],\n",
" [-1.22556651, -0.36225814, 0.65063190, ..., 0.65726501, 0.05563191, 0.09009409],\n",
" ...,\n",
" [ 0.38615900, -0.77905393, 0.99732304, ..., -1.38463700, -3.32365036, -1.31089687],\n",
" [ 0.05579993, 0.06885809, -1.66662002, ..., -0.23346378, -3.29372883, 1.30561364],\n",
" [ 1.90676069, 1.95093191, -0.28849599, ..., -0.06860496, 0.95347673, 1.00475824]],\n",
"\n",
" [[-0.91453546, 0.55298805, -1.06146812, ..., -0.86378336, 1.00454640, 1.26062179],\n",
" [ 0.10223761, 0.81301165, 2.36865163, ..., 0.16821407, 0.29240361, 1.05408621],\n",
" [-1.33196676, 1.94433689, 0.01934209, ..., 0.48036841, 0.51585966, 1.22893548],\n",
" ...,\n",
" [-0.19558455, -0.47075930, 0.90796155, ..., -1.28598249, -0.24321797, 0.17734711],\n",
" [ 0.89819717, -1.39516675, 0.17138045, ..., 2.39761519, 1.76364994, -0.52177650],\n",
" [ 0.94122332, -0.18581429, 1.36099780, ..., 0.67647684, -0.04699665, 1.51205540]]])\n",
"tensor([[[-1.5417, -2.6153, -1.7988, ..., -0.3140, 0.5651, -0.4452],\n",
" [-0.7949, 1.9116, 0.6657, ..., 0.5483, -1.0147, -0.8492],\n",
" [-1.2256, -0.3623, 0.6506, ..., 0.6573, 0.0556, 0.0901],\n",
" ...,\n",
" [ 0.3862, -0.7791, 0.9973, ..., -1.3846, -3.3237, -1.3109],\n",
" [ 0.0558, 0.0689, -1.6666, ..., -0.2335, -3.2937, 1.3056],\n",
" [ 1.9068, 1.9509, -0.2885, ..., -0.0686, 0.9535, 1.0048]],\n",
"\n",
" [[-0.9145, 0.5530, -1.0615, ..., -0.8638, 1.0045, 1.2606],\n",
" [ 0.1022, 0.8130, 2.3687, ..., 0.1682, 0.2924, 1.0541],\n",
" [-1.3320, 1.9443, 0.0193, ..., 0.4804, 0.5159, 1.2289],\n",
" ...,\n",
" [-0.1956, -0.4708, 0.9080, ..., -1.2860, -0.2432, 0.1773],\n",
" [ 0.8982, -1.3952, 0.1714, ..., 2.3976, 1.7636, -0.5218],\n",
" [ 0.9412, -0.1858, 1.3610, ..., 0.6765, -0.0470, 1.5121]]])\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x/tools/venv-dev/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
" and should_run_async(code)\n"
]
}
],
"source": [
"x = np.random.randn(2, 51, 256)\n",
"print(x.dtype)\n",
"px = paddle.to_tensor(x, dtype='float32')\n",
"tx = torch.tensor(x, dtype=torch.float32)\n",
"print(px)\n",
"print(tx)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cooked-progressive",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 5,
"id": "mechanical-prisoner",
"metadata": {},
"outputs": [],
"source": [
"data = np.load('enc_0_ff_out.npz', allow_pickle=True)\n",
"t_norm_ff = data['norm_ff']\n",
"t_ff_out = data['ff_out']\n",
"t_ff_l_x = data['ff_l_x']\n",
"t_ff_l_a_x = data['ff_l_a_x']\n",
"t_ff_l_a_l_x = data['ff_l_a_l_x']\n",
"t_ps = data['ps']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "indie-marriage",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"id": "assured-zambia",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"True\n",
"True\n"
]
}
],
"source": [
"L.set_state_dict({'weight': t_ps[0].T, 'bias': t_ps[1]})\n",
"L2.set_state_dict({'weight': t_ps[2].T, 'bias': t_ps[3]})\n",
"\n",
"ps = []\n",
"for n, p in L.named_parameters():\n",
" ps.append(p)\n",
"\n",
"for n, p in L2.state_dict().items():\n",
" ps.append(p)\n",
" \n",
"for p, tp in zip(ps, t_ps):\n",
" print(np.allclose(p.numpy(), tp.T))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "committed-jacob",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "extreme-traffic",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "optimum-milwaukee",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"id": "viral-indian",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"True\n",
"True\n"
]
}
],
"source": [
"# data = np.load('enc_0_ff_out.npz', allow_pickle=True)\n",
"# t_norm_ff = data['norm_ff']\n",
"# t_ff_out = data['ff_out']\n",
"# t_ff_l_x = data['ff_l_x']\n",
"# t_ff_l_a_x = data['ff_l_a_x']\n",
"# t_ff_l_a_l_x = data['ff_l_a_l_x']\n",
"# t_ps = data['ps']\n",
"TL = torch.nn.Linear(256, 2048)\n",
"TL2 = torch.nn.Linear(2048, 256)\n",
"TL.load_state_dict({'weight': torch.tensor(t_ps[0]), 'bias': torch.tensor(t_ps[1])})\n",
"TL2.load_state_dict({'weight': torch.tensor(t_ps[2]), 'bias': torch.tensor(t_ps[3])})\n",
"\n",
"# for n, p in TL.named_parameters():\n",
"# print(n, p)\n",
"# for n, p in TL2.named_parameters():\n",
"# print(n, p)\n",
"\n",
"ps = []\n",
"for n, p in TL.state_dict().items():\n",
" ps.append(p.data.numpy())\n",
" \n",
"for n, p in TL2.state_dict().items():\n",
" ps.append(p.data.numpy())\n",
" \n",
"for p, tp in zip(ps, t_ps):\n",
" print(np.allclose(p, tp))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "skilled-vietnamese",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[[ 0.67277956 0.08313607 -0.62761104 ... -0.17480263 0.42718208\n",
" -0.5787626 ]\n",
" [ 0.91516656 0.5393416 1.7159258 ... 0.06144593 0.06486575\n",
" -0.03350811]\n",
" [ 0.438351 0.6227843 0.24096036 ... 1.0912522 -0.90929437\n",
" -1.012989 ]\n",
" ...\n",
" [ 0.68631977 0.14240924 0.10763275 ... -0.11513516 0.48065388\n",
" 0.04070369]\n",
" [-0.9525228 0.23197874 0.31264272 ... 0.5312439 0.18773697\n",
" -0.8450228 ]\n",
" [ 0.42024016 -0.04561988 0.54541194 ... -0.41933843 -0.00436018\n",
" -0.06663495]]\n",
"\n",
" [[-0.11638781 -0.33566502 -0.20887226 ... 0.17423287 -0.9195841\n",
" -0.8161046 ]\n",
" [-0.3469874 0.88269687 -0.11887559 ... -0.15566081 0.16357468\n",
" -0.20766167]\n",
" [-0.3847657 0.3984318 -0.06963477 ... -0.00360622 1.2360432\n",
" -0.26811332]\n",
" ...\n",
" [ 0.08230796 -0.46158582 0.54582864 ... 0.15747628 -0.44790155\n",
" 0.06020184]\n",
" [-0.8095085 0.43163058 -0.42837143 ... 0.8627463 0.90656304\n",
" 0.15847842]\n",
" [-1.485811 -0.18216592 -0.8882585 ... 0.32596245 0.7822631\n",
" -0.6460344 ]]]\n",
"[[[ 0.67278004 0.08313602 -0.6276114 ... -0.17480245 0.42718196\n",
" -0.5787625 ]\n",
" [ 0.91516703 0.5393413 1.7159253 ... 0.06144581 0.06486579\n",
" -0.03350812]\n",
" [ 0.43835106 0.62278455 0.24096027 ... 1.0912521 -0.9092943\n",
" -1.0129892 ]\n",
" ...\n",
" [ 0.6863195 0.14240888 0.10763284 ... -0.11513527 0.48065376\n",
" 0.04070365]\n",
" [-0.9525231 0.23197863 0.31264275 ... 0.53124386 0.18773702\n",
" -0.84502304]\n",
" [ 0.42024007 -0.04561983 0.545412 ... -0.41933888 -0.00436005\n",
" -0.066635 ]]\n",
"\n",
" [[-0.11638767 -0.33566508 -0.20887226 ... 0.17423296 -0.9195838\n",
" -0.8161046 ]\n",
" [-0.34698725 0.88269705 -0.11887549 ... -0.15566081 0.16357464\n",
" -0.20766166]\n",
" [-0.3847657 0.3984319 -0.06963488 ... -0.00360619 1.2360426\n",
" -0.26811326]\n",
" ...\n",
" [ 0.08230786 -0.4615857 0.5458287 ... 0.15747619 -0.44790167\n",
" 0.06020182]\n",
" [-0.8095083 0.4316307 -0.42837155 ... 0.862746 0.9065631\n",
" 0.15847899]\n",
" [-1.485811 -0.18216613 -0.8882584 ... 0.32596254 0.7822631\n",
" -0.6460344 ]]]\n",
"True\n",
"False\n"
]
}
],
"source": [
"y = L(px)\n",
"print(y.numpy())\n",
"\n",
"ty = TL(tx)\n",
"print(ty.data.numpy())\n",
"print(np.allclose(px.numpy(), tx.detach().numpy()))\n",
"print(np.allclose(y.numpy(), ty.detach().numpy()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "incorrect-allah",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "prostate-cameroon",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 9,
"id": "governmental-surge",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.04476918 0.554463 -0.3027508 ... -0.49600336 0.3751858\n",
" 0.8254095 ]\n",
" [ 0.95594174 -0.29528382 -1.2899452 ... 0.43718258 0.05584608\n",
" -0.06974669]]\n",
"[[ 0.04476918 0.5544631 -0.3027507 ... -0.49600336 0.37518573\n",
" 0.8254096 ]\n",
" [ 0.95594174 -0.29528376 -1.2899454 ... 0.4371827 0.05584623\n",
" -0.0697467 ]]\n",
"True\n",
"False\n",
"True\n"
]
}
],
"source": [
"x = np.random.randn(2, 256)\n",
"px = paddle.to_tensor(x, dtype='float32')\n",
"tx = torch.tensor(x, dtype=torch.float32)\n",
"y = L(px)\n",
"print(y.numpy())\n",
"ty = TL(tx)\n",
"print(ty.data.numpy())\n",
"print(np.allclose(px.numpy(), tx.detach().numpy()))\n",
"print(np.allclose(y.numpy(), ty.detach().numpy()))\n",
"print(np.allclose(y.numpy(), ty.detach().numpy(), atol=1e-5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "confidential-jacket",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 10,
"id": "improved-civilization",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5e7e7c9fde8350084abf1898cf52651cfc84b17a\n"
]
}
],
"source": [
"print(paddle.version.commit)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "d1e2d3b4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['__builtins__',\n",
" '__cached__',\n",
" '__doc__',\n",
" '__file__',\n",
" '__loader__',\n",
" '__name__',\n",
" '__package__',\n",
" '__spec__',\n",
" 'commit',\n",
" 'full_version',\n",
" 'istaged',\n",
" 'major',\n",
" 'minor',\n",
" 'mkl',\n",
" 'patch',\n",
" 'rc',\n",
" 'show',\n",
" 'with_mkl']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dir(paddle.version)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c880c719",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.1.0\n"
]
}
],
"source": [
"print(paddle.version.full_version)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f26977bf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"commit: 5e7e7c9fde8350084abf1898cf52651cfc84b17a\n",
"None\n"
]
}
],
"source": [
"print(paddle.version.show())"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "04ad47f6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.6.0\n"
]
}
],
"source": [
"print(torch.__version__)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "e1e03830",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['__builtins__',\n",
" '__cached__',\n",
" '__doc__',\n",
" '__file__',\n",
" '__loader__',\n",
" '__name__',\n",
" '__package__',\n",
" '__spec__',\n",
" '__version__',\n",
" 'cuda',\n",
" 'debug',\n",
" 'git_version',\n",
" 'hip']"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dir(torch.version)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4ad0389b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'b31f58de6fa8bbda5353b3c77d9be4914399724d'"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.version.git_version"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "7870ea10",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'10.2'"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.version.cuda"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db8ee5a7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6321ec2a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
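The check in the notebook above hinges on one convention difference between the two frameworks: `paddle.nn.Linear` stores its weight as `[in_features, out_features]`, while `torch.nn.Linear` stores `[out_features, in_features]`, which is why the torch parameters are transposed before being loaded into the paddle layer. A minimal standalone sketch of the same comparison (batch size and tolerance are illustrative):

```python
import numpy as np
import paddle
import torch

# Build the same 256 -> 2048 projection in both frameworks.
torch_linear = torch.nn.Linear(256, 2048)
paddle_linear = paddle.nn.Linear(256, 2048)

# paddle keeps the weight as [in_features, out_features], torch as
# [out_features, in_features], so the torch weight is transposed when
# copied over; the bias is copied as-is.
paddle_linear.set_state_dict({
    'weight': torch_linear.weight.detach().numpy().T,
    'bias': torch_linear.bias.detach().numpy(),
})

x = np.random.randn(4, 256).astype('float32')
y_paddle = paddle_linear(paddle.to_tensor(x)).numpy()
y_torch = torch_linear(torch.tensor(x)).detach().numpy()

# Exact bitwise equality is not expected across frameworks; a small atol
# absorbs float32 rounding differences.
print(np.allclose(y_paddle, y_torch, atol=1e-5))
```

With the transpose in place, the remaining differences are at float32 precision, which is why the notebook's exact `np.allclose` on the larger input fails but the `atol=1e-5` comparison passes.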
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "breeding-haven",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/ssd5/zhanghui/DeepSpeech2.x\n"
]
},
{
"data": {
"text/plain": [
"'/home/ssd5/zhanghui/DeepSpeech2.x'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%cd ..\n",
"%pwd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "appropriate-theta",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LICENSE deepspeech examples\t\t requirements.txt tools\r\n",
"README.md docs\t libsndfile-1.0.28\t setup.sh\t utils\r\n",
"README_cn.md env.sh\t libsndfile-1.0.28.tar.gz tests\r\n"
]
}
],
"source": [
"!ls"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "entire-bloom",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/layers/utils.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
"Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
" def convert_to_list(value, n, name, dtype=np.int):\n",
"WARNING:root:override cat of paddle.Tensor if exists or register, remove this when fixed!\n",
"WARNING:root:register user masked_fill to paddle.Tensor, remove this when fixed!\n",
"WARNING:root:register user masked_fill_ to paddle.Tensor, remove this when fixed!\n",
"WARNING:root:register user repeat to paddle.Tensor, remove this when fixed!\n",
"WARNING:root:register user glu to paddle.nn.functional, remove this when fixed!\n",
"WARNING:root:register user GLU to paddle.nn, remove this when fixed!\n",
"WARNING:root:register user ConstantPad2d to paddle.nn, remove this when fixed!\n",
"WARNING:root:override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n"
]
}
],
"source": [
"from deepspeech.modules import loss"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "governmental-aircraft",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
" and should_run_async(code)\n"
]
}
],
"source": [
"import paddle"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "proprietary-disaster",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<function deepspeech.modules.repeat(xs: paddle.VarBase, *size: Any) -> paddle.VarBase>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"paddle.Tensor.repeat"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "first-diagram",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<property at 0x7fb515eeeb88>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"paddle.Tensor.size"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "intelligent-david",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<function paddle.tensor.manipulation.concat(x, axis=0, name=None)>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"paddle.Tensor.cat"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "bronze-tenant",
"metadata": {},
"outputs": [],
"source": [
"a = paddle.to_tensor([12,32, 10, 12, 123,32 ,4])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "balanced-bearing",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.size"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "extreme-republic",
"metadata": {},
"outputs": [],
"source": [
"def size(xs: paddle.Tensor, *args: int) -> paddle.Tensor:\n",
" nargs = len(args)\n",
" assert (nargs <= 1)\n",
" s = paddle.shape(xs)\n",
" if nargs == 1:\n",
" return s[args[0]]\n",
" else:\n",
" return s\n",
"\n",
"# logger.warn(\n",
"# \"override size of paddle.Tensor if exists or register, remove this when fixed!\"\n",
"# )\n",
"paddle.Tensor.size = size"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "gross-addiction",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Tensor(shape=[1], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
" [7])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.size(0)\n",
"a.size()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "adverse-dining",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Tensor(shape=[1], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
" [7])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.size()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "popular-potato",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 32,
"id": "academic-surname",
"metadata": {},
"outputs": [],
"source": [
"import paddle\n",
"from paddle import nn"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "fundamental-treasure",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Parameter containing:\n",
"Tensor(shape=[256], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
" [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])\n",
"Parameter containing:\n",
"Tensor(shape=[256], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
" [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])\n"
]
}
],
"source": [
"L = nn.LayerNorm(256, epsilon=1e-12)\n",
"for p in L.parameters():\n",
" print(p)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "consolidated-elephant",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "moderate-noise",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"float64\n"
]
}
],
"source": [
"x = np.random.randn(2, 51, 256)\n",
"print(x.dtype)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "cooked-progressive",
"metadata": {},
"outputs": [],
"source": [
"y = L(paddle.to_tensor(x, dtype='float32'))"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "optimum-milwaukee",
"metadata": {},
"outputs": [],
"source": [
"import torch"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "viral-indian",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Parameter containing:\n",
"tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1.], requires_grad=True)\n",
"Parameter containing:\n",
"tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
" requires_grad=True)\n"
]
}
],
"source": [
"TL = torch.nn.LayerNorm(256, eps=1e-12)\n",
"for p in TL.parameters():\n",
" print(p)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "skilled-vietnamese",
"metadata": {},
"outputs": [],
"source": [
"ty = TL(torch.tensor(x, dtype=torch.float32))"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "incorrect-allah",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.allclose(y.numpy(), ty.detach().numpy())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "prostate-cameroon",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 52,
"id": "governmental-surge",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = np.random.randn(2, 256)\n",
"y = L(paddle.to_tensor(x, dtype='float32'))\n",
"ty = TL(torch.tensor(x, dtype=torch.float32))\n",
"np.allclose(y.numpy(), ty.detach().numpy())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "confidential-jacket",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
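In the LayerNorm comparison above, `np.allclose` with default tolerances returns False for the `(2, 51, 256)` float32 input but True for the smaller `(2, 256)` one, even though both layers start from the same weight=1, bias=0 initialization and use the same epsilon. The discrepancies are most likely float32 rounding and accumulation differences between the two backends; comparing with a small absolute tolerance, as the Linear notebook does, is the usual workaround. A minimal sketch, assuming default-initialized layers:

```python
import numpy as np
import paddle
import torch

x = np.random.randn(2, 51, 256)

# Default-initialized LayerNorm (weight=1, bias=0) with matching epsilon.
L = paddle.nn.LayerNorm(256, epsilon=1e-12)
TL = torch.nn.LayerNorm(256, eps=1e-12)

y = L(paddle.to_tensor(x, dtype='float32'))
ty = TL(torch.tensor(x, dtype=torch.float32))

# Default np.allclose tolerances can fail on float32 outputs; a small
# absolute tolerance typically absorbs the rounding noise.
print(np.allclose(y.numpy(), ty.detach().numpy(), atol=1e-5))
```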
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "designing-borough",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
" and should_run_async(code)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.0000000e+00 0.0000000e+00 0.0000000e+00 ... 0.0000000e+00\n",
" 0.0000000e+00 0.0000000e+00]\n",
" [ 8.4147096e-01 8.0196178e-01 7.6172036e-01 ... 1.2409373e-04\n",
" 1.1547816e-04 1.0746076e-04]\n",
" [ 9.0929741e-01 9.5814437e-01 9.8704624e-01 ... 2.4818745e-04\n",
" 2.3095631e-04 2.1492151e-04]\n",
" ...\n",
" [ 3.7960774e-01 7.4510968e-01 7.3418564e-01 ... 1.2036801e-02\n",
" 1.1201146e-02 1.0423505e-02]\n",
" [-5.7338190e-01 -8.9752287e-02 -4.1488394e-02 ... 1.2160885e-02\n",
" 1.1316618e-02 1.0530960e-02]\n",
" [-9.9920684e-01 -8.5234123e-01 -7.8794664e-01 ... 1.2284970e-02\n",
" 1.1432089e-02 1.0638415e-02]]\n",
"True\n",
"True\n"
]
}
],
"source": [
"import torch\n",
"import math\n",
"import numpy as np\n",
"\n",
"max_len=100\n",
"d_model=256\n",
"\n",
"pe = torch.zeros(max_len, d_model)\n",
"position = torch.arange(0, max_len,\n",
" dtype=torch.float32).unsqueeze(1)\n",
"toruch_position = position\n",
"div_term = torch.exp(\n",
" torch.arange(0, d_model, 2, dtype=torch.float32) *\n",
" -(math.log(10000.0) / d_model))\n",
"tourch_div_term = div_term.cpu().detach().numpy()\n",
"\n",
"\n",
"\n",
"torhc_sin = torch.sin(position * div_term)\n",
"torhc_cos = torch.cos(position * div_term)\n",
"print(torhc_sin.cpu().detach().numpy())\n",
"np_sin = np.sin((position * div_term).cpu().detach().numpy())\n",
"np_cos = np.cos((position * div_term).cpu().detach().numpy())\n",
"print(np.allclose(np_sin, torhc_sin.cpu().detach().numpy()))\n",
"print(np.allclose(np_cos, torhc_cos.cpu().detach().numpy()))\n",
"pe[:, 0::2] = torhc_sin\n",
"pe[:, 1::2] = torhc_cos\n",
"tourch_pe = pe.cpu().detach().numpy()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "swiss-referral",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"False\n",
"False\n",
"False\n",
"False\n",
"[[ 1. 1. 1. ... 1. 1.\n",
" 1. ]\n",
" [ 0.5403023 0.59737533 0.6479059 ... 1. 1.\n",
" 1. ]\n",
" [-0.41614684 -0.28628543 -0.1604359 ... 0.99999994 1.\n",
" 1. ]\n",
" ...\n",
" [-0.92514753 -0.66694194 -0.67894876 ... 0.9999276 0.99993724\n",
" 0.9999457 ]\n",
" [-0.81928825 -0.9959641 -0.999139 ... 0.99992603 0.999936\n",
" 0.99994457]\n",
" [ 0.03982088 -0.52298605 -0.6157435 ... 0.99992454 0.9999347\n",
" 0.99994344]]\n",
"----\n",
"[[ 1. 1. 1. ... 1. 1.\n",
" 1. ]\n",
" [ 0.54030234 0.59737533 0.6479059 ... 1. 1.\n",
" 1. ]\n",
" [-0.41614684 -0.28628543 -0.1604359 ... 1. 1.\n",
" 1. ]\n",
" ...\n",
" [-0.92514753 -0.66694194 -0.67894876 ... 0.9999276 0.9999373\n",
" 0.9999457 ]\n",
" [-0.81928825 -0.9959641 -0.999139 ... 0.99992603 0.999936\n",
" 0.99994457]\n",
" [ 0.03982088 -0.5229861 -0.6157435 ... 0.99992454 0.9999347\n",
" 0.99994344]]\n",
")))))))\n",
"[[ 0.0000000e+00 0.0000000e+00 0.0000000e+00 ... 0.0000000e+00\n",
" 0.0000000e+00 0.0000000e+00]\n",
" [ 8.4147096e-01 8.0196178e-01 7.6172036e-01 ... 1.2409373e-04\n",
" 1.1547816e-04 1.0746076e-04]\n",
" [ 9.0929741e-01 9.5814437e-01 9.8704624e-01 ... 2.4818745e-04\n",
" 2.3095631e-04 2.1492151e-04]\n",
" ...\n",
" [ 3.7960774e-01 7.4510968e-01 7.3418564e-01 ... 1.2036801e-02\n",
" 1.1201146e-02 1.0423505e-02]\n",
" [-5.7338190e-01 -8.9752287e-02 -4.1488394e-02 ... 1.2160885e-02\n",
" 1.1316618e-02 1.0530960e-02]\n",
" [-9.9920684e-01 -8.5234123e-01 -7.8794664e-01 ... 1.2284970e-02\n",
" 1.1432089e-02 1.0638415e-02]]\n",
"----\n",
"[[ 0.0000000e+00 0.0000000e+00 0.0000000e+00 ... 0.0000000e+00\n",
" 0.0000000e+00 0.0000000e+00]\n",
" [ 8.4147096e-01 8.0196178e-01 7.6172036e-01 ... 1.2409373e-04\n",
" 1.1547816e-04 1.0746076e-04]\n",
" [ 9.0929741e-01 9.5814437e-01 9.8704624e-01 ... 2.4818745e-04\n",
" 2.3095631e-04 2.1492151e-04]\n",
" ...\n",
" [ 3.7960774e-01 7.4510968e-01 7.3418564e-01 ... 1.2036801e-02\n",
" 1.1201146e-02 1.0423505e-02]\n",
" [-5.7338190e-01 -8.9752287e-02 -4.1488394e-02 ... 1.2160885e-02\n",
" 1.1316618e-02 1.0530960e-02]\n",
" [-9.9920684e-01 -8.5234123e-01 -7.8794664e-01 ... 1.2284970e-02\n",
" 1.1432089e-02 1.0638415e-02]]\n"
]
}
],
"source": [
"import paddle\n",
"paddle.set_device('cpu')\n",
"ppe = paddle.zeros((max_len, d_model), dtype='float32')\n",
"position = paddle.arange(0, max_len,\n",
" dtype='float32').unsqueeze(1)\n",
"print(np.allclose(position.numpy(), toruch_position))\n",
"div_term = paddle.exp(\n",
" paddle.arange(0, d_model, 2, dtype='float32') *\n",
" -(math.log(10000.0) / d_model))\n",
"print(np.allclose(div_term.numpy(), tourch_div_term))\n",
"\n",
"\n",
"\n",
"p_sin = paddle.sin(position * div_term)\n",
"p_cos = paddle.cos(position * div_term)\n",
"print(np.allclose(np_sin, p_sin.numpy(), rtol=1.e-6, atol=0))\n",
"print(np.allclose(np_cos, p_cos.numpy(), rtol=1.e-6, atol=0))\n",
"ppe[:, 0::2] = p_sin\n",
"ppe[:, 1::2] = p_cos\n",
"print(np.allclose(p_sin.numpy(), torhc_sin.cpu().detach().numpy()))\n",
"print(np.allclose(p_cos.numpy(), torhc_cos.cpu().detach().numpy()))\n",
"print(p_cos.numpy())\n",
"print(\"----\")\n",
"print(torhc_cos.cpu().detach().numpy())\n",
"print(\")))))))\")\n",
"print(p_sin.numpy())\n",
"print(\"----\")\n",
"print(torhc_sin.cpu().detach().numpy())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "integrated-boards",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"False\n"
]
}
],
"source": [
"print(np.allclose(ppe.numpy(), pe.numpy()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "flying-reserve",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "revised-divide",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
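Both implementations above compute the standard sinusoidal positional encoding, PE(pos, 2i) = sin(pos / 10000^(2i/d_model)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d_model)); the sin/cos tables agree within rtol=1e-6, and the remaining element-wise mismatches are at float32 precision. A framework-free NumPy sketch of the same construction, using the notebook's max_len and d_model:

```python
import numpy as np

max_len, d_model = 100, 256

pos = np.arange(max_len, dtype=np.float32)[:, None]               # (max_len, 1)
div_term = np.exp(np.arange(0, d_model, 2, dtype=np.float32)
                  * -(np.log(10000.0) / d_model))                  # (d_model/2,)

pe = np.zeros((max_len, d_model), dtype=np.float32)
pe[:, 0::2] = np.sin(pos * div_term)   # even dims: sin(pos / 10000^(2i/d_model))
pe[:, 1::2] = np.cos(pos * div_term)   # odd dims:  cos(pos / 10000^(2i/d_model))

print(pe.shape)  # (100, 256)
```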
-[中文版](README_cn.md)
-# PaddlePaddle ASR toolkit
+# PaddlePaddle Speech to Any toolkit
 ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
 ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
 ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
-*PaddleASR* is an open-source implementation of an end-to-end Automatic Speech Recognition (ASR) engine on the [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient, smaller and scalable implementation, including training, inference & testing modules, and deployment.
+*DeepSpeech* is an open-source implementation of an end-to-end Automatic Speech Recognition engine on the [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient, smaller and scalable implementation, including training, inference & testing modules, and deployment.
 ## Features
-See [feature list](doc/src/feature_list.md) for more information.
+See [feature list](docs/src/feature_list.md) for more information.
 ## Setup
-All tested under:
-* Ubuntu 16.04
 * python>=3.7
-* paddlepaddle>=2.1.0
+* paddlepaddle>=2.1.2
-Please see [install](doc/src/install.md).
+Please see [install](docs/src/install.md).
 ## Getting Started
-Please see [Getting Started](doc/src/getting_started.md) and [tiny egs](examples/tiny/s0/README.md).
+Please see [Getting Started](docs/src/getting_started.md) and [tiny egs](examples/tiny/s0/README.md).
 ## More Information
-* [Data Preparation](doc/src/data_preparation.md)
-* [Data Augmentation](doc/src/augmentation.md)
-* [Ngram LM](doc/src/ngram_lm.md)
-* [Server Demo](doc/src/server.md)
-* [Benchmark](doc/src/benchmark.md)
-* [Released Model](doc/src/released_model.md)
-* [FAQ](doc/src/faq.md)
+* [Data Preparation](docs/src/data_preparation.md)
+* [Data Augmentation](docs/src/augmentation.md)
+* [Ngram LM](docs/src/ngram_lm.md)
+* [Benchmark](docs/src/benchmark.md)
+* [Released Model](docs/src/released_model.md)
 ## Questions and Help
@@ -43,8 +41,8 @@ You are welcome to submit questions in [Github Discussions](https://github.com/P
 ## License
-DeepASR is provided under the [Apache-2.0 License](./LICENSE).
+DeepSpeech is provided under the [Apache-2.0 License](./LICENSE).
 ## Acknowledgement
-We depend on many open source repos. See [References](doc/src/reference.md) for more information.
+We depend on many open source repos. See [References](docs/src/reference.md) for more information.
[English](README.md)
# PaddlePaddle ASR toolkit
![License](https://img.shields.io/badge/license-Apache%202-red.svg)
![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
![support os](https://img.shields.io/badge/os-linux-yellow.svg)
*PaddleASR* is an open-source project of an end-to-end Automatic Speech Recognition (ASR) engine based on the [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform.
Our vision is to provide an easy-to-use, efficient, compact and scalable toolkit for speech recognition, for both industrial application and academic research, covering training, inference and deployment.
## Features
See the [feature list](doc/src/feature_list.md).
## Setup
* python>=3.7
* paddlepaddle>=2.1.0
See [install](doc/src/install.md).
## Getting Started
Please see [Getting Started](doc/src/getting_started.md) and the [tiny egs](examples/tiny/s0/README.md).
## More Information
* [Data Preparation](doc/src/data_preparation.md)
* [Data Augmentation](doc/src/augmentation.md)
* [Ngram LM](doc/src/ngram_lm.md)
* [Server Demo](doc/src/server.md)
* [Benchmark](doc/src/benchmark.md)
* [Released Model](doc/src/released_model.md)
* [FAQ](doc/src/faq.md)
## Questions and Help
You are welcome to submit questions in [Github Discussions](https://github.com/PaddlePaddle/DeepSpeech/discussions) and report bugs in [Github Issues](https://github.com/PaddlePaddle/models/issues). Contributions to this project are also welcome.
## License
DeepASR is provided under the [Apache-2.0 License](./LICENSE).
## Acknowledgement
Development drew on several excellent repositories; see [References](doc/src/reference.md) for details.
# Benchmarks
## Acceleration with Multi-GPUs
We compare the training time with 1, 2, 4 and 8 Tesla V100 GPUs (on a subset of LibriSpeech samples whose audio durations are between 6.0 and 7.0 seconds). The results show a **near-linear** acceleration with multiple GPUs. In the following figure, the training time (in seconds) is printed on the blue bars.
<img src="../images/multi_gpu_speedup.png" width=450>
| # of GPU | Acceleration Rate |
| -------- | --------------: |
| 1 | 1.00 X |
| 2 | 1.98 X |
| 4 | 3.73 X |
| 8 | 6.95 X |
`utils/profile.sh` provides a demo profiling tool; you can adapt it as needed.
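Expressed as per-GPU parallel efficiency, the acceleration rates in the table work out as follows (a small sketch using only the numbers above):

```python
# Per-GPU parallel efficiency implied by the acceleration rates in the table.
rates = {1: 1.00, 2: 1.98, 4: 3.73, 8: 6.95}
for ngpu, speedup in rates.items():
    print(f"{ngpu} GPU(s): speedup {speedup:.2f}x, efficiency {speedup / ngpu:.1%}")
```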
# FAQ

1. To what degree does audio speed perturbation affect recognition accuracy?
   Speed perturbation improves accuracy; rates of 0.9, 1.0 and 1.1 are commonly used.
2. How much does volume affect recognition accuracy?
   Training usually normalizes volume to a fixed range; large fluctuations hurt training, roughly beyond 10 dB ~ 20 dB.
3. What is the minimum amount of training data for an acoustic model?
   Aishell-1 has about 178 hours of data; the more data, the better.
4. Which noises or background sounds affect recognition accuracy?
   Mainly interfering speech and low signal-to-noise ratio.
5. What is the length limit for a single utterance?
   Training utterances are usually limited to 1s ~ 6s, depending on the training configuration.
6. Does background sound need to be separated out or denoised before recognition?
   Separation is needed; it has to be considered together with the specific scenario.
7. Does the model include VAD (voice activity detection)?
   VAD is a separate model or module; this model does not include that capability.
8. Is long-form speech recognition supported?
   Audio is generally passed through VAD before recognition.
9. What hardware does the Mandarin LM Large language model require?
   Enough memory to hold the LM.
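For question 1, speed perturbation is commonly applied offline with the sox `speed` effect at the three rates mentioned; a minimal sketch (file names are illustrative):

```python
import subprocess

# Offline speed perturbation at the rates from Q1 (0.9x, 1.0x, 1.1x) using the
# sox "speed" effect.  Input/output file names are illustrative.
for rate in (0.9, 1.0, 1.1):
    subprocess.run(
        ["sox", "input.wav", f"input_sp{rate}.wav", "speed", str(rate)],
        check=True)
```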
# Reference
* [wenet](https://github.com/mobvoi/wenet)
# Released Models
## Language Model Released
Language Model | Training Data | Token-based | Size | Descriptions
:-------------:| :------------:| :-----: | -----: | :-----------------
[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) | [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1; <br/> About 1.85 billion n-grams; <br/> 'trie' binary with '-a 22 -q 8 -b 8'
[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4; <br/> About 0.13 billion n-grams; <br/> 'probing' binary with default settings
[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning; <br/> About 3.7 billion n-grams; <br/> 'probing' binary with default settings
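The released binaries can be queried directly with the `kenlm` Python bindings; a minimal sketch (the path and sentence are illustrative, and tokenization must match the model: characters for the Mandarin models, words for the English one):

```python
import kenlm  # Python bindings shipped with the KenLM project

# Load one of the released binaries (local path is illustrative).
lm = kenlm.Model("zh_giga.no_cna_cmn.prune01244.klm")

sentence = "今 天 天 气 很 好"            # char-segmented for the Mandarin LMs
print(lm.score(sentence, bos=True, eos=True))  # log10 probability
print(lm.perplexity(sentence))
```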
@@ -21,7 +21,7 @@ To perform z-score normalization (zero-mean, unit stddev) upon audio features, w
 ```bash
 python3 utils/compute_mean_std.py \
 --num_samples 2000 \
---specgram_type linear \
+--spectrum_type linear \
 --manifest_path examples/librispeech/data/manifest.train \
 --output_path examples/librispeech/data/mean_std.npz
 ```
...
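Conceptually, the statistics produced by `utils/compute_mean_std.py` are per-dimension feature means and standard deviations that are later used for z-score normalization; a minimal NumPy sketch with illustrative feature shapes:

```python
import numpy as np

def compute_mean_std(features):
    """Per-dimension mean/std over a list of (num_frames, feat_dim) arrays."""
    stacked = np.concatenate(features, axis=0)
    return stacked.mean(axis=0), stacked.std(axis=0)

def apply_zscore(feat, mean, std, eps=1e-20):
    """Zero-mean, unit-stddev normalization of one utterance's features."""
    return (feat - mean) / (std + eps)

# Illustrative usage with random "spectrogram" features (161 linear bins).
utterances = [np.random.rand(200, 161).astype('float32') for _ in range(4)]
mean, std = compute_mean_std(utterances)
normalized = apply_zscore(utterances[0], mean, std)
print(normalized.mean(axis=0)[:3], normalized.std(axis=0)[:3])
```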
# Reference
We refer to these repos when building `model` and `engine`:
* [delta](https://github.com/Delta-ML/delta.git)
* [espnet](https://github.com/espnet/espnet.git)
* [kaldi](https://github.com/kaldi-asr/kaldi.git)
* [wenet](https://github.com/mobvoi/wenet)