diff --git a/.gitignore b/.gitignore index cc8fff8770b97a3f31eb49270ad32ac25af30fad..778824f5e8a3c655cea60c81f259625da45dd40f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.pyc .vscode *log +*.wav *.pdmodel *.pdiparams* *.zip @@ -30,5 +31,8 @@ tools/OpenBLAS/ tools/Miniconda3-latest-Linux-x86_64.sh tools/activate_python.sh tools/miniconda.sh +tools/CRF++-0.58/ + +speechx/fc_patch/ *output/ diff --git a/README.md b/README.md index 66178662ad84881f363b4a164b3c5497ff29cab1..837d24783d8b10952d3821580ec7bd0219d8dad6 100644 --- a/README.md +++ b/README.md @@ -196,16 +196,18 @@ Developers can have a try of our models with [PaddleSpeech Command Line](./paddl ```shell paddlespeech cls --input input.wav ``` + **Automatic Speech Recognition** ```shell paddlespeech asr --lang zh --input input_16k.wav ``` -**Speech Translation** (English to Chinese) +**Speech Translation** (English to Chinese) (not supported on Mac and Windows now) ```shell paddlespeech st --input input_16k.wav ``` + **Text-to-Speech** ```shell paddlespeech tts --input "你好,欢迎使用飞桨深度学习框架!" --output output.wav @@ -218,7 +220,16 @@ paddlespeech tts --input "你好,欢迎使用飞桨深度学习框架!" --ou paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 ``` - +**Batch Process** +``` +echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts +``` + +**Shell Pipeline** +- ASR + Punctuation Restoration +``` +paddlespeech asr --input ./zh.wav | paddlespeech text --task punc +``` For more command lines, please see: [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos)
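The Batch Process and Shell Pipeline entries added to README.md above chain CLI processes; the same flow can be scripted with the Python executors behind those commands. A minimal sketch, assuming the executor call signatures exposed under `paddlespeech.cli` (the audio path is the `zh.wav` sample used throughout these docs):

```python
# ASR followed by punctuation restoration, mirroring the shell pipeline:
#   paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
from paddlespeech.cli.asr import ASRExecutor
from paddlespeech.cli.text import TextExecutor

asr = ASRExecutor()
punc = TextExecutor()

transcript = asr(audio_file="./zh.wav", lang="zh")  # raw transcript, no punctuation
print(punc(text=transcript, task="punc"))           # transcript with punctuation restored
```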
diff --git a/README_cn.md b/README_cn.md index 1196eec11ed181d875fa1050433b7a8c1ee41753..5c00637d3c86cb42f12edb1be8e6a0565988ec12 100644 --- a/README_cn.md +++ b/README_cn.md @@ -216,6 +216,17 @@ paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 ``` +**批处理** +``` +echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts +``` + +**Shell 管道** +ASR + 标点恢复: +``` +paddlespeech asr --input ./zh.wav | paddlespeech text --task punc +``` + 更多命令行命令请参考 [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos) > Note: 如果需要训练或者微调,请查看[语音识别](./docs/source/asr/quick_start.md), [语音合成](./docs/source/tts/quick_start.md)。 @@ -558,6 +569,7 @@ year={2021} - 非常感谢 [kslz](https://github.com/kslz) 补充中文文档。 - 非常感谢 [awmmmm](https://github.com/awmmmm) 提供 fastspeech2 aishell3 conformer 预训练模型。 - 非常感谢 [phecda-xu](https://github.com/phecda-xu)/[PaddleDubbing](https://github.com/phecda-xu/PaddleDubbing) 基于 PaddleSpeech 的 TTS 模型搭建带 GUI 操作界面的配音工具。 + 此外,PaddleSpeech 依赖于许多开源存储库。有关更多信息,请参阅 [references](./docs/source/reference.md)。 diff --git a/demos/speech_recognition/.gitignore b/demos/speech_recognition/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..d8dd7532abcc65af52e9db03c516274e3d674dc1 --- /dev/null +++ b/demos/speech_recognition/.gitignore @@ -0,0 +1 @@ +*.wav diff --git a/demos/speech_recognition/README.md b/demos/speech_recognition/README.md index c49afa35c2d8027011c333eb110eb22b1d08924d..5d964fceac73f60632b2b31a750941e958b59966 100644 --- a/demos/speech_recognition/README.md +++ b/demos/speech_recognition/README.md @@ -27,6 +27,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee paddlespeech asr --input ./zh.wav # English paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav + # Chinese ASR + Punctuation Restoration + paddlespeech asr --input ./zh.wav | paddlespeech text --task punc ``` (It doesn't matter if package `paddlespeech-ctcdecoders` is not found, this package is optional.) diff --git a/demos/speech_recognition/README_cn.md b/demos/speech_recognition/README_cn.md index c2e38c91bc6b6374e8ab93f720b5c59330f3e05c..ba1f1d65c5ca9dec435cc1e998117238077407be 100644 --- a/demos/speech_recognition/README_cn.md +++ b/demos/speech_recognition/README_cn.md @@ -25,6 +25,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee paddlespeech asr --input ./zh.wav # 英文 paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav + # 中文 + 标点恢复 + paddlespeech asr --input ./zh.wav | paddlespeech text --task punc ``` (如果显示 `paddlespeech-ctcdecoders` 这个 python 包没有找到的 Error,没有关系,这个包是非必须的。) diff --git a/demos/speech_recognition/run.sh b/demos/speech_recognition/run.sh index 5efc8b81f97f818753059c6fa19e718f7f3f05ae..06466928611f51bfec65529cad5d04966bf2607a 100755 --- a/demos/speech_recognition/run.sh +++ b/demos/speech_recognition/run.sh @@ -1,4 +1,10 @@ #!/bin/bash wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav + +# asr paddlespeech asr --input ./zh.wav + + +# asr + punc +paddlespeech asr --input ./zh.wav | paddlespeech text --task punc \ No newline at end of file diff --git a/demos/text_to_speech/README.md b/demos/text_to_speech/README.md index 9d3c4ac539a1afcd62a03c4f98b2dfe4cb622aae..2df72a82dec88ddc55505c9575721aee2de09536 100644 --- a/demos/text_to_speech/README.md +++ b/demos/text_to_speech/README.md @@ -17,11 +17,14 @@ The input of this demo should be a text of the specific language that can be pas ### 3. Usage - Command Line (Recommended) - Chinese - The default acoustic model is `Fastspeech2`, and the default vocoder is `Parallel WaveGAN`. ```bash paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!" 
``` + - Batch Process + ```bash + echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts + ``` - Chinese, use `SpeedySpeech` as the acoustic model ```bash paddlespeech tts --am speedyspeech_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" diff --git a/demos/text_to_speech/README_cn.md b/demos/text_to_speech/README_cn.md index f075efdafc1a236b4517764568b31499159c151b..7e02b962483b4b0959fa9b9fe0c082bb0a6fdc3e 100644 --- a/demos/text_to_speech/README_cn.md +++ b/demos/text_to_speech/README_cn.md @@ -24,6 +24,10 @@ ```bash paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!" ``` + - 批处理 + ```bash + echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts + ``` - 中文,使用 `SpeedySpeech` 作为声学模型 ```bash paddlespeech tts --am speedyspeech_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" diff --git a/demos/text_to_speech/run.sh b/demos/text_to_speech/run.sh index c2487aeed38ed5b0e3bc7e5c256eff0139bcca2b..b1340241bf833129de9ae5581ada4a542253f96c 100755 --- a/demos/text_to_speech/run.sh +++ b/demos/text_to_speech/run.sh @@ -1,3 +1,7 @@ #!/bin/bash +# single process paddlespeech tts --input 今天的天气不错啊 + +# Batch process +echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts \ No newline at end of file diff --git a/docs/topic/ctc/ctc_loss_speed_compare.ipynb b/docs/topic/ctc/ctc_loss_speed_compare.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..eb7a030c7e6be03e43016d8a47aa049ea3e40eee --- /dev/null +++ b/docs/topic/ctc/ctc_loss_speed_compare.ipynb @@ -0,0 +1,369 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a1e738e0", + "metadata": {}, + "source": [ + "## 获取测试的 logit 数据" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "29d3368b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hlens.npy\n", + "logits.npy\n", + "ys_lens.npy\n", + "ys_pad.npy\n" + ] + } + ], + "source": [ + "!mkdir -p ./test_data\n", + "!test -f ./test_data/ctc_loss_compare_data.tgz || wget -P ./test_data https://paddlespeech.bj.bcebos.com/datasets/unit_test/asr/ctc_loss_compare_data.tgz\n", + "!tar xzvf test_data/ctc_loss_compare_data.tgz -C ./test_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "240caf1d", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import time\n", + "\n", + "data_dir=\"./test_data\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "91bad949", + "metadata": {}, + "outputs": [], + "source": [ + "logits_np = np.load(os.path.join(data_dir, \"logits.npy\"))\n", + "ys_pad_np = np.load(os.path.join(data_dir, \"ys_pad.npy\"))\n", + "hlens_np = np.load(os.path.join(data_dir, \"hlens.npy\"))\n", + "ys_lens_np = np.load(os.path.join(data_dir, \"ys_lens.npy\"))" + ] + }, + { + "cell_type": "markdown", + "id": "4cef2f15", + "metadata": {}, + "source": [ + "## 使用 torch 的 ctc loss" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "90612004", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.10.1+cu102'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "00799f97", + "metadata": {}, + "outputs": [], + "source": [ + "def torch_ctc_loss(use_cpu):\n", + " if use_cpu:\n", + " device = torch.device(\"cpu\")\n", + " else:\n", + " device = torch.device(\"cuda\")\n", + "\n", + " reduction_type = \"sum\" \n", + "\n", + " ctc_loss = torch.nn.CTCLoss(reduction=reduction_type)\n", + "\n", + " 
ys_hat = torch.tensor(logits_np, device = device)\n", + " ys_pad = torch.tensor(ys_pad_np, device = device)\n", + " hlens = torch.tensor(hlens_np, device = device)\n", + " ys_lens = torch.tensor(ys_lens_np, device = device)\n", + "\n", + " ys_hat = ys_hat.transpose(0, 1)\n", + " \n", + " # 开始计算时间\n", + " start_time = time.time()\n", + " ys_hat = ys_hat.log_softmax(2)\n", + " loss = ctc_loss(ys_hat, ys_pad, hlens, ys_lens)\n", + " end_time = time.time()\n", + " \n", + " loss = loss / ys_hat.size(1)\n", + " return end_time - start_time, loss.item()" + ] + }, + { + "cell_type": "markdown", + "id": "ba47b5a4", + "metadata": {}, + "source": [ + "## 使用 paddle 的 ctc loss" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6882a06e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2.2.2'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import paddle\n", + "paddle.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3cfa3b7c", + "metadata": {}, + "outputs": [], + "source": [ + "def paddle_ctc_loss(use_cpu): \n", + " import paddle.nn as pn\n", + " if use_cpu:\n", + " device = \"cpu\"\n", + " else:\n", + " device = \"gpu\"\n", + "\n", + " paddle.set_device(device)\n", + "\n", + " logits = paddle.to_tensor(logits_np)\n", + " ys_pad = paddle.to_tensor(ys_pad_np,dtype='int32')\n", + " hlens = paddle.to_tensor(hlens_np, dtype='int64')\n", + " ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n", + "\n", + " logits = logits.transpose([1,0,2])\n", + "\n", + " ctc_loss = pn.CTCLoss(reduction='sum')\n", + " # 开始计算时间\n", + " start_time = time.time()\n", + " pn_loss = ctc_loss(logits, ys_pad, hlens, ys_lens)\n", + " end_time = time.time()\n", + " \n", + " pn_loss = pn_loss / logits.shape[1]\n", + " return end_time - start_time, pn_loss.item()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "40413ef9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU, iteration 10\n", + "torch_ctc_loss 159.17137145996094\n", + "paddle_ctc_loss 159.16574096679688\n", + "paddle average time 1.718252992630005\n", + "torch average time 0.17536230087280275\n", + "paddle time / torch time (cpu) 9.798303193320452\n", + "\n", + "GPU, iteration 10\n", + "torch_ctc_loss 159.172119140625\n", + "paddle_ctc_loss 159.17205810546875\n", + "paddle average time 0.018606925010681154\n", + "torch average time 0.0026710033416748047\n", + "paddle time / torch time (gpu) 6.966267963938231\n" + ] + } + ], + "source": [ + "# 使用 CPU\n", + "\n", + "iteration = 10\n", + "use_cpu = True\n", + "torch_total_time = 0\n", + "paddle_total_time = 0\n", + "for _ in range(iteration):\n", + " cost_time, torch_loss = torch_ctc_loss(use_cpu)\n", + " torch_total_time += cost_time\n", + "for _ in range(iteration):\n", + " cost_time, paddle_loss = paddle_ctc_loss(use_cpu)\n", + " paddle_total_time += cost_time\n", + "print (\"CPU, iteration\", iteration)\n", + "print (\"torch_ctc_loss\", torch_loss)\n", + "print (\"paddle_ctc_loss\", paddle_loss)\n", + "print (\"paddle average time\", paddle_total_time / iteration)\n", + "print (\"torch average time\", torch_total_time / iteration)\n", + "print (\"paddle time / torch time (cpu)\" , paddle_total_time/ torch_total_time)\n", + "\n", + "print (\"\")\n", + "\n", + "# 使用 GPU\n", + "\n", + "use_cpu = False\n", + "torch_total_time = 0\n", + "paddle_total_time = 0\n", + "for _ in range(iteration):\n", + " cost_time, 
torch_loss = torch_ctc_loss(use_cpu)\n", + "        torch_total_time += cost_time\n", + "for _ in range(iteration):\n", + "    cost_time, paddle_loss = paddle_ctc_loss(use_cpu)\n", + "    paddle_total_time += cost_time\n", + "print (\"GPU, iteration\", iteration)\n", + "print (\"torch_ctc_loss\", torch_loss)\n", + "print (\"paddle_ctc_loss\", paddle_loss)\n", + "print (\"paddle average time\", paddle_total_time / iteration)\n", + "print (\"torch average time\", torch_total_time / iteration)\n", + "print (\"paddle time / torch time (gpu)\" , paddle_total_time/ torch_total_time)" + ] + }, + { + "cell_type": "markdown", + "id": "7cdf8697", + "metadata": {}, + "source": [ + "## 其他: 使用 PaddleSpeech 中的 ctcloss 查一下 loss 值" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "73fad81d", + "metadata": {}, + "outputs": [], + "source": [ + "logits_np = np.load(os.path.join(data_dir, \"logits.npy\"))\n", + "ys_pad_np = np.load(os.path.join(data_dir, \"ys_pad.npy\"))\n", + "hlens_np = np.load(os.path.join(data_dir, \"hlens.npy\"))\n", + "ys_lens_np = np.load(os.path.join(data_dir, \"ys_lens.npy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2b41e45d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2022-02-25 11:34:34.143 | INFO     | paddlespeech.s2t.modules.loss:__init__:41 - CTCLoss Loss reduction: sum, div-bs: True\n", + "2022-02-25 11:34:34.143 | INFO     | paddlespeech.s2t.modules.loss:__init__:42 - CTCLoss Grad Norm Type: instance\n", + "2022-02-25 11:34:34.144 | INFO     | paddlespeech.s2t.modules.loss:__init__:73 - CTCLoss() kwargs:{'norm_by_times': True}, not support: {'norm_by_batchsize': False, 'norm_by_total_logits_len': False}\n", + "loss 159.17205810546875\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/dygraph/math_op_patch.py:253: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.float32, but right dtype is paddle.int32, the right dtype will convert to paddle.float32\n", + "  format(lhs_dtype, rhs_dtype, lhs_dtype))\n" + ] + } + ], + "source": [ + "use_cpu = False\n", + "\n", + "from paddlespeech.s2t.modules.loss import CTCLoss\n", + "\n", + "if use_cpu:\n", + "    device = \"cpu\"\n", + "else:\n", + "    device = \"gpu\"\n", + "\n", + "paddle.set_device(device)\n", + "\n", + "blank_id=0\n", + "reduction_type='sum'\n", + "batch_average= True\n", + "grad_norm_type='instance'\n", + "\n", + "criterion = CTCLoss(\n", + "        blank=blank_id,\n", + "        reduction=reduction_type,\n", + "        batch_average=batch_average,\n", + "        grad_norm_type=grad_norm_type)\n", + "\n", + "logits = paddle.to_tensor(logits_np)\n", + "ys_pad = paddle.to_tensor(ys_pad_np,dtype='int32')\n", + "hlens = paddle.to_tensor(hlens_np, dtype='int64')\n", + "ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n", + "\n", + "pn_ctc_loss = criterion(logits, ys_pad, hlens, ys_lens)\n", + "print(\"loss\", pn_ctc_loss.item())\n", + "   " + ] + }, + { + "cell_type": "markdown", + "id": "de525d38", + "metadata": {}, + "source": [ + "## 结论\n", + "在 CPU 环境下: torch 的 CTC loss 的计算速度是 paddle 的 9.8 倍 \n", + "在 GPU 环境下: torch 的 CTC loss 的计算速度是 paddle 的 6.97 倍\n", + "\n", + "## 其他结论\n", + "torch 与 paddle 的 ctc loss 在 CPU 和 GPU 下都没有完全对齐。其中 CPU 的前向对齐精度大约为 1e-2,GPU 的前向对齐精度大约为 1e-4。" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}
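One caveat worth noting about the notebook above: the GPU runs bracket the kernel launches with `time.time()` but never synchronize the device, so asynchronous execution can make the measured spans smaller than the actual compute time. A sketch of a sync-bracketed variant, assuming CUDA builds where `torch.cuda.synchronize` and `paddle.device.cuda.synchronize` are available:

```python
import time

def timed(fn, sync):
    sync()                        # drain kernels queued before the measurement
    start = time.time()
    result = fn()
    sync()                        # wait for the measured kernels to finish
    return time.time() - start, result

# e.g. for the paddle path:
#   cost, loss = timed(lambda: ctc_loss(logits, ys_pad, hlens, ys_lens),
#                      paddle.device.cuda.synchronize)
# and with torch.cuda.synchronize for the torch path
```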
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/aishell3/tts3/README.md b/examples/aishell3/tts3/README.md index 281ad836b0144e6bb14e4b8278bfaceb026b65b4..d02ad1b6373c26f0cd0ffa4d58c3bd4af57f9e72 100644 --- a/examples/aishell3/tts3/README.md +++ b/examples/aishell3/tts3/README.md @@ -225,7 +225,9 @@ optional arguments: 9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu. ## Pretrained Model -Pretrained FastSpeech2 model with no silence in the edge of audios. [fastspeech2_nosil_aishell3_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_ckpt_0.4.zip) +Pretrained FastSpeech2 model with no silence in the edge of audios: +- [fastspeech2_nosil_aishell3_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_ckpt_0.4.zip) +- [fastspeech2_conformer_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_aishell3_ckpt_0.2.0.zip) (Thanks for [@awmmmm](https://github.com/awmmmm)'s contribution) FastSpeech2 checkpoint contains files listed below. diff --git a/examples/aishell3/tts3/conf/conformer.yaml b/examples/aishell3/tts3/conf/conformer.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea73593d77a3117e8b46baab9785bd576a66a093 --- /dev/null +++ b/examples/aishell3/tts3/conf/conformer.yaml @@ -0,0 +1,110 @@ +########################################################### +# FEATURE EXTRACTION SETTING # +########################################################### + +fs: 24000 # sr +n_fft: 2048 # FFT size (samples). +n_shift: 300 # Hop size (samples). 12.5ms +win_length: 1200 # Window length (samples). 50ms + # If set to null, it will be the same as fft_size. +window: "hann" # Window function. + +# Only used for feats_type != raw + +fmin: 80 # Minimum frequency of Mel basis. +fmax: 7600 # Maximum frequency of Mel basis. +n_mels: 80 # The number of mel basis. + +# Only used for the model using pitch features (e.g. FastSpeech2) +f0min: 80 # Maximum f0 for pitch extraction. +f0max: 400 # Minimum f0 for pitch extraction. 
+ + +########################################################### +# DATA SETTING # +########################################################### +batch_size: 32 +num_workers: 4 + + +########################################################### +# MODEL SETTING # +########################################################### +model: + adim: 384 # attention dimension + aheads: 2 # number of attention heads + elayers: 4 # number of encoder layers + eunits: 1536 # number of encoder ff units + dlayers: 4 # number of decoder layers + dunits: 1536 # number of decoder ff units + positionwise_layer_type: conv1d # type of position-wise layer + positionwise_conv_kernel_size: 3 # kernel size of position wise conv layer + duration_predictor_layers: 2 # number of layers of duration predictor + duration_predictor_chans: 256 # number of channels of duration predictor + duration_predictor_kernel_size: 3 # filter size of duration predictor + postnet_layers: 5 # number of layers of postnset + postnet_filts: 5 # filter size of conv layers in postnet + postnet_chans: 256 # number of channels of conv layers in postnet + encoder_normalize_before: True # whether to perform layer normalization before the input + decoder_normalize_before: True # whether to perform layer normalization before the input + reduction_factor: 1 # reduction factor + encoder_type: conformer # encoder type + decoder_type: conformer # decoder type + conformer_pos_enc_layer_type: rel_pos # conformer positional encoding type + conformer_self_attn_layer_type: rel_selfattn # conformer self-attention type + conformer_activation_type: swish # conformer activation type + use_macaron_style_in_conformer: true # whether to use macaron style in conformer + use_cnn_in_conformer: true # whether to use CNN in conformer + conformer_enc_kernel_size: 7 # kernel size in CNN module of conformer-based encoder + conformer_dec_kernel_size: 31 # kernel size in CNN module of conformer-based decoder + init_type: xavier_uniform # initialization type + transformer_enc_dropout_rate: 0.2 # dropout rate for transformer encoder layer + transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding + transformer_enc_attn_dropout_rate: 0.2 # dropout rate for transformer encoder attention layer + transformer_dec_dropout_rate: 0.2 # dropout rate for transformer decoder layer + transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding + transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer + pitch_predictor_layers: 5 # number of conv layers in pitch predictor + pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor + pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch + pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch + stop_gradient_from_pitch_predictor: true # whether to stop the gradient from pitch predictor to encoder + energy_predictor_layers: 2 # number of conv layers in energy predictor + energy_predictor_chans: 256 # number of channels of conv layers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_dropout: 0.5 # dropout rate in energy predictor + energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy + energy_embed_dropout: 0.0 # 
dropout rate after conv embedding layer for energy + stop_gradient_from_energy_predictor: false # whether to stop the gradient from energy predictor to encoder + spk_embed_dim: 256 # speaker embedding dimension + spk_embed_integration_type: concat # speaker embedding integration type + + +########################################################### +# UPDATER SETTING # +########################################################### +updater: + use_masking: True # whether to apply masking for padded part in loss calculation + + + +########################################################### +# OPTIMIZER SETTING # +########################################################### +optimizer: + optim: adam # optimizer type + learning_rate: 0.001 # learning rate + +########################################################### +# TRAINING SETTING # +########################################################### +max_epoch: 1000 +num_snapshots: 5 + + +########################################################### +# OTHER SETTING # +########################################################### +seed: 10086 diff --git a/examples/other/g2p/README.md b/examples/other/g2p/README.md index c0f55bd42130a34a32ed21e34b5d5e297fff2f7c..141f7f7412891b44be81fc5e026c175c3fe83bb1 100644 --- a/examples/other/g2p/README.md +++ b/examples/other/g2p/README.md @@ -10,7 +10,7 @@ Run the command below to get the results of the test. ```bash ./run.sh ``` -The `avg WER` of g2p is: 0.027124048652822204 +The `avg WER` of g2p is: 0.026014352515701198 ```text ,--------------------------------------------------------------------. | | # Snt # Wrd | Corr Sub Del Ins Err S.Err | diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py index cecf76fee5b6e8e73c3e7d588698f0cb890461cf..12ff9919a29f453a11853571eb3dad836f824556 100644 --- a/paddlespeech/cli/__init__.py +++ b/paddlespeech/cli/__init__.py @@ -20,5 +20,6 @@ from .cls import CLSExecutor from .st import STExecutor from .text import TextExecutor from .tts import TTSExecutor +from .stats import StatsExecutor _locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8']) diff --git a/paddlespeech/cli/stats/__init__.py b/paddlespeech/cli/stats/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9fe6c4abaf10de2f24f751ddd62f456768a82475 --- /dev/null +++ b/paddlespeech/cli/stats/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .infer import StatsExecutor diff --git a/paddlespeech/cli/stats/infer.py b/paddlespeech/cli/stats/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..4ef50449c37e08c1a3c5f9b8894a5b4141e1c33f --- /dev/null +++ b/paddlespeech/cli/stats/infer.py @@ -0,0 +1,193 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +from typing import List + +from prettytable import PrettyTable + +from ..log import logger +from ..utils import cli_register +from ..utils import stats_wrapper + +__all__ = ['StatsExecutor'] + +model_name_format = { + 'asr': 'Model-Language-Sample Rate', + 'cls': 'Model-Sample Rate', + 'st': 'Model-Source language-Target language', + 'text': 'Model-Task-Language', + 'tts': 'Model-Language' +} + + +@cli_register( + name='paddlespeech.stats', + description='Get speech tasks support models list.') +class StatsExecutor(): + def __init__(self): + super(StatsExecutor, self).__init__() + + self.parser = argparse.ArgumentParser( + prog='paddlespeech.stats', add_help=True) + self.parser.add_argument( + '--task', + type=str, + default='asr', + choices=['asr', 'cls', 'st', 'text', 'tts'], + help='Choose speech task.', + required=True) + self.task_choices = ['asr', 'cls', 'st', 'text', 'tts'] + + def show_support_models(self, pretrained_models: dict): + fields = model_name_format[self.task].split("-") + table = PrettyTable(fields) + for key in pretrained_models: + table.add_row(key.split("-")) + print(table) + + def execute(self, argv: List[str]) -> bool: + """ + Command line entry. + """ + parser_args = self.parser.parse_args(argv) + self.task = parser_args.task + if self.task not in self.task_choices: + logger.error( + "Please input correct speech task, choices = ['asr', 'cls', 'st', 'text', 'tts']" + ) + return False + + elif self.task == 'asr': + try: + from ..asr.infer import pretrained_models + logger.info( + "Here is the list of ASR pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True + except BaseException: + logger.error("Failed to get the list of ASR pretrained models.") + return False + + elif self.task == 'cls': + try: + from ..cls.infer import pretrained_models + logger.info( + "Here is the list of CLS pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True + except BaseException: + logger.error("Failed to get the list of CLS pretrained models.") + return False + + elif self.task == 'st': + try: + from ..st.infer import pretrained_models + logger.info( + "Here is the list of ST pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True + except BaseException: + logger.error("Failed to get the list of ST pretrained models.") + return False + + elif self.task == 'text': + try: + from ..text.infer import pretrained_models + logger.info( + "Here is the list of TEXT pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True + except BaseException: + logger.error( + "Failed to get the list of TEXT pretrained models.") + return False + + elif self.task == 'tts': + try: + from ..tts.infer import pretrained_models + logger.info( + "Here is the list of TTS pretrained models released 
by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True + except BaseException: + logger.error("Failed to get the list of TTS pretrained models.") + return False + + @stats_wrapper + def __call__( + self, + task: str=None, ): + """ + Python API to call an executor. + """ + self.task = task + if self.task not in self.task_choices: + print( + "Please input correct speech task, choices = ['asr', 'cls', 'st', 'text', 'tts']" + ) + + elif self.task == 'asr': + try: + from ..asr.infer import pretrained_models + print( + "Here is the list of ASR pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print("Failed to get the list of ASR pretrained models.") + + elif self.task == 'cls': + try: + from ..cls.infer import pretrained_models + print( + "Here is the list of CLS pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print("Failed to get the list of CLS pretrained models.") + + elif self.task == 'st': + try: + from ..st.infer import pretrained_models + print( + "Here is the list of ST pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print("Failed to get the list of ST pretrained models.") + + elif self.task == 'text': + try: + from ..text.infer import pretrained_models + print( + "Here is the list of TEXT pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print("Failed to get the list of TEXT pretrained models.") + + elif self.task == 'tts': + try: + from ..tts.infer import pretrained_models + print( + "Here is the list of TTS pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print("Failed to get the list of TTS pretrained models.") diff --git a/paddlespeech/s2t/io/sampler.py b/paddlespeech/s2t/io/sampler.py index 89752bb9fdb98faecc0ccc5b8f59ea1f09efc8b6..ac55af1236f11d175e9e7717220980cf95c7d79b 100644 --- a/paddlespeech/s2t/io/sampler.py +++ b/paddlespeech/s2t/io/sampler.py @@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False): """ rng = np.random.RandomState(epoch) shift_len = rng.randint(0, batch_size - 1) - batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size)) + batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size)) rng.shuffle(batch_indices) batch_indices = [item for batch in batch_indices for item in batch] assert clipped is False diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py index f7b05714ef6e9961a1bff79027015889815d5811..999723e5100309976c1b89cbf256ac106d8829e6 100644 --- a/paddlespeech/s2t/models/u2_st/u2_st.py +++ b/paddlespeech/s2t/models/u2_st/u2_st.py @@ -33,8 +33,6 @@ from paddlespeech.s2t.modules.decoder import TransformerDecoder from paddlespeech.s2t.modules.encoder import ConformerEncoder from paddlespeech.s2t.modules.encoder import TransformerEncoder from paddlespeech.s2t.modules.loss import LabelSmoothingLoss -from paddlespeech.s2t.modules.mask import mask_finished_preds -from paddlespeech.s2t.modules.mask import 
mask_finished_scores from paddlespeech.s2t.modules.mask import subsequent_mask from paddlespeech.s2t.utils import checkpoint from paddlespeech.s2t.utils import layer_tools @@ -291,7 +289,7 @@ class U2STBaseModel(nn.Layer): device = speech.place # Let's assume B = batch_size and N = beam_size - # 1. Encoder and init hypothesis + # 1. Encoder and init hypothesis encoder_out, encoder_mask = self._forward_encoder( speech, speech_lengths, decoding_chunk_size, num_decoding_left_chunks, diff --git a/paddlespeech/server/bin/__init__.py b/paddlespeech/server/bin/__init__.py index bd75747f79948ea42229b8c164174dbe4240d4b1..025aab098f2b6d56ced56d499ce619feb190ab2d 100644 --- a/paddlespeech/server/bin/__init__.py +++ b/paddlespeech/server/bin/__init__.py @@ -14,3 +14,4 @@ from .paddlespeech_client import ASRClientExecutor from .paddlespeech_client import TTSClientExecutor from .paddlespeech_server import ServerExecutor +from .paddlespeech_server import ServerStatsExecutor diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index aff77d54436eac55fda46c8e2ed218cc115a0085..21fc5c65e965a87c483046d66e45036d1b091b5d 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -16,15 +16,17 @@ from typing import List import uvicorn from fastapi import FastAPI +from prettytable import PrettyTable from ..executor import BaseExecutor from ..util import cli_server_register from ..util import stats_wrapper +from paddlespeech.cli.log import logger from paddlespeech.server.engine.engine_pool import init_engine_pool from paddlespeech.server.restful.api import setup_router from paddlespeech.server.utils.config import get_config -__all__ = ['ServerExecutor'] +__all__ = ['ServerExecutor', 'ServerStatsExecutor'] app = FastAPI( title="PaddleSpeech Serving API", description="Api", version="0.0.1") @@ -86,3 +88,139 @@ class ServerExecutor(BaseExecutor): config = get_config(config_file) if self.init(config): uvicorn.run(app, host=config.host, port=config.port, debug=True) + + +@cli_server_register( + name='paddlespeech_server.stats', + description='Get the models supported by each speech task in the service.') +class ServerStatsExecutor(): + def __init__(self): + super(ServerStatsExecutor, self).__init__() + + self.parser = argparse.ArgumentParser( + prog='paddlespeech_server.stats', add_help=True) + self.parser.add_argument( + '--task', + type=str, + default=None, + choices=['asr', 'tts'], + help='Choose speech task.', + required=True) + self.task_choices = ['asr', 'tts'] + self.model_name_format = { + 'asr': 'Model-Language-Sample Rate', + 'tts': 'Model-Language' + } + + def show_support_models(self, pretrained_models: dict): + fields = self.model_name_format[self.task].split("-") + table = PrettyTable(fields) + for key in pretrained_models: + table.add_row(key.split("-")) + print(table) + + def execute(self, argv: List[str]) -> bool: + """ + Command line entry. + """ + parser_args = self.parser.parse_args(argv) + self.task = parser_args.task + if self.task not in self.task_choices: + logger.error( + "Please input correct speech task, choices = ['asr', 'tts']") + return False + + elif self.task == 'asr': + try: + from paddlespeech.cli.asr.infer import pretrained_models + logger.info( + "Here is the table of ASR pretrained models supported in the service." 
+ ) + self.show_support_models(pretrained_models) + + # show ASR static pretrained model + from paddlespeech.server.engine.asr.paddleinference.asr_engine import pretrained_models + logger.info( + "Here is the table of ASR static pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + return True + except BaseException: + logger.error( + "Failed to get the table of ASR pretrained models supported in the service." + ) + return False + + elif self.task == 'tts': + try: + from paddlespeech.cli.tts.infer import pretrained_models + logger.info( + "Here is the table of TTS pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + # show TTS static pretrained model + from paddlespeech.server.engine.tts.paddleinference.tts_engine import pretrained_models + logger.info( + "Here is the table of TTS static pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + return True + except BaseException: + logger.error( + "Failed to get the table of TTS pretrained models supported in the service." + ) + return False + + @stats_wrapper + def __call__( + self, + task: str=None, ): + """ + Python API to call an executor. + """ + self.task = task + if self.task not in self.task_choices: + print("Please input correct speech task, choices = ['asr', 'tts']") + + elif self.task == 'asr': + try: + from paddlespeech.cli.asr.infer import pretrained_models + print( + "Here is the table of ASR pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + # show ASR static pretrained model + from paddlespeech.server.engine.asr.paddleinference.asr_engine import pretrained_models + print( + "Here is the table of ASR static pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + except BaseException: + print( + "Failed to get the table of ASR pretrained models supported in the service." + ) + + elif self.task == 'tts': + try: + from paddlespeech.cli.tts.infer import pretrained_models + print( + "Here is the table of TTS pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + # show TTS static pretrained model + from paddlespeech.server.engine.tts.paddleinference.tts_engine import pretrained_models + print( + "Here is the table of TTS static pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + except BaseException: + print( + "Failed to get the table of TTS pretrained models supported in the service." + ) diff --git a/paddlespeech/t2s/frontend/tone_sandhi.py b/paddlespeech/t2s/frontend/tone_sandhi.py index 5264e0687557c75023eb8f004350869346e7df6c..07f7fa2b8f8615af73fd656b0abd381e551179f9 100644 --- a/paddlespeech/t2s/frontend/tone_sandhi.py +++ b/paddlespeech/t2s/frontend/tone_sandhi.py @@ -63,7 +63,7 @@ class ToneSandhi(): '扫把', '惦记' } self.must_not_neural_tone_words = { - "男子", "女子", "分子", "原子", "量子", "莲子", "石子", "瓜子", "电子" + "男子", "女子", "分子", "原子", "量子", "莲子", "石子", "瓜子", "电子", "人人", "虎虎" } self.punc = ":,;。?!“”‘’':,;.?!" @@ -77,7 +77,9 @@ class ToneSandhi(): # reduplication words for n. and v. e.g. 
奶奶, 试试, 旺旺 for j, item in enumerate(word): - if j - 1 >= 0 and item == word[j - 1] and pos[0] in {"n", "v", "a"}: + if j - 1 >= 0 and item == word[j - 1] and pos[0] in { + "n", "v", "a" + } and word not in self.must_not_neural_tone_words: finals[j] = finals[j][:-1] + "5" ge_idx = word.find("个") if len(word) >= 1 and word[-1] in "吧呢哈啊呐噻嘛吖嗨呐哦哒额滴哩哟喽啰耶喔诶": diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py index a905c412d4d9c901fae1d5b80677a472a24c6071..bb8ed5b4919ecfb67d3f54aade65b0d31e1d1a00 100644 --- a/paddlespeech/t2s/frontend/zh_frontend.py +++ b/paddlespeech/t2s/frontend/zh_frontend.py @@ -20,7 +20,10 @@ import numpy as np import paddle from g2pM import G2pM from pypinyin import lazy_pinyin +from pypinyin import load_phrases_dict +from pypinyin import load_single_dict from pypinyin import Style +from pypinyin_dict.phrase_pinyin_data import large_pinyin from paddlespeech.t2s.frontend.generate_lexicon import generate_lexicon from paddlespeech.t2s.frontend.tone_sandhi import ToneSandhi @@ -41,6 +44,8 @@ class Frontend(): self.g2pM_model = G2pM() self.pinyin2phone = generate_lexicon( with_tone=True, with_erhua=False) + else: + self.__init__pypinyin() self.must_erhua = {"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿"} self.not_erhua = { "虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿", @@ -62,6 +67,23 @@ class Frontend(): for tone, id in tone_id: self.vocab_tones[tone] = int(id) + def __init__pypinyin(self): + large_pinyin.load() + + load_phrases_dict({u'开户行': [[u'ka1i'], [u'hu4'], [u'hang2']]}) + load_phrases_dict({u'发卡行': [[u'fa4'], [u'ka3'], [u'hang2']]}) + load_phrases_dict({u'放款行': [[u'fa4ng'], [u'kua3n'], [u'hang2']]}) + load_phrases_dict({u'茧行': [[u'jia3n'], [u'hang2']]}) + load_phrases_dict({u'行号': [[u'hang2'], [u'ha4o']]}) + load_phrases_dict({u'各地': [[u'ge4'], [u'di4']]}) + load_phrases_dict({u'借还款': [[u'jie4'], [u'hua2n'], [u'kua3n']]}) + load_phrases_dict({u'时间为': [[u'shi2'], [u'jia1n'], [u'we2i']]}) + load_phrases_dict({u'为准': [[u'we2i'], [u'zhu3n']]}) + load_phrases_dict({u'色差': [[u'se4'], [u'cha1']]}) + + # 调整字的拼音顺序 + load_single_dict({ord(u'地'): u'de,di4'}) + def _get_initials_finals(self, word: str) -> List[List[str]]: initials = [] finals = [] diff --git a/paddlespeech/t2s/frontend/zh_normalization/chronology.py b/paddlespeech/t2s/frontend/zh_normalization/chronology.py index bfa7d2b1969ddb26c72c1846e2cd7a9a0d29bfee..ea51891353ad8c6fe942edcdf7efb22ec60526ce 100644 --- a/paddlespeech/t2s/frontend/zh_normalization/chronology.py +++ b/paddlespeech/t2s/frontend/zh_normalization/chronology.py @@ -63,7 +63,10 @@ def replace_time(match) -> str: result = f"{num2str(hour)}点" if minute.lstrip('0'): - result += f"{_time_num2str(minute)}分" + if int(minute) == 30: + result += f"半" + else: + result += f"{_time_num2str(minute)}分" if second and second.lstrip('0'): result += f"{_time_num2str(second)}秒" @@ -71,7 +74,10 @@ result += "至" result += f"{num2str(hour_2)}点" if minute_2.lstrip('0'): - result += f"{_time_num2str(minute_2)}分" + if int(minute_2) == 30: + result += f"半" + else: + result += f"{_time_num2str(minute_2)}分" if second_2 and second_2.lstrip('0'): result += f"{_time_num2str(second_2)}秒" diff --git a/paddlespeech/t2s/frontend/zh_normalization/num.py b/paddlespeech/t2s/frontend/zh_normalization/num.py index 27a2f84651759e50d75c97adb7dcfd2225d9beb7..a83b42a47b70b30452d5908e58d6e7a5b1c2f93c 100644 --- a/paddlespeech/t2s/frontend/zh_normalization/num.py +++ 
b/paddlespeech/t2s/frontend/zh_normalization/num.py @@ -28,7 +28,7 @@ UNITS = OrderedDict({ 8: '亿', }) -COM_QUANTIFIERS = '(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)' +COM_QUANTIFIERS = '(所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)' # 分数表达式 RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)') @@ -110,7 +110,7 @@ def replace_default_num(match): # 纯小数 RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))') # 正整数 + 量词 -RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几])?" + COM_QUANTIFIERS) +RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" + COM_QUANTIFIERS) RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))') @@ -123,6 +123,8 @@ def replace_positive_quantifier(match) -> str: """ number = match.group(1) match_2 = match.group(2) + if match_2 == "+": + match_2 = "多" match_2: str = match_2 if match_2 else "" quantifiers: str = match.group(3) number: str = num2str(number) @@ -151,6 +153,7 @@ def replace_number(match) -> str: # 范围表达式 # match.group(1) and match.group(8) are copy from RE_NUMBER + RE_RANGE = re.compile( r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))') diff --git a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py index f9d1b8cb859ab5f449a4bc573c6133a101096fa1..bc663c70d77da24c9ef9b21fea64a5b1fc6cf2e9 100644 --- a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py +++ b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py @@ -63,11 +63,19 @@ class TextNormalizer(): # Only for pure Chinese here if lang == "zh": text = text.replace(" ", "") + # 过滤掉特殊字符 + text = re.sub(r'[《》【】<=>{}()()#&@“”^_|…\\]', '', text) text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) text = text.strip() sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] return sentences + def _post_replace(self, sentence: str) -> str: + sentence = sentence.replace('/', '每') + sentence = sentence.replace('~', '至') + + return sentence + def normalize_sentence(self, sentence: str) -> str: # basic character conversions sentence = tranditional_to_simplified(sentence) @@ -97,6 +105,7 @@ class TextNormalizer(): sentence) sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence) sentence = RE_NUMBER.sub(replace_number, sentence) + sentence = self._post_replace(sentence) return sentence diff --git a/paddlespeech/t2s/modules/transformer/repeat.py b/paddlespeech/t2s/modules/transformer/repeat.py index 2073a78b9330201dba15b42badf77cee0caceab1..1e946adf7e469fd6c05c2a8c8d9e6f16f638524e 100644 --- a/paddlespeech/t2s/modules/transformer/repeat.py +++ b/paddlespeech/t2s/modules/transformer/repeat.py @@ -36,4 +36,4 @@ def repeat(N, fn): Returns: MultiSequential: Repeated model instance. 
""" - return MultiSequential(*[fn(n) for n in range(N)]) + return MultiSequential(* [fn(n) for n in range(N)]) diff --git a/setup.py b/setup.py index 3f3632b37f2c2d0642eddb727ca0739b79fe3e41..f86758bab25d9b5283126054834777f4a3e7f478 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ base = [ "paddlespeech_feat", "praatio==5.0.0", "pypinyin", + "pypinyin-dict", "python-dateutil", "pyworld", "resampy==0.2.2", @@ -62,6 +63,7 @@ base = [ "visualdl", "webrtcvad", "yacs~=0.1.8", + "prettytable", ] server = [ diff --git a/tests/test_tipc/configs/conformer/train_benchmark.txt b/tests/test_tipc/configs/conformer/train_infer_python.txt similarity index 91% rename from tests/test_tipc/configs/conformer/train_benchmark.txt rename to tests/test_tipc/configs/conformer/train_infer_python.txt index 3833f144c6f9642ca3720caf0a0ddbaeaae5bd5d..33b1debdc59a8bfb22c1787064940020815dd9df 100644 --- a/tests/test_tipc/configs/conformer/train_benchmark.txt +++ b/tests/test_tipc/configs/conformer/train_infer_python.txt @@ -54,4 +54,4 @@ batch_size:16|30 fp_items:fp32 iteration:50 --profiler-options:"batch_range=[10,35];state=GPU;tracer_option=Default;profile_path=model.profile" -flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 +flags:null diff --git a/tests/test_tipc/configs/pwgan/train_benchmark.txt b/tests/test_tipc/configs/pwgan/train_infer_python.txt similarity index 91% rename from tests/test_tipc/configs/pwgan/train_benchmark.txt rename to tests/test_tipc/configs/pwgan/train_infer_python.txt index e936da3c2bc1ebc3e289e3d47b323c147d885562..c64984dcfc0439c6fc458d34d55adafa4dcbcdad 100644 --- a/tests/test_tipc/configs/pwgan/train_benchmark.txt +++ b/tests/test_tipc/configs/pwgan/train_infer_python.txt @@ -54,4 +54,4 @@ batch_size:6|16 fp_items:fp32 iteration:50 --profiler_options:"batch_range=[10,35];state=GPU;tracer_option=Default;profile_path=model.profile" -flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 +flags:null diff --git a/tests/test_tipc/prepare.sh b/tests/test_tipc/prepare.sh index 0280e5d411d156ebd99452db9100db1fddce82fe..b46b203223fca9cc9de88b34d868b09c99446e0c 100644 --- a/tests/test_tipc/prepare.sh +++ b/tests/test_tipc/prepare.sh @@ -26,15 +26,19 @@ if [ ${MODE} = "benchmark_train" ];then curPath=$(readlink -f "$(dirname "$0")") echo "curPath:"${curPath} cd ${curPath}/../.. - pip install . + apt-get install libsndfile1 + pip install pytest-runner kaldiio setuptools_scm -i https://pypi.tuna.tsinghua.edu.cn/simple + pip install . -i https://pypi.tuna.tsinghua.edu.cn/simple cd - if [ ${model_name} == "conformer" ]; then # set the URL for aishell_tiny dataset - URL='None' + URL=${conformer_data_URL:-"None"} echo "URL:"${URL} if [ ${URL} == 'None' ];then echo "please contact author to get the URL.\n" exit + else + wget -P ${curPath}/../../dataset/aishell/ ${URL} fi sed -i "s#^URL_ROOT_TAG#URL_ROOT = '${URL}'#g" ${curPath}/conformer/scripts/aishell_tiny.py cp ${curPath}/conformer/scripts/aishell_tiny.py ${curPath}/../../dataset/aishell/ @@ -42,6 +46,7 @@ if [ ${MODE} = "benchmark_train" ];then source path.sh # download audio data sed -i "s#aishell.py#aishell_tiny.py#g" ./local/data.sh + sed -i "s#python3#python#g" ./local/data.sh bash ./local/data.sh || exit -1 if [ $? 
-ne 0 ]; then exit 1 @@ -56,7 +61,6 @@ if [ ${MODE} = "benchmark_train" ];then sed -i "s#conf/#test_tipc/conformer/benchmark_train/conf/#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/tuning/decode.yaml sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/preprocess.yaml - fi if [ ${model_name} == "pwgan" ]; then @@ -73,4 +77,4 @@ if [ ${MODE} = "benchmark_train" ];then python ../paddlespeech/t2s/exps/gan_vocoder/normalize.py --metadata=dump/test/raw/metadata.jsonl --dumpdir=dump/test/norm --stats=dump/train/feats_stats.npy fi -fi \ No newline at end of file +fi diff --git a/tests/unit/asr/deepspeech2_online_model_test.py b/tests/unit/asr/deepspeech2_online_model_test.py index f623c5acd5066795cfa1cae43c622254a5ac88e0..f23c49263ec033280dc9b1ed0ad1b74b68d714c1 100644 --- a/tests/unit/asr/deepspeech2_online_model_test.py +++ b/tests/unit/asr/deepspeech2_online_model_test.py @@ -11,11 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os +import pickle import unittest import numpy as np import paddle +from paddle import inference +from paddlespeech.s2t.models.ds2_online import DeepSpeech2InferModelOnline from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline @@ -182,5 +186,77 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase): paddle.allclose(final_state_c_box, final_state_c_box_chk), True) +class TestDeepSpeech2StaticModelOnline(unittest.TestCase): + def setUp(self): + export_prefix = "exp/deepspeech2_online/checkpoints/test_export" + if not os.path.exists(os.path.dirname(export_prefix)): + os.makedirs(os.path.dirname(export_prefix), mode=0o755) + infer_model = DeepSpeech2InferModelOnline( + feat_size=161, + dict_size=4233, + num_conv_layers=2, + num_rnn_layers=5, + rnn_size=1024, + num_fc_layers=0, + fc_layers_size_list=[-1], + use_gru=False) + static_model = infer_model.export() + paddle.jit.save(static_model, export_prefix) + + with open("test_data/static_ds2online_inputs.pickle", "rb") as f: + self.data_dict = pickle.load(f) + + self.setup_model(export_prefix) + + def setup_model(self, export_prefix): + deepspeech_config = inference.Config(export_prefix + ".pdmodel", + export_prefix + ".pdiparams") + if ('CUDA_VISIBLE_DEVICES' in os.environ.keys() and + os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''): + deepspeech_config.enable_use_gpu(100, 0) + deepspeech_config.enable_memory_optim() + deepspeech_predictor = inference.create_predictor(deepspeech_config) + self.predictor = deepspeech_predictor + + def test_unit(self): + input_names = self.predictor.get_input_names() + audio_handle = self.predictor.get_input_handle(input_names[0]) + audio_len_handle = self.predictor.get_input_handle(input_names[1]) + h_box_handle = self.predictor.get_input_handle(input_names[2]) + c_box_handle = self.predictor.get_input_handle(input_names[3]) + + x_chunk = self.data_dict["audio_chunk"] + x_chunk_lens = self.data_dict["audio_chunk_lens"] + chunk_state_h_box = self.data_dict["chunk_state_h_box"] + chunk_state_c_box = self.data_dict["chunk_state_c_box"] + + audio_handle.reshape(x_chunk.shape) + audio_handle.copy_from_cpu(x_chunk) + + audio_len_handle.reshape(x_chunk_lens.shape) + audio_len_handle.copy_from_cpu(x_chunk_lens) + + h_box_handle.reshape(chunk_state_h_box.shape) + 
h_box_handle.copy_from_cpu(chunk_state_h_box) + + c_box_handle.reshape(chunk_state_c_box.shape) + c_box_handle.copy_from_cpu(chunk_state_c_box) + + output_names = self.predictor.get_output_names() + output_handle = self.predictor.get_output_handle(output_names[0]) + output_lens_handle = self.predictor.get_output_handle(output_names[1]) + output_state_h_handle = self.predictor.get_output_handle( + output_names[2]) + output_state_c_handle = self.predictor.get_output_handle( + output_names[3]) + self.predictor.run() + + output_chunk_probs = output_handle.copy_to_cpu() + output_chunk_lens = output_lens_handle.copy_to_cpu() + chunk_state_h_box = output_state_h_handle.copy_to_cpu() + chunk_state_c_box = output_state_c_handle.copy_to_cpu() + return True + + if __name__ == '__main__': unittest.main() diff --git a/tests/unit/asr/deepspeech2_online_model_test.sh b/tests/unit/asr/deepspeech2_online_model_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..629238fd04716a5844156898c8697e9b0e158c9f --- /dev/null +++ b/tests/unit/asr/deepspeech2_online_model_test.sh @@ -0,0 +1,3 @@ +mkdir -p ./test_data +wget -P ./test_data https://paddlespeech.bj.bcebos.com/datasets/unit_test/asr/static_ds2online_inputs.pickle +python deepspeech2_online_model_test.py
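Finally, a quick smoke test for the new stats entry points introduced in this diff. The Python call follows `StatsExecutor.__call__` above; the shell forms assume that `cli_register`/`cli_server_register` map the registered dotted names to `paddlespeech stats` and `paddlespeech_server stats`:

```python
# Print the released pretrained-model tables per task (PrettyTable output).
from paddlespeech.cli.stats import StatsExecutor

stats = StatsExecutor()
stats(task="asr")   # columns: Model-Language-Sample Rate
stats(task="tts")   # columns: Model-Language

# Assumed shell equivalents of the registered commands:
#   paddlespeech stats --task cls
#   paddlespeech_server stats --task tts
```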