Merge pull request #1507 from zh794390558/cli

[cli] add cli batch/pipe example to readme

Merge pull request #1507 from zh794390558/cli
[cli] add cli batch/pipe example to readme
e8f2d8f1 · Hui Zhang · GitHub · 2517df92 · 335638ba · e8f2d8f1
14 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 *.pyc
 .vscode
 *log
+*.wav
 *.pdmodel
 *.pdiparams*
 *.zip
@@ -30,5 +31,8 @@ tools/OpenBLAS/
 tools/Miniconda3-latest-Linux-x86_64.sh
 tools/activate_python.sh
 tools/miniconda.sh
+tools/CRF++-0.58/
+
+speechx/fc_patch/

 *output/
--- a/README.md
+++ b/README.md
@@ -196,16 +196,18 @@ Developers can have a try of our models with [PaddleSpeech Command Line](./paddl
 ```shell
 paddlespeech cls --input input.wav
 ```
+
 **Automatic Speech Recognition**
 ```shell
 paddlespeech asr --lang zh --input input_16k.wav
 ```
-**Speech Translation** (English to Chinese)

+**Speech Translation** (English to Chinese)
 (not supported on Mac and Windows yet)
 ```shell
 paddlespeech st --input input_16k.wav
 ```
+
 **Text-to-Speech** 
 ```shell
 paddlespeech tts --input "你好，欢迎使用飞桨深度学习框架！" --output output.wav
@@ -218,7 +220,16 @@ paddlespeech tts --input "你好，欢迎使用飞桨深度学习框架！" --ou
  paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
  ```

+**Batch Process**
+```
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+```  

+**Shell Pipeline**
+ASR + Punc:
+```
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+```

 For more command lines, please see: [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos)


--- a/README_cn.md
+++ b/README_cn.md
@@ -216,6 +216,17 @@ paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！
   paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
   ```

+**批处理**
+```
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+```  
+
+**Shell管道**
+ASR + Punc:
+```
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+```
+
 更多命令行命令请参考 [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos)
 > Note: 如果需要训练或者微调，请查看[语音识别](./docs/source/asr/quick_start.md)， [语音合成](./docs/source/tts/quick_start.md)。


--- a/demos/speech_recognition/.gitignore
+++ b/demos/speech_recognition/.gitignore
+*.wav
--- a/demos/speech_recognition/README.md
+++ b/demos/speech_recognition/README.md
@@ -27,6 +27,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
  paddlespeech asr --input ./zh.wav
  # English
  paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav
+  # Chinese ASR + Punctuation Restoration
+  paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
  ```
  (It doesn't matter if package `paddlespeech-ctcdecoders` is not found, this package is optional.)
  

--- a/demos/speech_recognition/README_cn.md
+++ b/demos/speech_recognition/README_cn.md
@@ -25,6 +25,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
  paddlespeech asr --input ./zh.wav
  # 英文
  paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav
+  # 中文 + 标点恢复
+  paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
  ```
  (如果显示 `paddlespeech-ctcdecoders` 这个 python 包没有找到的 Error，没有关系，这个包是非必须的。)
  

--- a/demos/speech_recognition/run.sh
+++ b/demos/speech_recognition/run.sh
 #!/bin/bash

 wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
+
+# asr
 paddlespeech asr --input ./zh.wav
+
+
+# asr + punc
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
\ No newline at end of file
--- a/demos/text_to_speech/README.md
+++ b/demos/text_to_speech/README.md
@@ -17,11 +17,14 @@ The input of this demo should be a text of the specific language that can be pas
 ### 3. Usage
 - Command Line (Recommended)
    - Chinese
-    
        The default acoustic model is `Fastspeech2`, and the default vocoder is `Parallel WaveGAN`.
        ```bash
        paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！"
        ```
+    - Batch Process
+        ```bash
+        echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+        ```
    - Chinese, use `SpeedySpeech` as the acoustic model
        ```bash
        paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！"

--- a/demos/text_to_speech/README_cn.md
+++ b/demos/text_to_speech/README_cn.md
@@ -24,6 +24,10 @@
        ```bash
        paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！"
        ```
+    - 批处理
+        ```bash
+        echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+        ```
    - 中文，使用 `SpeedySpeech` 作为声学模型
        ```bash
        paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！"

--- a/demos/text_to_speech/run.sh
+++ b/demos/text_to_speech/run.sh
 #!/bin/bash

+# single process
 paddlespeech tts --input 今天的天气不错啊
+
+# Batch process
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
\ No newline at end of file
--- a/paddlespeech/s2t/io/sampler.py
+++ b/paddlespeech/s2t/io/sampler.py
@@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False):
    """
    rng = np.random.RandomState(epoch)
    shift_len = rng.randint(0, batch_size - 1)
-    batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size))
+    batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
    rng.shuffle(batch_indices)
    batch_indices = [item for batch in batch_indices for item in batch]
    assert clipped is False

--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@@ -33,8 +33,6 @@ from paddlespeech.s2t.modules.decoder import TransformerDecoder
 from paddlespeech.s2t.modules.encoder import ConformerEncoder
 from paddlespeech.s2t.modules.encoder import TransformerEncoder
 from paddlespeech.s2t.modules.loss import LabelSmoothingLoss
-from paddlespeech.s2t.modules.mask import mask_finished_preds
-from paddlespeech.s2t.modules.mask import mask_finished_scores
 from paddlespeech.s2t.modules.mask import subsequent_mask
 from paddlespeech.s2t.utils import checkpoint
 from paddlespeech.s2t.utils import layer_tools

--- a/paddlespeech/t2s/modules/transformer/repeat.py
+++ b/paddlespeech/t2s/modules/transformer/repeat.py
@@ -36,4 +36,4 @@ def repeat(N, fn):
    Returns:
        MultiSequential: Repeated model instance.
    """
-    return MultiSequential(*[fn(n) for n in range(N)])
+    return MultiSequential(* [fn(n) for n in range(N)])
--- a/tests/unit/asr/deepspeech2_online_model_test.py
+++ b/tests/unit/asr/deepspeech2_online_model_test.py
@@ -11,16 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+import pickle
 import unittest

 import numpy as np
 import paddle
-import pickle
-import os
 from paddle import inference

-from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline
 from paddlespeech.s2t.models.ds2_online import DeepSpeech2InferModelOnline
+from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline
+

 class TestDeepSpeech2ModelOnline(unittest.TestCase):
    def setUp(self):
@@ -185,10 +186,7 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase):
                paddle.allclose(final_state_c_box, final_state_c_box_chk), True)


-
-
 class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
-    
    def setUp(self):
        export_prefix = "exp/deepspeech2_online/checkpoints/test_export"
        if not os.path.exists(os.path.dirname(export_prefix)):
@@ -210,12 +208,11 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase):

        self.setup_model(export_prefix)

-
    def setup_model(self, export_prefix):
-        deepspeech_config = inference.Config(
-            export_prefix + ".pdmodel",
+        deepspeech_config = inference.Config(export_prefix + ".pdmodel",
                                             export_prefix + ".pdiparams")
-        if ('CUDA_VISIBLE_DEVICES' in os.environ.keys() and os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''):
+        if ('CUDA_VISIBLE_DEVICES' in os.environ.keys() and
+                os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''):
            deepspeech_config.enable_use_gpu(100, 0)
            deepspeech_config.enable_memory_optim()
        deepspeech_predictor = inference.create_predictor(deepspeech_config)
@@ -228,7 +225,6 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
        h_box_handle = self.predictor.get_input_handle(input_names[2])
        c_box_handle = self.predictor.get_input_handle(input_names[3])

-
        x_chunk = self.data_dict["audio_chunk"]
        x_chunk_lens = self.data_dict["audio_chunk_lens"]
        chunk_state_h_box = self.data_dict["chunk_state_h_box"]
@@ -246,13 +242,9 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
        c_box_handle.reshape(chunk_state_c_box.shape)
        c_box_handle.copy_from_cpu(chunk_state_c_box)

-
-
        output_names = self.predictor.get_output_names()
-        output_handle = self.predictor.get_output_handle(
-            output_names[0])
-        output_lens_handle = self.predictor.get_output_handle(
-            output_names[1])
+        output_handle = self.predictor.get_output_handle(output_names[0])
+        output_lens_handle = self.predictor.get_output_handle(output_names[1])
        output_state_h_handle = self.predictor.get_output_handle(
            output_names[2])
        output_state_c_handle = self.predictor.get_output_handle(