fixed comments, test=doc

2ec8d608 · WilliamZhang06 · d847fe29 · 2ec8d608 · 2ec8d608 · 2ec8d608
6 changed files
--- a/paddlespeech/server/conf/application.yaml
+++ b/paddlespeech/server/conf/application.yaml
@@ -3,18 +3,15 @@
 #################################################################################
 #                             SERVER SETTING                                    #
 #################################################################################
-host: 0.0.0.0
+host: 127.0.0.1
 port: 8090

 # The task format in the engin_list is: <speech task>_<engine type>
 # task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference']
-# protocol: 'http'
-# engine_list: ['asr_python', 'tts_python', 'cls_python']
-
-
-# websocket, http (only choose one). websocket only support online engine type.
-protocol: 'websocket'
-engine_list: ['asr_online']
+# protocol = ['websocket', 'http'] (only one can be selected).
+# http only supports offline engine types.
+protocol: 'http'
+engine_list: ['asr_python', 'tts_python', 'cls_python']


 #################################################################################

--- a/paddlespeech/server/conf/ws_application.yaml
+++ b/paddlespeech/server/conf/ws_application.yaml
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8091
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_online', 'tts_online']
+# protocol = ['websocket', 'http'] (only one can be selected).
+# websocket only supports online engine types.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+    model_type: 'deepspeech2online_aishell'
+    am_model: # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path: 
+    decode_method: 
+    force_yes: True
+
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    chunk_buffer_conf:
+        frame_duration_ms: 80
+        shift_ms: 40
+        sample_rate: 16000
+        sample_width: 2
+
+    vad_conf:
+        aggressiveness: 2
+        sample_rate: 16000
+        frame_duration_ms: 20
+        sample_width: 2
+        padding_ms: 200
+        padding_ratio: 0.9
--- a/paddlespeech/server/tests/asr/online/microphone_client.py
+++ b/paddlespeech/server/tests/asr/online/microphone_client.py
@@ -11,25 +11,23 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 record wave from the mic
 """
-
+import asyncio
+import json
+import logging
 import threading
-import pyaudio
 import wave
-import logging
-import asyncio
+from signal import SIGINT
+from signal import SIGTERM
+
+import pyaudio
 import websockets
-import json
-from signal import SIGINT, SIGTERM


 class ASRAudioHandler(threading.Thread):
-    def __init__(self,
-                 url="127.0.0.1",
-                 port=8090):
+    def __init__(self, url="127.0.0.1", port=8091):
        threading.Thread.__init__(self)
        self.url = url
        self.port = port
@@ -56,12 +54,13 @@ class ASRAudioHandler(threading.Thread):
        self._running = True
        self._frames = []
        p = pyaudio.PyAudio()
-        stream = p.open(format=self.format,
+        stream = p.open(
+            format=self.format,
            channels=self.channels,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk)
-        while(self._running):
+        while (self._running):
            data = stream.read(self.chunk)
            self._frames.append(data)
            self.data_backup.append(data)
@@ -97,11 +96,15 @@ class ASRAudioHandler(threading.Thread):

            async with websockets.connect(self.url) as ws:
                # 发送开始指令
-                audio_info = json.dumps({
+                audio_info = json.dumps(
+                    {
                        "name": "test.wav",
                        "signal": "start",
                        "nbest": 5
-                                }, sort_keys=True, indent=4, separators=(',', ': '))
+                    },
+                    sort_keys=True,
+                    indent=4,
+                    separators=(',', ': '))
                await ws.send(audio_info)
                msg = await ws.recv()
                logging.info("receive msg={}".format(msg))
@@ -117,11 +120,15 @@ class ASRAudioHandler(threading.Thread):
                except asyncio.CancelledError:
                    # quit
                    # send finished 
-                    audio_info = json.dumps({
+                    audio_info = json.dumps(
+                        {
                            "name": "test.wav",
                            "signal": "end",
                            "nbest": 5
-                                    }, sort_keys=True, indent=4, separators=(',', ': '))
+                        },
+                        sort_keys=True,
+                        indent=4,
+                        separators=(',', ': '))
                    await ws.send(audio_info)
                    msg = await ws.recv()
                    logging.info("receive msg={}".format(msg))
@@ -141,7 +148,7 @@ if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    logging.info("asr websocket client start")

-    handler = ASRAudioHandler("127.0.0.1", 8090)
+    handler = ASRAudioHandler("127.0.0.1", 8091)
    loop = asyncio.get_event_loop()
    main_task = asyncio.ensure_future(handler.run())
    for signal in [SIGINT, SIGTERM]:

--- a/paddlespeech/server/tests/asr/online/websocket_client.py
+++ b/paddlespeech/server/tests/asr/online/websocket_client.py
@@ -11,26 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 #!/usr/bin/python
 # -*- coding: UTF-8 -*-
-
 import argparse
-import logging
-import time
-import os
+import asyncio
 import json
-import wave
+import logging
+
 import numpy as np
-import asyncio
-import websockets
 import soundfile
+import websockets


 class ASRAudioHandler:
-    def __init__(self,
-                 url="127.0.0.1",
-                 port=8090):
+    def __init__(self, url="127.0.0.1", port=8090):
        self.url = url
        self.port = port
        self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr"
@@ -42,13 +36,11 @@ class ASRAudioHandler:
        chunk_size = 80 * 16  #80ms, sample_rate = 16kHz

        if (x_len - chunk_size) % chunk_stride != 0:
-            padding_len_x = chunk_stride - (x_len - chunk_size
-                                            ) % chunk_stride
+            padding_len_x = chunk_stride - (x_len - chunk_size) % chunk_stride
        else:
            padding_len_x = 0

-        padding = np.zeros(
-            (padding_len_x), dtype=samples.dtype)
+        padding = np.zeros((padding_len_x), dtype=samples.dtype)
        padded_x = np.concatenate([samples, padding], axis=0)

        num_chunk = (x_len + padding_len_x - chunk_size) / chunk_stride + 1
@@ -68,11 +60,15 @@ class ASRAudioHandler:
        async with websockets.connect(self.url) as ws:
            # server 端已经接收到 handshake 协议头
            # 发送开始指令
-            audio_info = json.dumps({
+            audio_info = json.dumps(
+                {
                    "name": "test.wav",
                    "signal": "start",
                    "nbest": 5
-                            }, sort_keys=True, indent=4, separators=(',', ': '))
+                },
+                sort_keys=True,
+                indent=4,
+                separators=(',', ': '))
            await ws.send(audio_info)
            msg = await ws.recv()
            logging.info("receive msg={}".format(msg))
@@ -84,11 +80,15 @@ class ASRAudioHandler:
                logging.info("receive msg={}".format(msg))

            # finished 
-            audio_info = json.dumps({
+            audio_info = json.dumps(
+                {
                    "name": "test.wav",
                    "signal": "end",
                    "nbest": 5
-                            }, sort_keys=True, indent=4, separators=(',', ': '))
+                },
+                sort_keys=True,
+                indent=4,
+                separators=(',', ': '))
            await ws.send(audio_info)
            msg = await ws.recv()
            logging.info("receive msg={}".format(msg))
@@ -97,7 +97,7 @@ class ASRAudioHandler:
 def main(args):
    logging.basicConfig(level=logging.INFO)
    logging.info("asr websocket client start")
-    handler = ASRAudioHandler("127.0.0.1", 8090)
+    handler = ASRAudioHandler("127.0.0.1", 8091)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(handler.run(args.wavfile))
    logging.info("asr websocket client finished")

--- a/paddlespeech/server/utils/vad.py
+++ b/paddlespeech/server/utils/vad.py
@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import collections
-import logging

 import webrtcvad


 class VADAudio():
    def __init__(self,
-                 aggressiveness,
-                 rate,
-                 frame_duration_ms,
+                 aggressiveness=2,
+                 rate=16000,
+                 frame_duration_ms=20,
                 sample_width=2,
                 padding_ms=200,
                 padding_ratio=0.9):

--- a/paddlespeech/server/ws/asr_socket.py
+++ b/paddlespeech/server/ws/asr_socket.py
@@ -11,35 +11,39 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import base64
-import traceback
-from typing import Union
-import random
-import numpy as np
 import json

+import numpy as np
 from fastapi import APIRouter
 from fastapi import WebSocket
 from fastapi import WebSocketDisconnect
 from starlette.websockets import WebSocketState as WebSocketState

-from paddlespeech.server.engine.asr.online.asr_engine import ASREngine
 from paddlespeech.server.engine.engine_pool import get_engine_pool
 from paddlespeech.server.utils.buffer import ChunkBuffer
 from paddlespeech.server.utils.vad import VADAudio

-
 router = APIRouter()

+
 @router.websocket('/ws/asr')
 async def websocket_endpoint(websocket: WebSocket):

    await websocket.accept()

+    engine_pool = get_engine_pool()
+    asr_engine = engine_pool['asr']
    # init buffer
-    chunk_buffer = ChunkBuffer(sample_width=2)
+    chunk_buffer_conf = asr_engine.config.chunk_buffer_conf
+    chunk_buffer = ChunkBuffer(
+        sample_rate=chunk_buffer_conf['sample_rate'],
+        sample_width=chunk_buffer_conf['sample_width'])
    # init vad
-    vad = VADAudio(2, 16000, 20)
+    vad_conf = asr_engine.config.vad_conf
+    vad = VADAudio(
+        aggressiveness=vad_conf['aggressiveness'],
+        rate=vad_conf['sample_rate'],
+        frame_duration_ms=vad_conf['frame_duration_ms'])

    try:
        while True:
@@ -50,17 +54,11 @@ async def websocket_endpoint(websocket: WebSocket):
            if "text" in message:
                message = json.loads(message["text"])
                if 'signal' not in message:
-                    resp = {
-                            "status": "ok",
-                            "message": "no valid json data"
-                            }
+                    resp = {"status": "ok", "message": "no valid json data"}
                    await websocket.send_json(resp)

                if message['signal'] == 'start':
-                    resp = {
-                            "status": "ok",
-                            "signal": "server_ready"
-                            }
+                    resp = {"status": "ok", "signal": "server_ready"}
                    # do something at begining here
                    await websocket.send_json(resp)
                elif message['signal'] == 'end':
@@ -68,24 +66,19 @@ async def websocket_endpoint(websocket: WebSocket):
                    asr_engine = engine_pool['asr']
                    # reset single  engine for an new connection
                    asr_engine.reset()
-                    resp = {
-                            "status": "ok",
-                            "signal": "finished"
-                            }
+                    resp = {"status": "ok", "signal": "finished"}
                    await websocket.send_json(resp)
                    break
                else:
-                    resp = {
-                            "status": "ok",
-                            "message": "no valid json data"
-                            }
+                    resp = {"status": "ok", "message": "no valid json data"}
                    await websocket.send_json(resp)
            elif "bytes" in message:
                message = message["bytes"]

                # vad for input bytes audio
                vad.add_audio(message)
-                message = b''.join(f for f in vad.vad_collector() if f is not None)
+                message = b''.join(f for f in vad.vad_collector()
+                                   if f is not None)

                engine_pool = get_engine_pool()
                asr_engine = engine_pool['asr']
@@ -94,7 +87,8 @@ async def websocket_endpoint(websocket: WebSocket):
                for frame in frames:
                    samples = np.frombuffer(frame.bytes, dtype=np.int16)
                    sample_rate = asr_engine.config.sample_rate
-                    x_chunk, x_chunk_lens = asr_engine.preprocess(samples, sample_rate)
+                    x_chunk, x_chunk_lens = asr_engine.preprocess(samples,
+                                                                  sample_rate)
                    asr_engine.run(x_chunk, x_chunk_lens)
                    asr_results = asr_engine.postprocess()