diff --git a/paddlespeech/server/ws/asr_socket.py b/paddlespeech/server/ws/asr_socket.py index a865703dd338eaecfd66902d18fe2a4d58c857d4..10967f2850aca19fbc1a416b4c9dafa3fa2c8c9f 100644 --- a/paddlespeech/server/ws/asr_socket.py +++ b/paddlespeech/server/ws/asr_socket.py @@ -20,50 +20,52 @@ from starlette.websockets import WebSocketState as WebSocketState from paddlespeech.server.engine.asr.online.asr_engine import PaddleASRConnectionHanddler from paddlespeech.server.engine.engine_pool import get_engine_pool -from paddlespeech.server.utils.buffer import ChunkBuffer -from paddlespeech.server.utils.vad import VADAudio router = APIRouter() @router.websocket('/ws/asr') async def websocket_endpoint(websocket: WebSocket): + """PaddleSpeech Online ASR Server api + + Args: + websocket (WebSocket): the websocket instance + """ + + #1. the interface wait to accept the websocket protocal header + # and only we receive the header, it establish the connection with specific thread await websocket.accept() + #2. if we accept the websocket headers, we will get the online asr engine instance engine_pool = get_engine_pool() asr_engine = engine_pool['asr'] - connection_handler = None - # init buffer - # each websocekt connection has its own chunk buffer - chunk_buffer_conf = asr_engine.config.chunk_buffer_conf - chunk_buffer = ChunkBuffer( - window_n=chunk_buffer_conf.window_n, - shift_n=chunk_buffer_conf.shift_n, - window_ms=chunk_buffer_conf.window_ms, - shift_ms=chunk_buffer_conf.shift_ms, - sample_rate=chunk_buffer_conf.sample_rate, - sample_width=chunk_buffer_conf.sample_width) - # init vad - vad_conf = asr_engine.config.get('vad_conf', None) - if vad_conf: - vad = VADAudio( - aggressiveness=vad_conf['aggressiveness'], - rate=vad_conf['sample_rate'], - frame_duration_ms=vad_conf['frame_duration_ms']) + #3. each websocket connection, we will create an PaddleASRConnectionHanddler to process such audio + # and each connection has its own connection instance to process the request + # and only if client send the start signal, we create the PaddleASRConnectionHanddler instance + connection_handler = None try: + #4. we do a loop to process the audio package by package according the protocal + # and only if the client send finished signal, we will break the loop while True: # careful here, changed the source code from starlette.websockets + # 4.1 we wait for the client signal for the specific action assert websocket.application_state == WebSocketState.CONNECTED message = await websocket.receive() websocket._raise_on_disconnect(message) + + #4.2 text for the action command and bytes for pcm data if "text" in message: + # we first parse the specific command message = json.loads(message["text"]) if 'signal' not in message: resp = {"status": "ok", "message": "no valid json data"} await websocket.send_json(resp) + # start command, we create the PaddleASRConnectionHanddler instance to process the audio data + # end command, we process the all the last audio pcm and return the final result + # and we break the loop if message['signal'] == 'start': resp = {"status": "ok", "signal": "server_ready"} # do something at begining here @@ -72,6 +74,7 @@ async def websocket_endpoint(websocket: WebSocket): await websocket.send_json(resp) elif message['signal'] == 'end': # reset single engine for an new connection + # and we will destroy the connection connection_handler.decode(is_finished=True) connection_handler.rescoring() asr_results = connection_handler.get_result() @@ -88,12 +91,17 @@ async def websocket_endpoint(websocket: WebSocket): resp = {"status": "ok", "message": "no valid json data"} await websocket.send_json(resp) elif "bytes" in message: + # bytes for the pcm data message = message["bytes"] + # we extract the remained audio pcm + # and decode for the result in this package data connection_handler.extract_feat(message) connection_handler.decode(is_finished=False) asr_results = connection_handler.get_result() + # return the current period result + # if the engine create the vad instance, this connection will have many period results resp = {'asr_results': asr_results} await websocket.send_json(resp) except WebSocketDisconnect: