diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index 687b51f10aca14936b20f6d6667d13644049c380..17a01f0bba2573050452ae5961b8fffce5550bb0 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -85,6 +85,10 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee - 命令行 (推荐使用) ``` paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + + # 流式ASR + paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8091 --input ./zh.wav + + ``` 使用帮助: @@ -191,7 +195,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` - ### 5. CLS 客户端使用方法 + ### 6. CLS 客户端使用方法 **注意:** 初次使用客户端时响应时间会略长 - 命令行 (推荐使用) ``` diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 413f00872327b1ef364146d12b8cd8540eec421f..4e0166d38523bbeef1ecfa4deccd2d23ef544f09 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -19,6 +19,8 @@ import os import random import time from typing import List +import logging +import asyncio import numpy as np import requests @@ -30,6 +32,7 @@ from ..util import stats_wrapper from paddlespeech.cli.log import logger from paddlespeech.server.utils.audio_process import wav2pcm from paddlespeech.server.utils.util import wav2base64 +from paddlespeech.server.tests.asr.online.websocket_client import ASRAudioHandler __all__ = ['TTSClientExecutor', 'ASRClientExecutor', 'CLSClientExecutor'] @@ -230,6 +233,87 @@ class ASRClientExecutor(BaseExecutor): return res
+@cli_client_register(
+    name='paddlespeech_client.asr_online', description='visit asr online service')
+class ASROnlineClientExecutor(BaseExecutor):
+    """Client executor for the online ASR service, registered as the `asr_online` sub-command."""
+
+    def __init__(self):
+        super(ASROnlineClientExecutor, self).__init__()
+        # prog matches the registered command name so usage/help text is accurate.
+        self.parser = argparse.ArgumentParser(
+            prog='paddlespeech_client.asr_online', add_help=True)
+        self.parser.add_argument(
+            '--server_ip', type=str, default='127.0.0.1', help='server ip')
+        self.parser.add_argument(
+            '--port', type=int, default=8091, help='server port')
+        self.parser.add_argument(
+            '--input',
+            type=str,
+            default=None,
+            help='Audio file to be recognized',
+            required=True)
+        self.parser.add_argument(
+            '--sample_rate', type=int, default=16000, help='audio sample rate')
+        self.parser.add_argument(
+            '--lang', type=str, default="zh_cn", help='language')
+        self.parser.add_argument(
+            '--audio_format', type=str, default="wav", help='audio format')
+
+    def execute(self, argv: List[str]) -> bool:
+        """Parse ``argv``, run one recognition request and log the outcome.
+
+        Returns:
+            True if the request completed, False if any exception was raised.
+        """
+        args = self.parser.parse_args(argv)
+
+        try:
+            time_start = time.time()
+            res = self(
+                input=args.input,
+                server_ip=args.server_ip,
+                port=args.port,
+                sample_rate=args.sample_rate,
+                lang=args.lang,
+                audio_format=args.audio_format)
+            time_end = time.time()
+            # __call__ returns the handler's result directly, not an HTTP
+            # response object, so there is no .json() to call here.
+            logger.info(res)
+            logger.info("Response time %f s." % (time_end - time_start))
+            return True
+        except Exception as e:
+            logger.error("Failed to speech recognition.")
+            # Log the cause as well; otherwise failures cannot be diagnosed.
+            logger.error(e)
+            return False
+
+    @stats_wrapper
+    def __call__(self,
+                 input: str,
+                 server_ip: str="127.0.0.1",
+                 port: int=8091,
+                 sample_rate: int=16000,
+                 lang: str="zh_cn",
+                 audio_format: str="wav"):
+        """
+        Python API to call an executor.
+
+        Runs ``ASRAudioHandler(server_ip, port).run(input)`` to completion on
+        the asyncio event loop and returns its result. ``sample_rate``,
+        ``lang`` and ``audio_format`` are accepted but not forwarded to the
+        handler.
+        """
+        logging.basicConfig(level=logging.INFO)
+        logging.info("asr websocket client start")
+        handler = ASRAudioHandler(server_ip, port)
+        loop = asyncio.get_event_loop()
+        res = loop.run_until_complete(handler.run(input))
+        logging.info("asr websocket client finished")
+        return res
+
+
 @cli_client_register( name='paddlespeech_client.cls', description='visit cls service') class CLSClientExecutor(BaseExecutor): diff --git a/paddlespeech/server/tests/asr/http_client.py b/paddlespeech/server/tests/asr/offline/http_client.py similarity index 100% rename from paddlespeech/server/tests/asr/http_client.py rename to paddlespeech/server/tests/asr/offline/http_client.py diff --git a/paddlespeech/server/tests/asr/online/README_cn.md b/paddlespeech/server/tests/asr/online/README_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..3e7d7a15cd417413c1dd7bd051d38fd9fc1ab863 --- /dev/null +++ b/paddlespeech/server/tests/asr/online/README_cn.md @@ -0,0 +1,49 @@ +([简体中文](./README_cn.md)|English) + +# 语音服务 + +## 介绍 +本文档介绍如何使用流式ASR的三种不同客户端:网页、麦克风、Python模拟流式服务。 + + +## 使用方法 +### 1. 安装 +请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + +推荐使用 **paddlepaddle 2.2.1** 或以上版本。 +你可以从 easy,medium,hard 三种方式中选择一种方式安装 PaddleSpeech。 + + +### 2. 准备测试文件 + +这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 + +可以下载此 ASR client的示例音频: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +``` + +### 3. 
流式 ASR 客户端使用方法 + +- Python模拟流式服务命令行 + ``` + + # 流式ASR + paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8091 --input ./zh.wav + + ``` + + +- 麦克风 + ``` + # 直接调用麦克风设备 + python microphone_client.py + + ``` + + +- 网页 + ``` + # 进入web目录后参考相关readme.md + + ``` diff --git a/paddlespeech/server/tests/asr/online/web/paddle_web_demo.png b/paddlespeech/server/tests/asr/online/web/paddle_web_demo.png new file mode 100644 index 0000000000000000000000000000000000000000..214edffd076bd4f6df18b4faa3587239154b958a Binary files /dev/null and b/paddlespeech/server/tests/asr/online/web/paddle_web_demo.png differ diff --git a/paddlespeech/server/tests/asr/online/web/readme.md b/paddlespeech/server/tests/asr/online/web/readme.md new file mode 100644 index 0000000000000000000000000000000000000000..f8b7ddf56a4d938b02f50f10f7c6ba280395d924 --- /dev/null +++ b/paddlespeech/server/tests/asr/online/web/readme.md @@ -0,0 +1,18 @@ +# paddlespeech serving 网页Demo + +- 感谢[wenet](https://github.com/wenet-e2e/wenet)团队的前端demo代码. + + +## 使用方法 +### 1. 在本地电脑启动网页服务 + ``` + python app.py + + ``` + +### 2. 本地电脑浏览器 + +在浏览器中输入127.0.0.1:19999 即可看到相关网页Demo。 + +![图片](./paddle_web_demo.png) + \ No newline at end of file