Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
12f3532b
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
12f3532b
编写于
6月 28, 2022
作者:
小湉湉
提交者:
GitHub
6月 28, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2080 from PaddlePaddle/dev-hym
[demos] use new engine api for speech_web
上级
02681b72
308a90ef
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
117 additions
and
228 deletions
+117
-228
demos/speech_web/README_cn.md
demos/speech_web/README_cn.md
+8
-55
demos/speech_web/speech_server/main.py
demos/speech_web/speech_server/main.py
+1
-1
demos/speech_web/speech_server/src/AudioManeger.py
demos/speech_web/speech_server/src/AudioManeger.py
+0
-23
demos/speech_web/speech_server/src/SpeechBase/asr.py
demos/speech_web/speech_server/src/SpeechBase/asr.py
+2
-27
demos/speech_web/speech_server/src/SpeechBase/nlp.py
demos/speech_web/speech_server/src/SpeechBase/nlp.py
+0
-5
demos/speech_web/speech_server/src/SpeechBase/sql_helper.py
demos/speech_web/speech_server/src/SpeechBase/sql_helper.py
+0
-36
demos/speech_web/speech_server/src/SpeechBase/tts.py
demos/speech_web/speech_server/src/SpeechBase/tts.py
+104
-16
demos/speech_web/speech_server/src/SpeechBase/vpr.py
demos/speech_web/speech_server/src/SpeechBase/vpr.py
+0
-34
demos/speech_web/speech_server/src/SpeechBase/vpr_encode.py
demos/speech_web/speech_server/src/SpeechBase/vpr_encode.py
+1
-7
demos/speech_web/speech_server/src/robot.py
demos/speech_web/speech_server/src/robot.py
+1
-24
未找到文件。
demos/speech_web/README_cn.md
浏览文件 @
12f3532b
...
...
@@ -24,6 +24,12 @@ PaddleSpeechDemo是一个以PaddleSpeech的语音交互功能为主体开发的D
# 安装环境
cd speech_server
pip install -r requirements.txt
# 下载 ie 模型,针对地点进行微调,效果更好,不下载的话会使用其它版本,效果没有这个好
cd source
mkdir model
cd model
wget https://bj.bcebos.com/paddlenlp/applications/speech-cmd-analysis/finetune/model_state.pdparams
```
...
...
@@ -61,59 +67,6 @@ yarn dev --port 8011
```
默认配置下,前端中配置的后台地址信息是localhost,确保后端服务器和打开页面的浏览器在同一台机器上,不在同一台机器的配置方式见下方的FAQ:【后端如果部署在其它机器或者别的端口如何修改】
## Docker启动
### 后端docker
后端docker使用
[
paddlepaddle官方docker
](
https://www.paddlepaddle.org.cn
)
,这里演示CPU版本
```
# 拉取PaddleSpeech项目
cd PaddleSpeechServer
git clone https://github.com/PaddlePaddle/PaddleSpeech.git
# 拉取镜像
docker pull registry.baidubce.com/paddlepaddle/paddle:2.3.0
# 启动容器
docker run --name paddle -it -p 8010:8010 -v $PWD:/paddle registry.baidubce.com/paddlepaddle/paddle:2.3.0 /bin/bash
# 进入容器
cd /paddle
# 安装依赖
pip install -r requirements
# 启动服务
python main --port 8010
```
### 前端docker
前端docker直接使用
[
node官方的docker
](
https://hub.docker.com/_/node
)
即可
```
shell
docker pull node
```
镜像中安装依赖
```
shell
cd
PaddleSpeechWebClient
# 映射外部8011端口
docker run
-it
-p
8011:8011
-v
$PWD
:/paddle node:latest bin/bash
# 进入容器中
cd
/paddle
# 安装依赖
yarn
install
# 启动前端
yarn dev
--port
8011
```
## FAQ
#### Q: 如何安装node.js
...
...
@@ -126,7 +79,7 @@ A:后端的配置地址有分散在两个文件中
修改第一个文件
`PaddleSpeechWebClient/vite.config.js`
```
json
```
server: {
host: "0.0.0.0",
proxy: {
...
...
@@ -141,7 +94,7 @@ server: {
修改第二个文件
`PaddleSpeechWebClient/src/api/API.js`
(Websocket代理配置失败,所以需要在这个文件中修改)
```
javascript
```
// websocket (这里改成后端所在的接口)
CHAT_SOCKET_RECORD: 'ws://localhost:8010/ws/asr/offlineStream', // ChatBot websocket 接口
ASR_SOCKET_RECORD: 'ws://localhost:8010/ws/asr/onlineStream', // Stream ASR 接口
...
...
demos/speech_web/speech_server/main.py
浏览文件 @
12f3532b
...
...
@@ -30,7 +30,7 @@ from src.robot import Robot
from
src.WebsocketManeger
import
ConnectionManager
from
src.SpeechBase.vpr
import
VPR
from
paddlespeech.server.engine.asr.online.asr_engine
import
PaddleASRConnectionHanddler
from
paddlespeech.server.engine.asr.online.
python.
asr_engine
import
PaddleASRConnectionHanddler
from
paddlespeech.server.utils.audio_process
import
float2pcm
...
...
demos/speech_web/speech_server/src/AudioManeger.py
浏览文件 @
12f3532b
...
...
@@ -145,29 +145,6 @@ class AudioMannger:
def
resume
(
self
):
self
.
is_pause
=
False
if
__name__
==
'__main__'
:
from
robot
import
Robot
chatbot
=
Robot
()
chatbot
.
init
()
audio_manger
=
AudioMannger
(
chatbot
)
file_list
=
[
"source/20220418145230qbenc.pcm"
,
]
for
file
in
file_list
:
with
open
(
file
,
"rb"
)
as
f
:
pcm_bin
=
f
.
read
()
print
(
len
(
pcm_bin
))
asr_
=
audio_manger
.
stream_asr
(
pcm_bin
=
pcm_bin
)
print
(
asr_
)
print
(
audio_manger
.
end
())
print
(
chatbot
.
speech2text
(
"source/20220418145230zrxia.wav"
))
\ No newline at end of file
demos/speech_web/speech_server/src/SpeechBase/asr.py
浏览文件 @
12f3532b
...
...
@@ -4,8 +4,8 @@ import paddle
import
librosa
import
soundfile
from
paddlespeech.server.engine.asr.online.asr_engine
import
ASREngine
from
paddlespeech.server.engine.asr.online.asr_engine
import
PaddleASRConnectionHanddler
from
paddlespeech.server.engine.asr.online.
python.
asr_engine
import
ASREngine
from
paddlespeech.server.engine.asr.online.
python.
asr_engine
import
PaddleASRConnectionHanddler
from
paddlespeech.server.utils.config
import
get_config
def
readWave
(
samples
):
...
...
@@ -59,29 +59,4 @@ class ASR:
self
.
connection_handler
.
reset
()
return
asr_results
if
__name__
==
'__main__'
:
config_path
=
r
"../../PaddleSpeech/paddlespeech/server/conf/ws_conformer_application.yaml"
wav_path
=
r
"../../source/demo/demo_16k.wav"
samples
,
sample_rate
=
soundfile
.
read
(
wav_path
,
dtype
=
'int16'
)
asr
=
ASR
(
config_path
=
config_path
)
end_result
=
asr
.
offlineASR
(
samples
=
samples
,
sample_rate
=
sample_rate
)
print
(
"端到端识别结果:"
,
end_result
)
for
sub_wav
in
readWave
(
samples
=
samples
):
# print(sub_wav)
message
=
sub_wav
.
tobytes
()
offline_result
=
asr
.
onlineASR
(
message
,
is_finished
=
False
)
print
(
"流式识别结果: "
,
offline_result
)
offline_result
=
asr
.
onlineASR
(
is_finished
=
True
)
print
(
"流式识别结果: "
,
offline_result
)
\ No newline at end of file
demos/speech_web/speech_server/src/SpeechBase/nlp.py
浏览文件 @
12f3532b
...
...
@@ -20,9 +20,4 @@ class NLP:
result
=
self
.
ie_model
(
text
)
return
result
if
__name__
==
'__main__'
:
ie_model_path
=
"../../source/model/"
nlp
=
NLP
(
ie_model_path
=
ie_model_path
)
text
=
"今天早上我从大牛坊去百度科技园花了七百块钱"
print
(
nlp
.
ie
(
text
))
\ No newline at end of file
demos/speech_web/speech_server/src/SpeechBase/sql_helper.py
浏览文件 @
12f3532b
...
...
@@ -113,40 +113,4 @@ class DataBase(object):
b
=
base64
.
b64decode
(
vector_base64
)
vc
=
np
.
frombuffer
(
b
,
dtype
=
dtype
)
return
vc
if
__name__
==
'__main__'
:
db_path
=
"../../source/db/vpr.sqlite"
db
=
DataBase
(
db_path
)
# 准备数据
import
numpy
as
np
vector
=
np
.
random
.
randn
((
192
)).
astype
(
np
.
float32
).
tobytes
()
vector_base64
=
base64
.
b64encode
(
vector
).
decode
(
'utf8'
)
username
=
"sss"
wav_path
=
r
"../../source/demo/demo_16k.wav"
# 插入数据
db
.
insert_one
(
username
,
vector_base64
,
wav_path
)
# 查询数据
res_all
=
db
.
select_all
()
print
(
"res_all: "
,
res_all
)
s_id
=
res_all
[
0
][
'id'
]
res_id
=
db
.
select_by_id
(
s_id
)
print
(
"res_id: "
,
res_id
)
res_uername
=
db
.
select_by_username
(
username
)
print
(
"res_username: "
,
res_uername
)
# base64还原
b
=
base64
.
b64decode
(
res_uername
[
0
][
'vector'
])
vc
=
np
.
frombuffer
(
b
,
dtype
=
np
.
float32
)
print
(
vc
)
# 删除数据
db
.
drop_by_username
(
username
)
res_all
=
db
.
select_all
()
print
(
"删除后 res_all: "
,
res_all
)
db
.
drop_all
()
\ No newline at end of file
demos/speech_web/speech_server/src/SpeechBase/tts.py
浏览文件 @
12f3532b
...
...
@@ -7,7 +7,8 @@
# 4. 流式推理
import
base64
import
math
import
logging
import
numpy
as
np
from
paddlespeech.server.utils.onnx_infer
import
get_sess
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
...
...
@@ -17,14 +18,14 @@ from paddlespeech.server.utils.config import get_config
from
paddlespeech.server.engine.tts.online.onnx.tts_engine
import
TTSEngine
class
TTS
:
def
__init__
(
self
,
config_path
):
self
.
config
=
get_config
(
config_path
)[
'tts_online-onnx'
]
self
.
config
[
'voc_block'
]
=
36
self
.
engine
=
TTSEngine
()
self
.
engine
=
TTSEngine
()
self
.
engine
.
init
(
self
.
config
)
self
.
engine
.
warm_up
()
self
.
executor
=
self
.
engine
.
executor
#self.engine.warm_up()
# 前端初始化
self
.
frontend
=
Frontend
(
...
...
@@ -81,8 +82,105 @@ class TTS:
return
wavs
def
streamTTS
(
self
,
text
):
for
sub_wav_base64
in
self
.
engine
.
run
(
sentence
=
text
):
yield
sub_wav_base64
get_tone_ids
=
False
merge_sentences
=
False
# front
input_ids
=
self
.
frontend
.
get_input_ids
(
text
,
merge_sentences
=
merge_sentences
,
get_tone_ids
=
get_tone_ids
)
phone_ids
=
input_ids
[
"phone_ids"
]
for
i
in
range
(
len
(
phone_ids
)):
part_phone_ids
=
phone_ids
[
i
].
numpy
()
voc_chunk_id
=
0
# fastspeech2_csmsc
if
self
.
config
.
am
==
"fastspeech2_csmsc_onnx"
:
# am
mel
=
self
.
executor
.
am_sess
.
run
(
output_names
=
None
,
input_feed
=
{
'text'
:
part_phone_ids
})
mel
=
mel
[
0
]
# voc streaming
mel_chunks
=
get_chunks
(
mel
,
self
.
config
.
voc_block
,
self
.
config
.
voc_pad
,
"voc"
)
voc_chunk_num
=
len
(
mel_chunks
)
for
i
,
mel_chunk
in
enumerate
(
mel_chunks
):
sub_wav
=
self
.
executor
.
voc_sess
.
run
(
output_names
=
None
,
input_feed
=
{
'logmel'
:
mel_chunk
})
sub_wav
=
self
.
depadding
(
sub_wav
[
0
],
voc_chunk_num
,
i
,
self
.
config
.
voc_block
,
self
.
config
.
voc_pad
,
self
.
config
.
voc_upsample
)
yield
self
.
after_process
(
sub_wav
)
# fastspeech2_cnndecoder_csmsc
elif
self
.
config
.
am
==
"fastspeech2_cnndecoder_csmsc_onnx"
:
# am
orig_hs
=
self
.
executor
.
am_encoder_infer_sess
.
run
(
None
,
input_feed
=
{
'text'
:
part_phone_ids
})
orig_hs
=
orig_hs
[
0
]
# streaming voc chunk info
mel_len
=
orig_hs
.
shape
[
1
]
voc_chunk_num
=
math
.
ceil
(
mel_len
/
self
.
config
.
voc_block
)
start
=
0
end
=
min
(
self
.
config
.
voc_block
+
self
.
config
.
voc_pad
,
mel_len
)
# streaming am
hss
=
get_chunks
(
orig_hs
,
self
.
config
.
am_block
,
self
.
config
.
am_pad
,
"am"
)
am_chunk_num
=
len
(
hss
)
for
i
,
hs
in
enumerate
(
hss
):
am_decoder_output
=
self
.
executor
.
am_decoder_sess
.
run
(
None
,
input_feed
=
{
'xs'
:
hs
})
am_postnet_output
=
self
.
executor
.
am_postnet_sess
.
run
(
None
,
input_feed
=
{
'xs'
:
np
.
transpose
(
am_decoder_output
[
0
],
(
0
,
2
,
1
))
})
am_output_data
=
am_decoder_output
+
np
.
transpose
(
am_postnet_output
[
0
],
(
0
,
2
,
1
))
normalized_mel
=
am_output_data
[
0
][
0
]
sub_mel
=
denorm
(
normalized_mel
,
self
.
executor
.
am_mu
,
self
.
executor
.
am_std
)
sub_mel
=
self
.
depadding
(
sub_mel
,
am_chunk_num
,
i
,
self
.
config
.
am_block
,
self
.
config
.
am_pad
,
1
)
if
i
==
0
:
mel_streaming
=
sub_mel
else
:
mel_streaming
=
np
.
concatenate
(
(
mel_streaming
,
sub_mel
),
axis
=
0
)
# streaming voc
# 当流式AM推理的mel帧数大于流式voc推理的chunk size,开始进行流式voc 推理
while
(
mel_streaming
.
shape
[
0
]
>=
end
and
voc_chunk_id
<
voc_chunk_num
):
voc_chunk
=
mel_streaming
[
start
:
end
,
:]
sub_wav
=
self
.
executor
.
voc_sess
.
run
(
output_names
=
None
,
input_feed
=
{
'logmel'
:
voc_chunk
})
sub_wav
=
self
.
depadding
(
sub_wav
[
0
],
voc_chunk_num
,
voc_chunk_id
,
self
.
config
.
voc_block
,
self
.
config
.
voc_pad
,
self
.
config
.
voc_upsample
)
yield
self
.
after_process
(
sub_wav
)
voc_chunk_id
+=
1
start
=
max
(
0
,
voc_chunk_id
*
self
.
config
.
voc_block
-
self
.
config
.
voc_pad
)
end
=
min
(
(
voc_chunk_id
+
1
)
*
self
.
config
.
voc_block
+
self
.
config
.
voc_pad
,
mel_len
)
else
:
logging
.
error
(
"Only support fastspeech2_csmsc or fastspeech2_cnndecoder_csmsc on streaming tts."
)
def
streamTTSBytes
(
self
,
text
):
for
wav
in
self
.
engine
.
executor
.
infer
(
...
...
@@ -106,16 +204,6 @@ class TTS:
# 用 TVM 优化
pass
if
__name__
==
'__main__'
:
text
=
"啊哈哈哈哈哈哈啊哈哈哈哈哈哈啊哈哈哈哈哈哈啊哈哈哈哈哈哈啊哈哈哈哈哈哈"
config_path
=
"../../PaddleSpeech/demos/streaming_tts_server/conf/tts_online_application.yaml"
tts
=
TTS
(
config_path
)
for
sub_wav
in
tts
.
streamTTS
(
text
):
print
(
"sub_wav_base64: "
,
len
(
sub_wav
))
end_wav
=
tts
.
offlineTTS
(
text
)
print
(
end_wav
)
\ No newline at end of file
demos/speech_web/speech_server/src/SpeechBase/vpr.py
浏览文件 @
12f3532b
...
...
@@ -116,37 +116,3 @@ class VPR:
# 清空 faiss
self
.
index_ip
.
reset
()
if
__name__
==
'__main__'
:
db_path
=
"../../source/db/vpr.sqlite"
dim
=
192
top_k
=
5
vpr
=
VPR
(
db_path
,
dim
,
top_k
)
# 准备测试数据
username
=
"sss"
wav_path
=
r
"../../source/demo/demo_16k.wav"
# 注册声纹
vpr
.
vpr_enroll
(
username
,
wav_path
)
# 获取数据
print
(
vpr
.
vpr_list
())
# 识别声纹
recolist
=
vpr
.
vpr_recog
(
wav_path
)
print
(
recolist
)
# 通过 id 获取数据
idx
=
recolist
[
0
][
1
]
print
(
vpr
.
vpr_data
(
idx
))
# 删除声纹
vpr
.
vpr_del
(
username
)
vpr
.
vpr_droptable
()
\ No newline at end of file
demos/speech_web/speech_server/src/SpeechBase/vpr_encode.py
浏览文件 @
12f3532b
from
paddlespeech.cli
import
VectorExecutor
from
paddlespeech.cli
.vector
import
VectorExecutor
import
numpy
as
np
import
logging
...
...
@@ -17,10 +17,4 @@ def get_audio_embedding(path):
logging
.
error
(
f
"Error with embedding:
{
e
}
"
)
return
None
if
__name__
==
'__main__'
:
audio_path
=
r
"../../source/demo/demo_16k.wav"
emb
=
get_audio_embedding
(
audio_path
)
print
(
emb
.
shape
)
print
(
emb
.
dtype
)
print
(
type
(
emb
))
\ No newline at end of file
demos/speech_web/speech_server/src/robot.py
浏览文件 @
12f3532b
...
...
@@ -35,7 +35,7 @@ class Robot:
# asr model初始化
self
.
asr_model
(
asr_init_path
,
model
=
self
.
asr_name
,
lang
=
'zh'
,
sample_rate
=
16000
)
sample_rate
=
16000
,
force_yes
=
True
)
def
speech2text
(
self
,
audio_file
):
...
...
@@ -67,27 +67,4 @@ class Robot:
result
=
self
.
nlp
.
ie
(
text
)
return
result
if
__name__
==
'__main__'
:
tts_config
=
"../PaddleSpeech/demos/streaming_tts_server/conf/tts_online_application.yaml"
asr_config
=
"../PaddleSpeech/demos/streaming_asr_server/conf/ws_conformer_application.yaml"
demo_wav
=
"../source/demo/demo_16k.wav"
ie_model_path
=
"../source/model"
tts_wav
=
"../source/demo/tts.wav"
text
=
"今天天气真不错"
ie_text
=
"今天晚上我从大牛坊出发去三里屯花了六十五块钱"
robot
=
Robot
(
asr_config
,
tts_config
,
asr_init_path
=
demo_wav
)
res
=
robot
.
speech2text
(
demo_wav
)
print
(
res
)
res
=
robot
.
chat
(
text
)
print
(
res
)
print
(
"tts offline"
)
robot
.
text2speech
(
res
,
tts_wav
)
print
(
"ie test"
)
res
=
robot
.
ie
(
ie_text
)
print
(
res
)
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录