Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
8ef92a94
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8ef92a94
编写于
3月 10, 2022
作者:
L
lym0302
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
modify, test=doc
上级
87ec33a6
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
79 addition
and
96 deletion
+79
-96
demos/speech_server/cls_client.sh
demos/speech_server/cls_client.sh
+4
-0
docs/source/released_model.md
docs/source/released_model.md
+1
-1
paddlespeech/server/bin/paddlespeech_client.py
paddlespeech/server/bin/paddlespeech_client.py
+68
-91
paddlespeech/server/engine/tts/paddleinference/tts_engine.py
paddlespeech/server/engine/tts/paddleinference/tts_engine.py
+1
-1
paddlespeech/server/engine/tts/python/tts_engine.py
paddlespeech/server/engine/tts/python/tts_engine.py
+1
-1
paddlespeech/server/restful/response.py
paddlespeech/server/restful/response.py
+2
-1
paddlespeech/server/restful/tts_api.py
paddlespeech/server/restful/tts_api.py
+2
-1
未找到文件。
demos/speech_server/cls_client.sh
0 → 100644
浏览文件 @
8ef92a94
#!/bin/bash
wget
-c
https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
paddlespeech_client cls
--server_ip
127.0.0.1
--port
8090
--input
./zh.wav
--topk
1
docs/source/released_model.md
浏览文件 @
8ef92a94
...
@@ -69,7 +69,7 @@ GE2E + FastSpeech2 | AISHELL-3 |[ge2e-fastspeech2-aishell3](https://github.com/
...
@@ -69,7 +69,7 @@ GE2E + FastSpeech2 | AISHELL-3 |[ge2e-fastspeech2-aishell3](https://github.com/
Model Type | Dataset| Example Link | Pretrained Models | Static Models
Model Type | Dataset| Example Link | Pretrained Models | Static Models
:-------------:| :------------:| :-----: | :-----: | :-----:
:-------------:| :------------:| :-----: | :-----: | :-----:
PANN | Audioset|
[
audioset_tagging_cnn
](
https://github.com/qiuqiangkong/audioset_tagging_cnn
)
|
[
panns_cnn6.pdparams
](
https://bj.bcebos.com/paddleaudio/models/panns_cnn6.pdparams
)
,
[
panns_cnn10.pdparams
](
https://bj.bcebos.com/paddleaudio/models/panns_cnn10.pdparams
)
,
[
panns_cnn14.pdparams
](
https://bj.bcebos.com/paddleaudio/models/panns_cnn14.pdparams
)
|
[
panns_cnn6_static.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz
)
,
[
panns_cnn10_static.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz
)
,
[
panns_cnn14_static.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz
)
PANN | Audioset|
[
audioset_tagging_cnn
](
https://github.com/qiuqiangkong/audioset_tagging_cnn
)
|
[
panns_cnn6.pdparams
](
https://bj.bcebos.com/paddleaudio/models/panns_cnn6.pdparams
)
,
[
panns_cnn10.pdparams
](
https://bj.bcebos.com/paddleaudio/models/panns_cnn10.pdparams
)
,
[
panns_cnn14.pdparams
](
https://bj.bcebos.com/paddleaudio/models/panns_cnn14.pdparams
)
|
[
panns_cnn6_static.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz
)
(
18M
)
,
[
panns_cnn10_static.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz
)(
19M
)
,
[
panns_cnn14_static.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz
)(
289M
)
PANN | ESC-50 |
[
pann-esc50
](
../../examples/esc50/cls0
)
|
[
esc50_cnn6.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn6.tar.gz
)
,
[
esc50_cnn10.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn10.tar.gz
)
,
[
esc50_cnn14.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn14.tar.gz
)
PANN | ESC-50 |
[
pann-esc50
](
../../examples/esc50/cls0
)
|
[
esc50_cnn6.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn6.tar.gz
)
,
[
esc50_cnn10.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn10.tar.gz
)
,
[
esc50_cnn14.tar.gz
](
https://paddlespeech.bj.bcebos.com/cls/esc50/esc50_cnn14.tar.gz
)
## Punctuation Restoration Models
## Punctuation Restoration Models
...
...
paddlespeech/server/bin/paddlespeech_client.py
浏览文件 @
8ef92a94
...
@@ -70,13 +70,10 @@ class TTSClientExecutor(BaseExecutor):
...
@@ -70,13 +70,10 @@ class TTSClientExecutor(BaseExecutor):
choices
=
[
0
,
8000
,
16000
],
choices
=
[
0
,
8000
,
16000
],
help
=
'Sampling rate, the default is the same as the model'
)
help
=
'Sampling rate, the default is the same as the model'
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
'--output'
,
'--output'
,
type
=
str
,
default
=
None
,
help
=
'Synthesized audio file'
)
type
=
str
,
default
=
"./output.wav"
,
help
=
'Synthesized audio file'
)
def
postprocess
(
self
,
response_dict
:
dict
,
outfile
:
str
)
->
float
:
def
postprocess
(
self
,
wav_base64
:
str
,
outfile
:
str
)
->
float
:
wav_base64
=
response_dict
[
"result"
][
"audio"
]
#
wav_base64 = response_dict["result"]["audio"]
audio_data_byte
=
base64
.
b64decode
(
wav_base64
)
audio_data_byte
=
base64
.
b64decode
(
wav_base64
)
# from byte
# from byte
samples
,
sample_rate
=
soundfile
.
read
(
samples
,
sample_rate
=
soundfile
.
read
(
...
@@ -93,37 +90,38 @@ class TTSClientExecutor(BaseExecutor):
...
@@ -93,37 +90,38 @@ class TTSClientExecutor(BaseExecutor):
else
:
else
:
logger
.
error
(
"The format for saving audio only supports wav or pcm"
)
logger
.
error
(
"The format for saving audio only supports wav or pcm"
)
duration
=
len
(
samples
)
/
sample_rate
return
duration
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
args
=
self
.
parser
.
parse_args
(
argv
)
args
=
self
.
parser
.
parse_args
(
argv
)
try
:
input_
=
args
.
input
url
=
'http://'
+
args
.
server_ip
+
":"
+
str
(
server_ip
=
args
.
server_ip
args
.
port
)
+
'/paddlespeech/tts'
port
=
args
.
port
request
=
{
spk_id
=
args
.
spk_id
"text"
:
args
.
input
,
speed
=
args
.
speed
"spk_id"
:
args
.
spk_id
,
volume
=
args
.
volume
"speed"
:
args
.
speed
,
sample_rate
=
args
.
sample_rate
"volume"
:
args
.
volume
,
output
=
args
.
output
"sample_rate"
:
args
.
sample_rate
,
"save_path"
:
args
.
output
}
st
=
time
.
time
()
response
=
requests
.
post
(
url
,
json
.
dumps
(
request
))
time_consume
=
time
.
time
()
-
st
response_dict
=
response
.
json
()
duration
=
self
.
postprocess
(
response_dict
,
args
.
output
)
try
:
time_start
=
time
.
time
()
res
=
self
(
input
=
input_
,
server_ip
=
server_ip
,
port
=
port
,
spk_id
=
spk_id
,
speed
=
speed
,
volume
=
volume
,
sample_rate
=
sample_rate
,
output
=
output
)
time_end
=
time
.
time
()
time_consume
=
time_end
-
time_start
response_dict
=
res
.
json
()
logger
.
info
(
response_dict
[
"message"
])
logger
.
info
(
response_dict
[
"message"
])
logger
.
info
(
"Save synthesized audio successfully on %s."
%
logger
.
info
(
"Save synthesized audio successfully on %s."
%
(
output
))
(
args
.
output
))
logger
.
info
(
"Audio duration: %f s."
%
logger
.
info
(
"Audio duration: %f s."
%
(
duration
))
(
response_dict
[
'result'
][
'duration'
]
))
logger
.
info
(
"Response time: %f s."
%
(
time_consume
))
logger
.
info
(
"Response time: %f s."
%
(
time_consume
))
return
True
return
True
except
BaseException
:
except
Exception
as
e
:
logger
.
error
(
"Failed to synthesized audio."
)
logger
.
error
(
"Failed to synthesized audio."
)
return
False
return
False
...
@@ -136,7 +134,7 @@ class TTSClientExecutor(BaseExecutor):
...
@@ -136,7 +134,7 @@ class TTSClientExecutor(BaseExecutor):
speed
:
float
=
1.0
,
speed
:
float
=
1.0
,
volume
:
float
=
1.0
,
volume
:
float
=
1.0
,
sample_rate
:
int
=
0
,
sample_rate
:
int
=
0
,
output
:
str
=
"./output.wav"
):
output
:
str
=
None
):
"""
"""
Python API to call an executor.
Python API to call an executor.
"""
"""
...
@@ -151,20 +149,11 @@ class TTSClientExecutor(BaseExecutor):
...
@@ -151,20 +149,11 @@ class TTSClientExecutor(BaseExecutor):
"save_path"
:
output
"save_path"
:
output
}
}
try
:
res
=
requests
.
post
(
url
,
json
.
dumps
(
request
))
st
=
time
.
time
()
response_dict
=
res
.
json
()
response
=
requests
.
post
(
url
,
json
.
dumps
(
request
))
if
not
output
:
time_consume
=
time
.
time
()
-
st
self
.
postprocess
(
response_dict
[
"result"
][
"audio"
],
output
)
response_dict
=
response
.
json
()
return
res
duration
=
self
.
postprocess
(
response_dict
,
output
)
print
(
response_dict
[
"message"
])
print
(
"Save synthesized audio successfully on %s."
%
(
output
))
print
(
"Audio duration: %f s."
%
(
duration
))
print
(
"Response time: %f s."
%
(
time_consume
))
print
(
"RTF: %f "
%
(
time_consume
/
duration
))
except
BaseException
:
print
(
"Failed to synthesized audio."
)
@
cli_client_register
(
@
cli_client_register
(
...
@@ -193,24 +182,27 @@ class ASRClientExecutor(BaseExecutor):
...
@@ -193,24 +182,27 @@ class ASRClientExecutor(BaseExecutor):
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
args
=
self
.
parser
.
parse_args
(
argv
)
args
=
self
.
parser
.
parse_args
(
argv
)
url
=
'http://'
+
args
.
server_ip
+
":"
+
str
(
input_
=
args
.
input
args
.
port
)
+
'/paddlespeech/asr'
server_ip
=
args
.
server_ip
audio
=
wav2base64
(
args
.
input
)
port
=
args
.
port
data
=
{
sample_rate
=
args
.
sample_rate
"audio"
:
audio
,
lang
=
args
.
lang
"audio_format"
:
args
.
audio_format
,
audio_format
=
args
.
audio_format
"sample_rate"
:
args
.
sample_rate
,
"lang"
:
args
.
lang
,
}
time_start
=
time
.
time
()
try
:
try
:
r
=
requests
.
post
(
url
=
url
,
data
=
json
.
dumps
(
data
))
time_start
=
time
.
time
()
# ending Timestamp
res
=
self
(
input
=
input_
,
server_ip
=
server_ip
,
port
=
port
,
sample_rate
=
sample_rate
,
lang
=
lang
,
audio_format
=
audio_format
)
time_end
=
time
.
time
()
time_end
=
time
.
time
()
logger
.
info
(
r
.
json
())
logger
.
info
(
r
es
.
json
())
logger
.
info
(
"
time cost
%f s."
%
(
time_end
-
time_start
))
logger
.
info
(
"
Response time
%f s."
%
(
time_end
-
time_start
))
return
True
return
True
except
BaseException
:
except
Exception
as
e
:
logger
.
error
(
"Failed to speech recognition."
)
logger
.
error
(
"Failed to speech recognition."
)
return
False
return
False
...
@@ -234,15 +226,9 @@ class ASRClientExecutor(BaseExecutor):
...
@@ -234,15 +226,9 @@ class ASRClientExecutor(BaseExecutor):
"sample_rate"
:
sample_rate
,
"sample_rate"
:
sample_rate
,
"lang"
:
lang
,
"lang"
:
lang
,
}
}
time_start
=
time
.
time
()
try
:
res
=
requests
.
post
(
url
=
url
,
data
=
json
.
dumps
(
data
))
r
=
requests
.
post
(
url
=
url
,
data
=
json
.
dumps
(
data
))
return
res
# ending Timestamp
time_end
=
time
.
time
()
print
(
r
.
json
())
print
(
"time cost %f s."
%
(
time_end
-
time_start
))
except
BaseException
:
print
(
"Failed to speech recognition."
)
@
cli_client_register
(
@
cli_client_register
(
...
@@ -270,22 +256,19 @@ class CLSClientExecutor(BaseExecutor):
...
@@ -270,22 +256,19 @@ class CLSClientExecutor(BaseExecutor):
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
args
=
self
.
parser
.
parse_args
(
argv
)
args
=
self
.
parser
.
parse_args
(
argv
)
url
=
'http://'
+
args
.
server_ip
+
":"
+
str
(
input_
=
args
.
input
args
.
port
)
+
'/paddlespeech/cls'
server_ip
=
args
.
server_ip
audio
=
wav2base64
(
args
.
input
)
port
=
args
.
port
data
=
{
topk
=
args
.
topk
"audio"
:
audio
,
"topk"
:
args
.
topk
,
}
time_start
=
time
.
time
()
try
:
try
:
r
=
requests
.
post
(
url
=
url
,
data
=
json
.
dumps
(
data
)
)
time_start
=
time
.
time
(
)
# ending Timestamp
res
=
self
(
input
=
input_
,
server_ip
=
server_ip
,
port
=
port
,
topk
=
topk
)
time_end
=
time
.
time
()
time_end
=
time
.
time
()
logger
.
info
(
r
.
json
())
logger
.
info
(
r
es
.
json
())
logger
.
info
(
"Response time %f s."
%
(
time_end
-
time_start
))
logger
.
info
(
"Response time %f s."
%
(
time_end
-
time_start
))
return
True
return
True
except
BaseException
:
except
Exception
as
e
:
logger
.
error
(
"Failed to speech classification."
)
logger
.
error
(
"Failed to speech classification."
)
return
False
return
False
...
@@ -302,12 +285,6 @@ class CLSClientExecutor(BaseExecutor):
...
@@ -302,12 +285,6 @@ class CLSClientExecutor(BaseExecutor):
url
=
'http://'
+
server_ip
+
":"
+
str
(
port
)
+
'/paddlespeech/cls'
url
=
'http://'
+
server_ip
+
":"
+
str
(
port
)
+
'/paddlespeech/cls'
audio
=
wav2base64
(
input
)
audio
=
wav2base64
(
input
)
data
=
{
"audio"
:
audio
,
"topk"
:
topk
}
data
=
{
"audio"
:
audio
,
"topk"
:
topk
}
time_start
=
time
.
time
()
try
:
res
=
requests
.
post
(
url
=
url
,
data
=
json
.
dumps
(
data
))
r
=
requests
.
post
(
url
=
url
,
data
=
json
.
dumps
(
data
))
return
res
# ending Timestamp
time_end
=
time
.
time
()
print
(
r
.
json
())
print
(
"Response time %f s."
%
(
time_end
-
time_start
))
except
BaseException
:
print
(
"Failed to speech classification."
)
paddlespeech/server/engine/tts/paddleinference/tts_engine.py
浏览文件 @
8ef92a94
...
@@ -531,4 +531,4 @@ class TTSEngine(BaseEngine):
...
@@ -531,4 +531,4 @@ class TTSEngine(BaseEngine):
postprocess_time
))
postprocess_time
))
logger
.
info
(
"RTF: {}"
.
format
(
rtf
))
logger
.
info
(
"RTF: {}"
.
format
(
rtf
))
return
lang
,
target_sample_rate
,
wav_base64
return
lang
,
target_sample_rate
,
duration
,
wav_base64
paddlespeech/server/engine/tts/python/tts_engine.py
浏览文件 @
8ef92a94
...
@@ -250,4 +250,4 @@ class TTSEngine(BaseEngine):
...
@@ -250,4 +250,4 @@ class TTSEngine(BaseEngine):
logger
.
info
(
"RTF: {}"
.
format
(
rtf
))
logger
.
info
(
"RTF: {}"
.
format
(
rtf
))
logger
.
info
(
"device: {}"
.
format
(
self
.
device
))
logger
.
info
(
"device: {}"
.
format
(
self
.
device
))
return
lang
,
target_sample_rate
,
wav_base64
return
lang
,
target_sample_rate
,
duration
,
wav_base64
paddlespeech/server/restful/response.py
浏览文件 @
8ef92a94
...
@@ -54,10 +54,11 @@ class ASRResponse(BaseModel):
...
@@ -54,10 +54,11 @@ class ASRResponse(BaseModel):
#****************************************************************************************/
#****************************************************************************************/
class
TTSResult
(
BaseModel
):
class
TTSResult
(
BaseModel
):
lang
:
str
=
"zh"
lang
:
str
=
"zh"
sample_rate
:
int
spk_id
:
int
=
0
spk_id
:
int
=
0
speed
:
float
=
1.0
speed
:
float
=
1.0
volume
:
float
=
1.0
volume
:
float
=
1.0
sample_rate
:
int
duration
:
float
save_path
:
str
=
None
save_path
:
str
=
None
audio
:
str
audio
:
str
...
...
paddlespeech/server/restful/tts_api.py
浏览文件 @
8ef92a94
...
@@ -98,7 +98,7 @@ def tts(request_body: TTSRequest):
...
@@ -98,7 +98,7 @@ def tts(request_body: TTSRequest):
tts_engine
=
engine_pool
[
'tts'
]
tts_engine
=
engine_pool
[
'tts'
]
logger
.
info
(
"Get tts engine successfully."
)
logger
.
info
(
"Get tts engine successfully."
)
lang
,
target_sample_rate
,
wav_base64
=
tts_engine
.
run
(
lang
,
target_sample_rate
,
duration
,
wav_base64
=
tts_engine
.
run
(
text
,
spk_id
,
speed
,
volume
,
sample_rate
,
save_path
)
text
,
spk_id
,
speed
,
volume
,
sample_rate
,
save_path
)
response
=
{
response
=
{
...
@@ -113,6 +113,7 @@ def tts(request_body: TTSRequest):
...
@@ -113,6 +113,7 @@ def tts(request_body: TTSRequest):
"speed"
:
speed
,
"speed"
:
speed
,
"volume"
:
volume
,
"volume"
:
volume
,
"sample_rate"
:
target_sample_rate
,
"sample_rate"
:
target_sample_rate
,
"duration"
:
duration
,
"save_path"
:
save_path
,
"save_path"
:
save_path
,
"audio"
:
wav_base64
"audio"
:
wav_base64
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录