Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
777a0262
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
777a0262
编写于
1月 25, 2022
作者:
L
lym0302
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add tts server, test=tts
上级
2a530d49
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
279 addition
and
36 deletion
+279
-36
speechserving/speechserving/conf/tts/tts.yaml
speechserving/speechserving/conf/tts/tts.yaml
+38
-0
speechserving/speechserving/engine/tts/python/tts_engine.py
speechserving/speechserving/engine/tts/python/tts_engine.py
+143
-0
speechserving/speechserving/main.py
speechserving/speechserving/main.py
+33
-9
speechserving/speechserving/restful/api.py
speechserving/speechserving/restful/api.py
+5
-15
speechserving/speechserving/restful/request.py
speechserving/speechserving/restful/request.py
+21
-9
speechserving/speechserving/restful/response.py
speechserving/speechserving/restful/response.py
+39
-3
未找到文件。
speechserving/speechserving/conf/tts/tts.yaml
0 → 100644
浏览文件 @
777a0262
# This is the parameter configuration file for TTS server.

##################################################################
# TTS SERVER SETTING #
##################################################################
# Address and port the server binds to.
host: '0.0.0.0'
port: 8692

##################################################################
# ACOUSTIC MODEL SETTING #
# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
# 'fastspeech2_ljspeech', 'fastspeech2_aishell3',
# 'fastspeech2_vctk']
##################################################################
am: 'fastspeech2_csmsc'
# The keys below are left empty, so they load as null (None in Python).
# NOTE(review): presumably a null value makes the engine fall back to
# the default resources for the chosen `am` - confirm against the executor.
am_config:
am_ckpt:
am_stat:
phones_dict:
tones_dict:
speaker_dict:
spk_id: 0

##################################################################
# VOCODER SETTING #
# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
# 'pwgan_vctk', 'mb_melgan_csmsc']
##################################################################
voc: 'pwgan_csmsc'
# Empty values load as null, same as the acoustic model paths above.
voc_config:
voc_ckpt:
voc_stat:

##################################################################
# OTHERS #
##################################################################
lang: 'zh'
# NOTE(review): YAML does not evaluate expressions, so this loads as the
# literal string "paddle.get_device()" rather than a device name - confirm
# how (and whether) the server consumes this key.
device: paddle.get_device()
\ No newline at end of file
speechserving/speechserving/engine/tts/python/tts_engine.py
0 → 100644
浏览文件 @
777a0262
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
import
base64
import
librosa
import
numpy
as
np
import
soundfile
as
sf
import
yaml
from
engine.base_engine
import
BaseEngine
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.tts.infer
import
TTSExecutor
__all__
=
[
'TTSEngine'
]
class TTSServerExecutor(TTSExecutor):
    """TTS executor used by the serving engine.

    After the parent constructor has run, ``self.parser`` is set to an
    argument parser that exposes a single ``--conf`` option: the path of
    the YAML file holding the server's TTS parameters.
    """

    def __init__(self):
        super().__init__()

        # Build the parser completely before publishing it on the instance.
        conf_parser = argparse.ArgumentParser(
            prog='paddlespeech.tts', add_help=True)
        conf_parser.add_argument(
            '--conf',
            type=str,
            default='./conf/tts/tts.yaml',
            help='Configuration parameters.')
        self.parser = conf_parser
class TTSEngine(BaseEngine):
    """TTS server engine.

    Reads the YAML configuration named by the ``--conf`` command-line
    option, initializes a ``TTSServerExecutor`` from it, and synthesizes
    sentences into base64-encoded audio samples.

    NOTE(review): the original docstring states the metaclass defaults to
    Singleton; that would come from ``BaseEngine`` - confirm there.

    Args:
        name: Currently unused; kept so existing callers keep working.
    """

    def __init__(self, name=None):
        """Initialize TTS server engine."""
        super(TTSEngine, self).__init__()
        self.executor = TTSServerExecutor()

        # The engine is constructed inside a host process that owns
        # sys.argv and defines its own options (e.g. --config_file,
        # --log_file). parse_args() would exit on those unknown options,
        # so only pick out --conf and ignore everything else.
        known_args, _ = self.executor.parser.parse_known_args()
        config_path = known_args.conf

        with open(config_path, 'rt') as f:
            self.conf_dict = yaml.safe_load(f)

        # Every key is required in the YAML file (a missing one raises
        # KeyError); the values are forwarded unchanged to the executor.
        init_keys = ("am", "am_config", "am_ckpt", "am_stat", "phones_dict",
                     "tones_dict", "speaker_dict", "voc", "voc_config",
                     "voc_ckpt", "voc_stat", "lang")
        self.executor._init_from_path(
            **{key: self.conf_dict[key]
               for key in init_keys})

        logger.info("Initialize TTS server engine successfully.")

    def postprocess(self,
                    wav,
                    original_fs: int,
                    target_fs: int=16000,
                    volume: float=1.0,
                    speed: float=1.0,
                    audio_path: str=None,
                    audio_format: str="wav"):
        """Post-process synthesized audio: sample rate, volume, optional save.

        Args:
            wav (numpy(float)): Synthesized audio sample points.
            original_fs (int): Original audio sample rate.
            target_fs (int): Target audio sample rate. ``0`` or a value
                above ``original_fs`` keeps the original rate (no
                up-sampling is performed).
            volume (float): Factor every sample is multiplied by.
            speed (float): Target speed. Not applied yet (see TODO below).
            audio_path (str): If not None, the result is also written to
                this path with ``soundfile``.
            audio_format (str): Currently unused.

        Returns:
            tuple: ``(target_fs, wav_base64)`` - the sample rate actually
            used and the base64 text of the sample array's raw buffer.
        """
        # transform sample_rate
        if target_fs == 0 or target_fs > original_fs:
            target_fs = original_fs
            wav_tar_fs = wav
        else:
            # Sample rates are passed by keyword: librosa >= 0.10 made them
            # keyword-only, and the keywords are accepted by older releases.
            wav_tar_fs = librosa.resample(
                np.squeeze(wav), orig_sr=original_fs, target_sr=target_fs)

        # transform volume
        wav_vol = wav_tar_fs * volume

        # transform speed
        # TODO: speed is accepted but not applied yet.

        # One column per channel, the 2-D layout handed to soundfile below.
        target_wav = wav_vol.reshape(-1, 1)

        # save audio
        if audio_path is not None:
            sf.write(audio_path, target_wav, target_fs)
            logger.info('Wave file has been generated: {}'.format(audio_path))

        # wav to base64: encodes the array's sample buffer directly, not a
        # WAV container.
        wav_base64 = base64.b64encode(target_wav).decode('utf-8')

        return target_fs, wav_base64

    def run(self,
            sentence: str,
            spk_id: int=0,
            speed: float=1.0,
            volume: float=1.0,
            sample_rate: int=0,
            save_path: str=None,
            audio_format: str="wav"):
        """Synthesize one sentence and return it as base64 audio.

        Args:
            sentence (str): Text to synthesize.
            spk_id (int): Speaker id forwarded to the executor.
            speed (float): Forwarded to ``postprocess``.
            volume (float): Forwarded to ``postprocess``.
            sample_rate (int): Requested output sample rate; ``0`` keeps
                the acoustic model's rate.
            save_path (str): Optional path to also save the audio to.
            audio_format (str): Forwarded to ``postprocess``.

        Returns:
            tuple: ``(lang, target_sample_rate, wav_base64)``.
        """
        lang = self.conf_dict["lang"]

        # infer() leaves its result in self.executor._outputs.
        self.executor.infer(
            text=sentence, lang=lang, am=self.conf_dict["am"], spk_id=spk_id)

        target_sample_rate, wav_base64 = self.postprocess(
            wav=self.executor._outputs['wav'].numpy(),
            original_fs=self.executor.am_config.fs,
            target_fs=sample_rate,
            volume=volume,
            speed=speed,
            audio_path=save_path,
            audio_format=audio_format)

        return lang, target_sample_rate, wav_base64
speechserving/speechserving/main.py
浏览文件 @
777a0262
...
@@ -13,31 +13,55 @@
...
@@ -13,31 +13,55 @@
# limitations under the License.
# limitations under the License.
import
argparse
import
argparse
import
asr_api
as
api_run
import
uvicorn
import
tts_api
as
api_run
import
yaml
from
engine.tts.python.tts_engine
import
TTSEngine
from
fastapi
import
FastAPI
from
restful.api
import
router
as
api_router
from
paddlespeech.cli.log
import
logger
app
=
FastAPI
(
title
=
"PaddleSpeech Serving API"
,
description
=
"Api"
,
version
=
"0.0.1"
)
def
init
(
args
):
def
init
(
args
):
""" 系统初始化
""" 系统初始化
"""
"""
app
.
include_router
(
api_router
)
# engine single
TTS_ENGINE
=
TTSEngine
()
# todo others
return
True
def
main
(
args
):
def
main
(
args
):
"""主程序入口"""
"""主程序入口"""
if
init
(
args
):
#TODO configuration
api_run
.
run
()
from
yacs.config
import
CfgNode
app
.
run
(
host
=
'0.0.0.0'
,
port
=
conf
.
port
)
with
open
(
args
.
config_file
,
'rt'
)
as
f
:
config
=
CfgNode
(
yaml
.
safe_load
(
f
))
if
init
(
args
):
uvicorn
.
run
(
app
,
host
=
config
.
host
,
port
=
config
.
port
,
debug
=
True
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--config_file"
,
action
=
"store"
,
parser
.
add_argument
(
help
=
"yaml file of the app"
,
default
=
"./conf/application.yaml"
)
"--config_file"
,
parser
.
add_argument
(
"--log_file"
,
action
=
"store"
,
action
=
"store"
,
help
=
"log file"
,
default
=
"./log/paddlespeech.log"
)
help
=
"yaml file of the app"
,
default
=
"./server.yaml"
)
parser
.
add_argument
(
"--log_file"
,
action
=
"store"
,
help
=
"log file"
,
default
=
"./log/paddlespeech.log"
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
main
(
args
)
main
(
args
)
speechserving/speechserving/restful/api.py
浏览文件 @
777a0262
...
@@ -13,19 +13,9 @@
...
@@ -13,19 +13,9 @@
# limitations under the License.
# limitations under the License.
from
fastapi
import
APIRouter
from
fastapi
import
APIRouter
router
=
APIRouter
()
from
.tts_api
import
router
as
tts_router
#from .asr_api import router as asr_router
router
.
include_router
(
auth_router
)
router
.
include_router
(
user_router
)
router
.
include_router
(
profile_router
)
router
.
include_router
(
comment_router
)
router
.
include_router
(
article_router
)
router
.
include_router
(
tag_router
)
router
=
APIRouter
()
#router.include_router(asr_router)
def
init_app
(
app
):
router
.
include_router
(
tts_router
)
app
.
include_router
(
router
)
speechserving/speechserving/restful/request.py
浏览文件 @
777a0262
...
@@ -11,13 +11,13 @@
...
@@ -11,13 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
typing
import
Optional
from
typing
import
List
from
typing
import
List
from
typing
import
Optional
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
__all__
=
[
'ASRRequest'
,
'TTSRequest'
]
# Each exported name must be its own string: a single combined string
# such as 'ASRRequest, TTSRequest' names no attribute and makes
# `from ... import *` raise AttributeError.
__all__ = ['ASRRequest', 'TTSRequest']
#****************************************************************************************/
#****************************************************************************************/
#************************************ ASR request ***************************************/
#************************************ ASR request ***************************************/
...
@@ -44,13 +44,25 @@ class ASRRequest(BaseModel):
...
@@ -44,13 +44,25 @@ class ASRRequest(BaseModel):
#************************************ TTS request ***************************************/
#************************************ TTS request ***************************************/
#****************************************************************************************/
#****************************************************************************************/
class
TTSRequest
(
BaseModel
):
class
TTSRequest
(
BaseModel
):
"""
"""TTS request
request body example
request body example
{
{
"audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...",
"text": "你好,欢迎使用百度飞桨语音合成服务。",
"audio_format": "wav",
"spk_id": 0,
"sample_rate": 16000,
"speed": 1.0,
"lang ": "zh_cn",
"volume": 1.0,
"ptt ":false
"sample_rate": 0,
"tts_audio_path": "./tts.wav",
"audio_format": "wav"
}
}
"""
\ No newline at end of file
"""
text
:
str
spk_id
:
int
=
0
speed
:
float
=
1.0
volume
:
float
=
1.0
sample_rate
:
int
=
0
save_path
:
str
=
None
audio_format
:
str
=
"wav"
speechserving/speechserving/restful/response.py
浏览文件 @
777a0262
...
@@ -11,23 +11,25 @@
...
@@ -11,23 +11,25 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
typing
import
Optional
from
typing
import
List
from
typing
import
List
from
typing
import
Optional
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
__all__
=
[
'ASRResponse'
]
__all__
=
[
'ASRResponse'
,
'TTSResponse'
]
class
Message
(
BaseModel
):
class
Message
(
BaseModel
):
description
:
str
description
:
str
#****************************************************************************************/
#****************************************************************************************/
#************************************ ASR response **************************************/
#************************************ ASR response **************************************/
#****************************************************************************************/
#****************************************************************************************/
class
AsrResult
(
BaseModel
):
class
AsrResult
(
BaseModel
):
transcription
:
str
transcription
:
str
class
ASRResponse
(
BaseModel
):
class
ASRResponse
(
BaseModel
):
"""
"""
response example
response example
...
@@ -36,7 +38,7 @@ class ASRResponse(BaseModel):
...
@@ -36,7 +38,7 @@ class ASRResponse(BaseModel):
"code": 0,
"code": 0,
"message": {
"message": {
"description": "success"
"description": "success"
}
}
,
"result": {
"result": {
"transcription": "你好,飞桨"
"transcription": "你好,飞桨"
}
}
...
@@ -47,6 +49,40 @@ class ASRResponse(BaseModel):
...
@@ -47,6 +49,40 @@ class ASRResponse(BaseModel):
message
:
Message
message
:
Message
result
:
AsrResult
result
:
AsrResult
#****************************************************************************************/
#****************************************************************************************/
#************************************ TTS response **************************************/
#************************************ TTS response **************************************/
#****************************************************************************************/
#****************************************************************************************/
class TTSResult(BaseModel):
    """Payload carried in the ``result`` field of a TTS response."""

    # Language of the synthesized speech.
    lang: str = "zh"
    # Sample rate of the returned audio (required).
    sample_rate: int
    # Speaker id, speed and volume associated with the synthesis.
    spk_id: int = 0
    speed: float = 1.0
    volume: float = 1.0
    # Path the audio was saved to, if any. Declared Optional so an
    # explicit None validates (a plain `str` annotation rejects None under
    # pydantic v2; under v1 the two spellings behave the same).
    save_path: Optional[str] = None
    # Audio content as text (base64 in the response example) (required).
    audio: str
class TTSResponse(BaseModel):
    """Response body returned for a TTS request.

    Wraps the status fields (``success``, ``code``, ``message``) around
    the synthesis result.

    response example
    {
        "success": true,
        "code": 0,
        "message": {
            "description": "success"
        },
        "result": {
            "lang": "zh",
            "sample_rate": 24000,
            "speed": 1.0,
            "volume": 1.0,
            "audio": "LTI1OTIuNjI1OTUwMzQsOTk2OS41NDk4...",
            "save_path": "./tts.wav"
        }
    }
    """
    # Whether the request succeeded, and its numeric status code
    # (0 in the success example above).
    success: bool
    code: int
    # Human-readable status description.
    message: Message
    # Synthesized audio and the parameters it was produced with.
    result: TTSResult
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录