Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
e66233fa
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e66233fa
编写于
1月 26, 2022
作者:
L
liangym
提交者:
GitHub
1月 26, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1386 from lym0302/tts-server
[server] tts server
上级
6bd011d7
299835a8
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
244 addition
and
14 deletion
+244
-14
speechserving/speechserving/conf/tts/tts.yaml
speechserving/speechserving/conf/tts/tts.yaml
+38
-0
speechserving/speechserving/engine/tts/python/tts_engine.py
speechserving/speechserving/engine/tts/python/tts_engine.py
+143
-0
speechserving/speechserving/main.py
speechserving/speechserving/main.py
+7
-4
speechserving/speechserving/restful/api.py
speechserving/speechserving/restful/api.py
+3
-2
speechserving/speechserving/restful/request.py
speechserving/speechserving/restful/request.py
+19
-7
speechserving/speechserving/restful/response.py
speechserving/speechserving/restful/response.py
+34
-1
未找到文件。
speechserving/speechserving/conf/tts/tts.yaml
0 → 100644
浏览文件 @
e66233fa
# This is the parameter configuration file for TTS server.
##################################################################
# TTS SERVER SETTING #
##################################################################
host
:
'
0.0.0.0'
port
:
8692
##################################################################
# ACOUSTIC MODEL SETTING #
# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
# 'fastspeech2_ljspeech', 'fastspeech2_aishell3',
# 'fastspeech2_vctk']
##################################################################
am
:
'
fastspeech2_csmsc'
am_config
:
am_ckpt
:
am_stat
:
phones_dict
:
tones_dict
:
speaker_dict
:
spk_id
:
0
##################################################################
# VOCODER SETTING #
# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
# 'pwgan_vctk', 'mb_melgan_csmsc']
##################################################################
voc
:
'
pwgan_csmsc'
voc_config
:
voc_ckpt
:
voc_stat
:
##################################################################
# OTHERS #
##################################################################
lang
:
'
zh'
device
:
paddle.get_device()
\ No newline at end of file
speechserving/speechserving/engine/tts/python/tts_engine.py
0 → 100644
浏览文件 @
e66233fa
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
import
base64
import
librosa
import
numpy
as
np
import
soundfile
as
sf
import
yaml
from
engine.base_engine
import
BaseEngine
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.tts.infer
import
TTSExecutor
__all__
=
[
'TTSEngine'
]
class TTSServerExecutor(TTSExecutor):
    """TTSExecutor subclass used by the TTS serving engine.

    After the parent initializer runs, ``self.parser`` is set to an argparse
    parser that defines a single ``--conf`` option (path of the YAML
    configuration file).
    """

    def __init__(self):
        super().__init__()

        # Build the parser first, then attach it to the instance.
        conf_parser = argparse.ArgumentParser(
            prog='paddlespeech.tts', add_help=True)
        conf_parser.add_argument(
            '--conf',
            type=str,
            default='./conf/tts/tts.yaml',
            help='Configuration parameters.')
        self.parser = conf_parser
class TTSEngine(BaseEngine):
    """TTS server engine.

    On construction it reads the YAML file named by the ``--conf`` option,
    initializes a ``TTSServerExecutor`` from that configuration, and keeps the
    parsed configuration in ``self.conf_dict``. ``run`` synthesizes one
    sentence and returns the audio as a base64 string.

    Args:
        name: Not used by this class; accepted for call-site compatibility.
    """

    def __init__(self, name=None):
        """Initialize TTS server engine."""
        super(TTSEngine, self).__init__()
        self.executor = TTSServerExecutor()

        # Use parse_known_args() rather than parse_args(): the process that
        # hosts this engine may be started with options this parser does not
        # define (e.g. --config_file / --log_file), and parse_args() would
        # terminate the process with "unrecognized arguments".
        known_args, _unknown = self.executor.parser.parse_known_args()
        config_path = known_args.conf

        # Explicit encoding so loading does not depend on the platform locale.
        with open(config_path, 'rt', encoding='utf-8') as f:
            self.conf_dict = yaml.safe_load(f)

        self.executor._init_from_path(
            am=self.conf_dict["am"],
            am_config=self.conf_dict["am_config"],
            am_ckpt=self.conf_dict["am_ckpt"],
            am_stat=self.conf_dict["am_stat"],
            phones_dict=self.conf_dict["phones_dict"],
            tones_dict=self.conf_dict["tones_dict"],
            speaker_dict=self.conf_dict["speaker_dict"],
            voc=self.conf_dict["voc"],
            voc_config=self.conf_dict["voc_config"],
            voc_ckpt=self.conf_dict["voc_ckpt"],
            voc_stat=self.conf_dict["voc_stat"],
            lang=self.conf_dict["lang"])

        logger.info("Initialize TTS server engine successfully.")

    def postprocess(self,
                    wav,
                    original_fs: int,
                    target_fs: int = 16000,
                    volume: float = 1.0,
                    speed: float = 1.0,
                    audio_path: str = None,
                    audio_format: str = "wav"):
        """Post-processing operations, including speed, volume, sample rate, save audio file

        Args:
            wav (numpy(float)): Synthesized audio sample points
            original_fs (int): original audio sample rate
            target_fs (int): target audio sample rate; 0, or any value above
                ``original_fs``, keeps the original rate (no upsampling)
            volume (float): target volume, applied as a multiplicative gain
            speed (float): target speed (accepted but not applied yet)
            audio_path (str): if not None, the audio is also written to this
                path with ``soundfile.write``
            audio_format (str): currently unused

        Returns:
            tuple: ``(target_fs, wav_base64)`` - the sample rate actually used
            and the base64 text of the raw sample buffer.
        """
        # transform sample_rate
        if target_fs == 0 or target_fs > original_fs:
            target_fs = original_fs
            wav_tar_fs = wav
        else:
            # Keyword arguments: librosa >= 0.10 made orig_sr/target_sr
            # keyword-only; older releases accept the same keyword names.
            wav_tar_fs = librosa.resample(
                np.squeeze(wav), orig_sr=original_fs, target_sr=target_fs)

        # transform volume
        wav_vol = wav_tar_fs * volume

        # transform speed
        # TODO
        target_wav = wav_vol.reshape(-1, 1)

        # save audio
        if audio_path is not None:
            sf.write(audio_path, target_wav, target_fs)
            logger.info('Wave file has been generated: {}'.format(audio_path))

        # wav to base64: encodes the array's raw bytes, not a WAV container
        wav_base64 = base64.b64encode(target_wav).decode('utf-8')

        return target_fs, wav_base64

    def run(self,
            sentence: str,
            spk_id: int = 0,
            speed: float = 1.0,
            volume: float = 1.0,
            sample_rate: int = 0,
            save_path: str = None,
            audio_format: str = "wav"):
        """Synthesize ``sentence`` and return the post-processed audio.

        Args:
            sentence (str): text to synthesize
            spk_id (int): speaker id forwarded to the executor
            speed (float): forwarded to ``postprocess``
            volume (float): forwarded to ``postprocess``
            sample_rate (int): requested output sample rate, forwarded to
                ``postprocess`` as ``target_fs``
            save_path (str): optional output file path, forwarded to
                ``postprocess`` as ``audio_path``
            audio_format (str): forwarded to ``postprocess``

        Returns:
            tuple: ``(lang, target_sample_rate, wav_base64)`` where ``lang`` is
            the configured language.
        """
        lang = self.conf_dict["lang"]

        self.executor.infer(
            text=sentence,
            lang=lang,
            am=self.conf_dict["am"],
            spk_id=spk_id)

        target_sample_rate, wav_base64 = self.postprocess(
            wav=self.executor._outputs['wav'].numpy(),
            original_fs=self.executor.am_config.fs,
            target_fs=sample_rate,
            volume=volume,
            speed=speed,
            audio_path=save_path,
            audio_format=audio_format)

        return lang, target_sample_rate, wav_base64
speechserving/speechserving/main.py
浏览文件 @
e66233fa
...
@@ -15,11 +15,12 @@ import argparse
...
@@ -15,11 +15,12 @@ import argparse
import
uvicorn
import
uvicorn
import
yaml
import
yaml
from
engine.asr.python.asr_engine
import
ASREngine
from
engine.tts.python.tts_engine
import
TTSEngine
from
fastapi
import
FastAPI
from
fastapi
import
FastAPI
from
restful.api
import
router
as
api_router
from
restful.api
import
router
as
api_router
from
utils
.log
import
logger
from
paddlespeech.cli
.log
import
logger
app
=
FastAPI
(
app
=
FastAPI
(
title
=
"PaddleSpeech Serving API"
,
description
=
"Api"
,
version
=
"0.0.1"
)
title
=
"PaddleSpeech Serving API"
,
description
=
"Api"
,
version
=
"0.0.1"
)
...
@@ -31,7 +32,8 @@ def init(args):
...
@@ -31,7 +32,8 @@ def init(args):
app
.
include_router
(
api_router
)
app
.
include_router
(
api_router
)
# engine single
# engine single
ASR_ENGINE
=
ASREngine
(
"asr"
)
TTS_ENGINE
=
TTSEngine
()
# todo others
# todo others
...
@@ -56,7 +58,8 @@ if __name__ == "__main__":
...
@@ -56,7 +58,8 @@ if __name__ == "__main__":
"--config_file"
,
"--config_file"
,
action
=
"store"
,
action
=
"store"
,
help
=
"yaml file of the app"
,
help
=
"yaml file of the app"
,
default
=
"./conf/application.yaml"
)
default
=
"./conf/tts/tts.yaml"
)
parser
.
add_argument
(
parser
.
add_argument
(
"--log_file"
,
"--log_file"
,
action
=
"store"
,
action
=
"store"
,
...
...
speechserving/speechserving/restful/api.py
浏览文件 @
e66233fa
...
@@ -13,9 +13,10 @@
...
@@ -13,9 +13,10 @@
# limitations under the License.
# limitations under the License.
from
fastapi
import
APIRouter
from
fastapi
import
APIRouter
from
.asr_api
import
router
as
asr_router
from
.tts_api
import
router
as
tts_router
from
.tts_api
import
router
as
tts_router
#from .asr_api import router as asr_router
router
=
APIRouter
()
router
=
APIRouter
()
router
.
include_router
(
asr_router
)
#
router.include_router(asr_router)
router
.
include_router
(
tts_router
)
router
.
include_router
(
tts_router
)
speechserving/speechserving/restful/request.py
浏览文件 @
e66233fa
...
@@ -16,7 +16,7 @@ from typing import Optional
...
@@ -16,7 +16,7 @@ from typing import Optional
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
__all__
=
[
'ASRRequest
,
TTSRequest'
]
__all__
=
[
'ASRRequest
'
,
'
TTSRequest'
]
#****************************************************************************************/
#****************************************************************************************/
...
@@ -44,13 +44,25 @@ class ASRRequest(BaseModel):
...
@@ -44,13 +44,25 @@ class ASRRequest(BaseModel):
#************************************ TTS request ***************************************/
#************************************ TTS request ***************************************/
#****************************************************************************************/
#****************************************************************************************/
class TTSRequest(BaseModel):
    """TTS request

    request body example
    {
        "text": "你好,欢迎使用百度飞桨语音合成服务。",
        "spk_id": 0,
        "speed": 1.0,
        "volume": 1.0,
        "sample_rate": 0,
        "save_path": "./tts.wav",
        "audio_format": "wav"
    }

    The sample text is Chinese for "Hello, welcome to the Baidu PaddlePaddle
    speech synthesis service."
    """

    # Only ``text`` is required; every other field has a default.
    text: str
    spk_id: int = 0
    speed: float = 1.0
    volume: float = 1.0
    sample_rate: int = 0
    # Optional[...] so an explicit null/None is a valid value, matching the
    # None default.
    save_path: Optional[str] = None
    audio_format: str = "wav"
speechserving/speechserving/restful/response.py
浏览文件 @
e66233fa
...
@@ -16,7 +16,7 @@ from typing import Optional
...
@@ -16,7 +16,7 @@ from typing import Optional
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
__all__
=
[
'ASRResponse'
]
__all__
=
[
'ASRResponse'
,
'TTSResponse'
]
class
Message
(
BaseModel
):
class
Message
(
BaseModel
):
...
@@ -53,3 +53,36 @@ class ASRResponse(BaseModel):
...
@@ -53,3 +53,36 @@ class ASRResponse(BaseModel):
#****************************************************************************************/
#****************************************************************************************/
#************************************ TTS response **************************************/
#************************************ TTS response **************************************/
#****************************************************************************************/
#****************************************************************************************/
class TTSResult(BaseModel):
    """Result payload carried in the ``result`` field of a TTS response.

    ``sample_rate`` and ``audio`` are required; the remaining fields have
    defaults.
    """

    lang: str = "zh"
    sample_rate: int
    spk_id: int = 0
    speed: float = 1.0
    volume: float = 1.0
    # Optional[...] so that passing None explicitly validates, matching the
    # None default.
    save_path: Optional[str] = None
    # NOTE(review): presumably the base64-encoded audio string - confirm
    # against the code that builds the response.
    audio: str
class TTSResponse(BaseModel):
    """Top-level response body for a TTS request.

    response example
    {
        "success": true,
        "code": 0,
        "message": {
            "description": "success"
        },
        "result": {
            "lang": "zh",
            "sample_rate": 24000,
            "speed": 1.0,
            "volume": 1.0,
            "audio": "LTI1OTIuNjI1OTUwMzQsOTk2OS41NDk4...",
            "save_path": "./tts.wav"
        }
    }
    """

    # All four fields are required (no defaults).
    success: bool
    code: int
    message: Message
    result: TTSResult
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录