Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
bb023742
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
bb023742
编写于
1月 28, 2022
作者:
L
lym0302
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix speed, add setup, test=doc
上级
35738988
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
110 addition
and
19 deletion
+110
-19
setup.py
setup.py
+3
-0
speechserving/speechserving/conf/application.yaml
speechserving/speechserving/conf/application.yaml
+2
-1
speechserving/speechserving/engine/tts/python/tts_engine.py
speechserving/speechserving/engine/tts/python/tts_engine.py
+18
-18
speechserving/speechserving/utils/audio_process.py
speechserving/speechserving/utils/audio_process.py
+87
-0
未找到文件。
setup.py
浏览文件 @
bb023742
...
...
@@ -61,6 +61,9 @@ requirements = {
"visualdl"
,
"webrtcvad"
,
"yacs~=0.1.8"
,
# fastapi server
"fastapi"
,
"uvicorn"
,
],
"develop"
:
[
"ConfigArgParse"
,
...
...
speechserving/speechserving/conf/application.yaml
浏览文件 @
bb023742
...
...
@@ -12,3 +12,4 @@ port: 8090
# add engine type (Options: asr, tts) and config file here.
engine_backend
:
asr
:
'
conf/asr/asr.yaml'
tts
:
'
conf/tts/tts.yaml'
speechserving/speechserving/engine/tts/python/tts_engine.py
浏览文件 @
bb023742
...
...
@@ -13,19 +13,18 @@
# limitations under the License.
import
argparse
import
base64
import
os
import
random
import
io
import
librosa
import
numpy
as
np
import
soundfile
as
sf
import
yaml
from
engine.base_engine
import
BaseEngine
from
ffmpeg
import
audio
from
scipy.io
import
wavfile
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.tts.infer
import
TTSExecutor
from
utils.audio_
types
import
wav2pcm
from
utils.audio_
process
import
change_speed
from
utils.errors
import
ErrorCode
from
utils.exception
import
ServerBaseException
...
...
@@ -107,26 +106,27 @@ class TTSEngine(BaseEngine):
wav_vol
=
wav_tar_fs
*
volume
# transform speed
hash
=
random
.
getrandbits
(
128
)
temp_wav
=
str
(
hash
)
+
".wav"
temp_speed_wav
=
str
(
hash
+
1
)
+
".wav"
sf
.
write
(
temp_wav
,
wav_vol
.
reshape
(
-
1
,
1
),
target_fs
)
audio
.
a_speed
(
temp_wav
,
speed
,
temp_speed_wav
)
os
.
system
(
"rm %s"
%
(
temp_wav
)
)
try
:
# windows not support soxbindings
wav_speed
=
change_speed
(
wav_vol
,
speed
,
target_fs
)
except
:
raise
ServerBaseException
(
ErrorCode
.
SERVER_INTERNAL_ERR
,
"Can not install soxbindings on your system."
)
# wav to base64
with
open
(
temp_speed_wav
,
'rb'
)
as
f
:
base64_bytes
=
base64
.
b64encode
(
f
.
read
())
buf
=
io
.
BytesIO
()
wavfile
.
write
(
buf
,
target_fs
,
wav_speed
)
base64_bytes
=
base64
.
b64encode
(
buf
.
read
())
wav_base64
=
base64_bytes
.
decode
(
'utf-8'
)
# save audio
if
audio_path
is
not
None
and
audio_path
.
endswith
(
".wav"
):
os
.
system
(
"mv %s %s"
%
(
temp_speed_wav
,
audio_path
)
)
sf
.
write
(
audio_path
,
wav_speed
,
target_fs
)
elif
audio_path
is
not
None
and
audio_path
.
endswith
(
".pcm"
):
wav
2pcm
(
temp_speed_wav
,
audio_path
,
data_type
=
np
.
int16
)
os
.
system
(
"rm %s"
%
(
temp_speed_wav
))
else
:
os
.
system
(
"rm %s"
%
(
temp_speed_wav
))
wav
_norm
=
wav_speed
*
(
32767
/
max
(
0.001
,
np
.
max
(
np
.
abs
(
wav_speed
))
))
with
open
(
audio_path
,
"wb"
)
as
f
:
f
.
write
(
wav_norm
.
astype
(
np
.
int16
))
return
target_fs
,
wav_base64
...
...
speechserving/speechserving/utils/audio_
type
s.py
→
speechserving/speechserving/utils/audio_
proces
s.py
浏览文件 @
bb023742
...
...
@@ -38,3 +38,50 @@ def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000):
wavfile
.
setframerate
(
sample_rate
)
wavfile
.
writeframes
(
pcmdata
)
wavfile
.
close
()
def change_speed(sample_raw, speed_rate, sample_rate):
    """Change the audio speed using the sox ``tempo`` effect.

    The input is not modified. A new float32 array is returned, except when
    ``speed_rate == 1.0``, where ``sample_raw`` itself is returned unchanged.

    :param sample_raw: Audio samples to transform.
        NOTE(review): the result is squeezed on its last axis, so sox is
        presumably handed single-channel audio -- confirm with callers.
    :param speed_rate: Rate of speed change:
                       speed_rate > 1.0, speed up the audio;
                       speed_rate = 1.0, unchanged;
                       speed_rate < 1.0, slow down the audio;
                       speed_rate <= 0.0, not allowed, raise ValueError.
    :type speed_rate: float
    :param sample_rate: Sample rate of ``sample_raw``, passed to sox as
        ``sample_rate_in``.
    :return: The speed-changed samples.
    :raises ValueError: If speed_rate <= 0.0.
    :raises RuntimeError: If soxbindings is missing and cannot be installed.
    """
    if speed_rate <= 0:
        raise ValueError("speed_rate should be greater than zero.")
    if speed_rate == 1.0:
        # Nothing to do. Hand the samples back rather than returning None,
        # so callers can always use the return value.
        return sample_raw

    # soxbindings is imported lazily: it is an optional dependency that is
    # not installable on every platform, so try to install it on demand.
    try:
        import soxbindings as sox
    except ImportError:
        try:
            from paddlespeech.s2t.utils import dynamic_pip_install
            for package in ("sox", "soxbindings"):
                dynamic_pip_install.install(package)
            import soxbindings as sox
        except Exception as err:
            # Keep the original failure attached for easier debugging.
            raise RuntimeError(
                "Can not install soxbindings on your system.") from err

    tfm = sox.Transformer()
    tfm.set_globals(multithread=False)
    tfm.tempo(speed_rate)
    transformed = tfm.build_array(
        input_array=sample_raw, sample_rate_in=sample_rate)
    # Drop the trailing axis and return an independent float32 copy.
    sample_speed = transformed.squeeze(-1).astype(np.float32).copy()

    return sample_speed
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录