提交 bb023742 编写于 作者: L lym0302

fix speed, add setup, test=doc

上级 35738988
...@@ -61,6 +61,9 @@ requirements = { ...@@ -61,6 +61,9 @@ requirements = {
"visualdl", "visualdl",
"webrtcvad", "webrtcvad",
"yacs~=0.1.8", "yacs~=0.1.8",
# fastapi server
"fastapi",
"uvicorn",
], ],
"develop": [ "develop": [
"ConfigArgParse", "ConfigArgParse",
......
...@@ -11,4 +11,5 @@ port: 8090 ...@@ -11,4 +11,5 @@ port: 8090
################################################################## ##################################################################
# add engine type (Options: asr, tts) and config file here. # add engine type (Options: asr, tts) and config file here.
engine_backend: engine_backend:
asr: 'conf/asr/asr.yaml' asr: 'conf/asr/asr.yaml'
\ No newline at end of file tts: 'conf/tts/tts.yaml'
...@@ -13,19 +13,18 @@ ...@@ -13,19 +13,18 @@
# limitations under the License. # limitations under the License.
import argparse import argparse
import base64 import base64
import os import io
import random
import librosa import librosa
import numpy as np import numpy as np
import soundfile as sf import soundfile as sf
import yaml import yaml
from engine.base_engine import BaseEngine from engine.base_engine import BaseEngine
from ffmpeg import audio from scipy.io import wavfile
from paddlespeech.cli.log import logger from paddlespeech.cli.log import logger
from paddlespeech.cli.tts.infer import TTSExecutor from paddlespeech.cli.tts.infer import TTSExecutor
from utils.audio_types import wav2pcm from utils.audio_process import change_speed
from utils.errors import ErrorCode from utils.errors import ErrorCode
from utils.exception import ServerBaseException from utils.exception import ServerBaseException
...@@ -107,26 +106,27 @@ class TTSEngine(BaseEngine): ...@@ -107,26 +106,27 @@ class TTSEngine(BaseEngine):
wav_vol = wav_tar_fs * volume wav_vol = wav_tar_fs * volume
# transform speed # transform speed
hash = random.getrandbits(128) try: # windows not support soxbindings
temp_wav = str(hash) + ".wav" wav_speed = change_speed(wav_vol, speed, target_fs)
temp_speed_wav = str(hash + 1) + ".wav" except:
sf.write(temp_wav, wav_vol.reshape(-1, 1), target_fs) raise ServerBaseException(
audio.a_speed(temp_wav, speed, temp_speed_wav) ErrorCode.SERVER_INTERNAL_ERR,
os.system("rm %s" % (temp_wav)) "Can not install soxbindings on your system.")
# wav to base64 # wav to base64
with open(temp_speed_wav, 'rb') as f: buf = io.BytesIO()
base64_bytes = base64.b64encode(f.read()) wavfile.write(buf, target_fs, wav_speed)
wav_base64 = base64_bytes.decode('utf-8') base64_bytes = base64.b64encode(buf.read())
wav_base64 = base64_bytes.decode('utf-8')
# save audio # save audio
if audio_path is not None and audio_path.endswith(".wav"): if audio_path is not None and audio_path.endswith(".wav"):
os.system("mv %s %s" % (temp_speed_wav, audio_path)) sf.write(audio_path, wav_speed, target_fs)
elif audio_path is not None and audio_path.endswith(".pcm"): elif audio_path is not None and audio_path.endswith(".pcm"):
wav2pcm(temp_speed_wav, audio_path, data_type=np.int16) wav_norm = wav_speed * (32767 / max(0.001,
os.system("rm %s" % (temp_speed_wav)) np.max(np.abs(wav_speed))))
else: with open(audio_path, "wb") as f:
os.system("rm %s" % (temp_speed_wav)) f.write(wav_norm.astype(np.int16))
return target_fs, wav_base64 return target_fs, wav_base64
......
...@@ -38,3 +38,50 @@ def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000): ...@@ -38,3 +38,50 @@ def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000):
wavfile.setframerate(sample_rate) wavfile.setframerate(sample_rate)
wavfile.writeframes(pcmdata) wavfile.writeframes(pcmdata)
wavfile.close() wavfile.close()
def change_speed(sample_raw, speed_rate, sample_rate):
    """Change the audio speed with the sox ``tempo`` effect.

    The input array is not modified; a new array is returned unless
    ``speed_rate`` is exactly 1.0, in which case the input is returned as is.

    :param sample_raw: Audio samples to transform.
        NOTE(review): the result is squeezed on its last axis, so this
        presumably expects single-channel audio -- confirm against callers.
    :type sample_raw: numpy.ndarray
    :param speed_rate: Rate of speed change:
                       speed_rate > 1.0, speed up the audio;
                       speed_rate = 1.0, unchanged;
                       speed_rate < 1.0, slow down the audio;
                       speed_rate <= 0.0, not allowed, raise ValueError.
    :type speed_rate: float
    :param sample_rate: Sample rate of ``sample_raw``, passed to sox as
                        ``sample_rate_in``.
    :type sample_rate: int
    :return: The speed-changed samples as a float32 array, or ``sample_raw``
             itself when ``speed_rate == 1.0``.
    :rtype: numpy.ndarray
    :raises ValueError: If speed_rate <= 0.0.
    :raises RuntimeError: If soxbindings can neither be imported nor installed.
    """
    if speed_rate <= 0:
        raise ValueError("speed_rate should be greater than zero.")

    # Nothing to do: hand the samples back instead of returning None, so that
    # callers can always use the return value.
    if speed_rate == 1.0:
        return sample_raw

    # soxbindings is imported lazily because it is not available on every
    # platform; if it is missing, try to install it on the fly.
    try:
        import soxbindings as sox
    except ImportError:
        try:
            from paddlespeech.s2t.utils import dynamic_pip_install
            for package in ("sox", "soxbindings"):
                dynamic_pip_install.install(package)
            import soxbindings as sox
        except Exception as err:
            # Keep the original failure attached for easier debugging.
            raise RuntimeError(
                "Can not install soxbindings on your system.") from err

    tfm = sox.Transformer()
    tfm.set_globals(multithread=False)
    tfm.tempo(speed_rate)
    transformed = tfm.build_array(
        input_array=sample_raw, sample_rate_in=sample_rate)
    # Drop the trailing axis and return an independent float32 copy.
    sample_speed = transformed.squeeze(-1).astype(np.float32).copy()

    return sample_speed
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册