提交 622d3883，作者：YangZhou

make soundfile compatible

上级 91ce0d87
......@@ -23,7 +23,7 @@ from typing import List
import tqdm
from yacs.config import CfgNode
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.utils.vector_utils import get_chunks
......
......@@ -24,7 +24,7 @@ import random
import tqdm
from yacs.config import CfgNode
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.utils.vector_utils import get_chunks
......
......@@ -26,6 +26,7 @@ from . import utils
from paddlespeech.audio.backends import get_audio_backend
from paddlespeech.audio.backends import list_audio_backends
from paddlespeech.audio.backends import set_audio_backend
from paddlespeech.audio.backends import soundfile_backend
__all__ = [
"io",
......@@ -37,4 +38,5 @@ __all__ = [
"list_audio_backends",
"get_audio_backend",
"set_audio_backend",
"soudfile_backend",
]
......@@ -34,8 +34,9 @@ __all__ = [
'save',
'soudfile_save',
'load',
'soundfile_load',
'info'
'load_old',
'info',
'to_mono'
]
NORMALMIZE_TYPES = ['linear', 'gaussian']
MERGE_TYPES = ['ch0', 'ch1', 'random', 'average']
......@@ -122,7 +123,7 @@ def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray:
return y_out
def soundfile_load(file: os.PathLike,
def soundfile_load_(file: os.PathLike,
offset: Optional[float]=None,
dtype: str='int16',
duration: Optional[int]=None) -> Tuple[np.ndarray, int]:
......@@ -204,7 +205,7 @@ def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None:
wavfile.write(file, sr, y_out)
def soudfile_load(
def soundfile_load(
file: os.PathLike,
sr: Optional[int]=None,
mono: bool=True,
......@@ -235,7 +236,7 @@ def soudfile_load(
Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate.
"""
y, r = sound_file_load(file, offset=offset, dtype=dtype, duration=duration)
y, r = soundfile_load_(file, offset=offset, dtype=dtype, duration=duration)
if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)):
raise ParameterError(f'audio file {file} looks empty')
......
......@@ -25,7 +25,7 @@ import yaml
from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.audio import load
from paddlespeech.audio.soundfile_backend import soundfile_load as load
from paddlespeech.audio.features import LogMelSpectrogram
__all__ = ['CLSExecutor']
......
......@@ -24,7 +24,7 @@ import yaml
from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.audio import load
from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio
from paddlespeech.audio.compliance.kaldi import fbank as kaldi_fbank
__all__ = ['KWSExecutor']
......
......@@ -27,7 +27,7 @@ from yacs.config import CfgNode
from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.vector.io.batch import feature_normalize
from paddlespeech.vector.modules.sid_model import SpeakerIdetification
......
......@@ -18,7 +18,7 @@ import numpy as np
from paddle import inference
from scipy.special import softmax
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio
from paddlespeech.audio.datasets import ESC50
from paddlespeech.audio.features import melspectrogram
......@@ -42,7 +42,7 @@ def extract_features(files: str, **kwargs):
srs = []
max_length = float('-inf')
for file in files:
waveform, sr = load_audio(file, sr=None)
waveform, sr = load_audio(file)
max_length = max(max_length, len(waveform))
waveforms.append(waveform)
srs.append(sr)
......
......@@ -17,7 +17,7 @@ from collections import OrderedDict
import numpy as np
import paddle
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.cli.log import logger
from paddlespeech.cli.vector.infer import VectorExecutor
......
......@@ -18,7 +18,7 @@ import time
import paddle
from yacs.config import CfgNode
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.batch import feature_normalize
......
......@@ -16,7 +16,8 @@ from dataclasses import fields
from paddle.io import Dataset
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()
......
......@@ -17,7 +17,7 @@ from dataclasses import fields
from paddle.io import Dataset
from paddlespeech.audio import load as load_audio
from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.audio.compliance.librosa import mfcc
......
......@@ -18,7 +18,7 @@ import urllib.request
import numpy as np
import paddle
from paddlespeech.audio import load
from paddlespeech.audio.soundfile_backend import soundfile_load as load
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册