From 622d3883423591f2199d3fc9547d2ac469288203 Mon Sep 17 00:00:00 2001 From: YangZhou Date: Wed, 17 Aug 2022 17:50:11 +0800 Subject: [PATCH] make soundfile compatible --- .../local/make_rirs_noise_csv_dataset_from_json.py | 2 +- .../sv0/local/make_vox_csv_dataset_from_json.py | 2 +- paddlespeech/audio/__init__.py | 2 ++ paddlespeech/audio/backends/soundfile_backend.py | 11 ++++++----- paddlespeech/cli/cls/infer.py | 2 +- paddlespeech/cli/kws/infer.py | 2 +- paddlespeech/cli/vector/infer.py | 2 +- paddlespeech/cls/exps/panns/deploy/predict.py | 4 ++-- .../server/engine/vector/python/vector_engine.py | 2 +- paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py | 2 +- paddlespeech/vector/io/dataset.py | 3 ++- paddlespeech/vector/io/dataset_from_json.py | 2 +- tests/unit/audio/features/base.py | 2 +- 13 files changed, 21 insertions(+), 17 deletions(-) diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 7ad9bd6e..233977ba 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -23,7 +23,7 @@ from typing import List import tqdm from yacs.config import CfgNode -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index 40adf53d..49c234a4 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -24,7 +24,7 @@ import random import tqdm from yacs.config import CfgNode -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/paddlespeech/audio/__init__.py b/paddlespeech/audio/__init__.py index f79f3d77..0e95377e 100644 --- a/paddlespeech/audio/__init__.py +++ b/paddlespeech/audio/__init__.py @@ -26,6 +26,7 @@ from . import utils from paddlespeech.audio.backends import get_audio_backend from paddlespeech.audio.backends import list_audio_backends from paddlespeech.audio.backends import set_audio_backend +from paddlespeech.audio.backends import soundfile_backend __all__ = [ "io", @@ -37,4 +38,5 @@ __all__ = [ "list_audio_backends", "get_audio_backend", "set_audio_backend", + "soudfile_backend", ] diff --git a/paddlespeech/audio/backends/soundfile_backend.py b/paddlespeech/audio/backends/soundfile_backend.py index 1afe3dc3..b3421e32 100644 --- a/paddlespeech/audio/backends/soundfile_backend.py +++ b/paddlespeech/audio/backends/soundfile_backend.py @@ -34,8 +34,9 @@ __all__ = [ 'save', 'soudfile_save', 'load', - 'soundfile_load', - 'info' + 'load_old', + 'info', + 'to_mono' ] NORMALMIZE_TYPES = ['linear', 'gaussian'] MERGE_TYPES = ['ch0', 'ch1', 'random', 'average'] @@ -122,7 +123,7 @@ def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray: return y_out -def soundfile_load(file: os.PathLike, +def soundfile_load_(file: os.PathLike, offset: Optional[float]=None, dtype: str='int16', duration: Optional[int]=None) -> Tuple[np.ndarray, int]: @@ -204,7 +205,7 @@ def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None: wavfile.write(file, sr, y_out) -def soudfile_load( +def soundfile_load( file: os.PathLike, sr: Optional[int]=None, mono: bool=True, @@ -235,7 +236,7 @@ def soudfile_load( Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate. """ - y, r = sound_file_load(file, offset=offset, dtype=dtype, duration=duration) + y, r = soundfile_load_(file, offset=offset, dtype=dtype, duration=duration) if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)): raise ParameterError(f'audio file {file} looks empty') diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index c869e28b..5ace7fe0 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -25,7 +25,7 @@ import yaml from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio import load +from paddlespeech.audio.soundfile_backend import soundfile_load as load from paddlespeech.audio.features import LogMelSpectrogram __all__ = ['CLSExecutor'] diff --git a/paddlespeech/cli/kws/infer.py b/paddlespeech/cli/kws/infer.py index 111cfd75..bd15e80e 100644 --- a/paddlespeech/cli/kws/infer.py +++ b/paddlespeech/cli/kws/infer.py @@ -24,7 +24,7 @@ import yaml from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio import load +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.compliance.kaldi import fbank as kaldi_fbank __all__ = ['KWSExecutor'] diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index 7fb7b495..aeaada85 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -27,7 +27,7 @@ from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index 3c58d61c..93eee74b 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -18,7 +18,7 @@ import numpy as np from paddle import inference from scipy.special import softmax -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.datasets import ESC50 from paddlespeech.audio.features import melspectrogram @@ -42,7 +42,7 @@ def extract_features(files: str, **kwargs): srs = [] max_length = float('-inf') for file in files: - waveform, sr = load_audio(file, sr=None) + waveform, sr = load_audio(file) max_length = max(max_length, len(waveform)) waveforms.append(waveform) srs.append(sr) diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index 30979645..cf04d843 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -17,7 +17,7 @@ from collections import OrderedDict import numpy as np import paddle -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index 2d01598c..e9203ef9 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -18,7 +18,7 @@ import time import paddle from yacs.config import CfgNode -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize diff --git a/paddlespeech/vector/io/dataset.py b/paddlespeech/vector/io/dataset.py index 245b2959..c9d56b5e 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -16,7 +16,8 @@ from dataclasses import fields from paddle.io import Dataset -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio + from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index 12e84577..32960e45 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ b/paddlespeech/vector/io/dataset_from_json.py @@ -17,7 +17,7 @@ from dataclasses import fields from paddle.io import Dataset -from paddlespeech.audio import load as load_audio +from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.audio.compliance.librosa import mfcc diff --git a/tests/unit/audio/features/base.py b/tests/unit/audio/features/base.py index 6d59f72b..4049b610 100644 --- a/tests/unit/audio/features/base.py +++ b/tests/unit/audio/features/base.py @@ -18,7 +18,7 @@ import urllib.request import numpy as np import paddle -from paddlespeech.audio import load +from paddlespeech.audio.soundfile_backend import soundfile_load as load wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' -- GitLab