diff --git a/paddlespeech/audio/CMakeLists.txt b/paddlespeech/audio/CMakeLists.txt deleted file mode 100644 index c6b43c780deb2af9d26f7c9344d43519c0db9619..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ - -add_subdirectory(third_party) -add_subdirectory(src) diff --git a/paddlespeech/audio/README.md b/paddlespeech/audio/README.md deleted file mode 100644 index fc1e5942c83a9ae9042740cb10f7590851099eaf..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# PaddleAudio - -## Reference -`csrc` code is reference of `torchaudio`. - -```text -BSD 2-Clause License - -Copyright (c) [year], [fullname] - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-``` diff --git a/paddlespeech/audio/__init__.py b/paddlespeech/audio/__init__.py index ad06603a7f4fd31359a0bb4625edddc85e0ebaf1..a7cf6caafb3eeceee3460df1773992387ddfc0b1 100644 --- a/paddlespeech/audio/__init__.py +++ b/paddlespeech/audio/__init__.py @@ -11,17 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from . import _extension -from . import compliance -from . import datasets -from . import features -from . import functional -from . import io -from . import metric -from . import sox_effects from . import streamdata from . import text from . import transform -from .backends import load -from .backends import save diff --git a/paddlespeech/audio/_extension.py b/paddlespeech/audio/_extension.py deleted file mode 100644 index ac82c06e53d6898bac5fb0e3b42e47ddd49c8964..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/_extension.py +++ /dev/null @@ -1,164 +0,0 @@ -import os -import warnings -from pathlib import Path - -from ._internal import module_utils as _mod_utils # noqa: F401 - - -import contextlib -import ctypes -import os -import sys -import types - -# Query `hasattr` only once. -_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys, - 'setdlopenflags') - - -@contextlib.contextmanager -def dl_open_guard(): - """ - # https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html - Context manager to set the RTLD_GLOBAL dynamic linker flag while we open a - shared library to load custom operators. 
- """ - if _SET_GLOBAL_FLAGS: - old_flags = sys.getdlopenflags() - sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL) - yield - if _SET_GLOBAL_FLAGS: - sys.setdlopenflags(old_flags) - - -def resolve_library_path(path: str) -> str: - return os.path.realpath(path) - - -class _Ops(types.ModuleType): - #__file__ = '_ops.py' - - def __init__(self): - super(_Ops, self).__init__('paddlespeech.ops') - self.loaded_libraries = set() - - def load_library(self, path): - """ - Loads a shared library from the given path into the current process. - This allows dynamically loading custom operators. For this, - you should compile your operator and - the static registration code into a shared library object, and then - call ``paddlespeech.ops.load_library('path/to/libcustom.so')`` to load the - shared object. - After the library is loaded, it is added to the - ``paddlespeech.ops.loaded_libraries`` attribute, a set that may be inspected - for the paths of all libraries loaded using this function. - Args: - path (str): A path to a shared library to load. - """ - path = resolve_library_path(path) - with dl_open_guard(): - # https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries - # Import the shared library into the process, thus running its - # static (global) initialization code in order to register custom - # operators with the JIT. - ctypes.CDLL(path) - self.loaded_libraries.add(path) - - -_LIB_DIR = Path(__file__).parent / "lib" - -def _get_lib_path(lib: str): - suffix = "pyd" if os.name == "nt" else "so" - path = _LIB_DIR / f"{lib}.{suffix}" - return path - - -def _load_lib(lib: str) -> bool: - """Load extension module - Note: - In case `paddleaudio` is deployed with `pex` format, the library file - is not in a standard location. - In this case, we expect that `libpaddlleaudio` is available somewhere - in the search path of dynamic loading mechanism, so that importing - `_paddlleaudio` will have library loader find and load `libpaddlleaudio`. 
- This is the reason why the function should not raising an error when the library - file is not found. - Returns: - bool: - True if the library file is found AND the library loaded without failure. - False if the library file is not found (like in the case where paddlleaudio - is deployed with pex format, thus the shared library file is - in a non-standard location.). - If the library file is found but there is an issue loading the library, - (such as missing dependency) then this function raises the exception as-is. - Raises: - Exception: - If the library file is found, but there is an issue loading the library file, - (when underlying `ctype.DLL` throws an exception), this function will pass - the exception as-is, instead of catching it and returning bool. - The expected case is `OSError` thrown by `ctype.DLL` when a dynamic dependency - is not found. - This behavior was chosen because the expected failure case is not recoverable. - If a dependency is missing, then users have to install it. - """ - path = _get_lib_path(lib) - if not path.exists(): - warnings.warn("lib path is not exists:" + str(path)) - return False - #paddlespeech.audio.ops.load_library(path) - ops.load_library(path) - return True - - -_FFMPEG_INITIALIZED = False - - -def _init_ffmpeg(): - global _FFMPEG_INITIALIZED - if _FFMPEG_INITIALIZED: - return - - if not paddlespeech.audio._paddlleaudio.is_ffmpeg_available(): - raise RuntimeError( - "paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio." - ) - - try: - _load_lib("libpaddlleaudio_ffmpeg") - except OSError as err: - raise ImportError( - "FFmpeg libraries are not found. 
Please install FFmpeg.") from err - - import paddllespeech.audio._paddlleaudio_ffmpeg # noqa - - paddlespeech.audio._paddlleaudio.ffmpeg_init() - if paddlespeech.audio._paddlleaudio.ffmpeg_get_log_level() > 8: - paddlespeech.audio._paddlleaudio.ffmpeg_set_log_level(8) - - _FFMPEG_INITIALIZED = True - - -def _init_extension(): - if not _mod_utils.is_module_available("paddlespeech.audio._paddleaudio"): - warnings.warn("paddlespeech C++ extension is not available.") - return - - _load_lib("libpaddleaudio") - # This import is for initializing the methods registered via PyBind11 - # This has to happen after the base library is loaded - from paddlespeech.audio import _paddleaudio # noqa - - # Because this part is executed as part of `import torchaudio`, we ignore the - # initialization failure. - # If the FFmpeg integration is not properly initialized, then detailed error - # will be raised when client code attempts to import the dedicated feature. - try: - _init_ffmpeg() - except Exception: - pass - - -ops = _Ops() - -_init_extension() diff --git a/paddlespeech/audio/_internal/__init__.py b/paddlespeech/audio/_internal/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/paddlespeech/audio/_internal/module_utils.py b/paddlespeech/audio/_internal/module_utils.py deleted file mode 100644 index d4a308fe7251f48641f927c4689414d64954a842..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/_internal/module_utils.py +++ /dev/null @@ -1,148 +0,0 @@ -import importlib.util -import warnings -from functools import wraps -from typing import Optional - -#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py - - -def is_module_available(*modules: str) -> bool: - r"""Returns if a top-level module with :attr:`name` exists *without** - importing it. This is generally safer than try-catch block around a - `import X`. 
It avoids third party libraries breaking assumptions of some of - our tests, e.g., setting multiprocessing start method when imported - (see librosa/#747, torchvision/#544). - """ - return all(importlib.util.find_spec(m) is not None for m in modules) - - -def requires_module(*modules: str): - """Decorate function to give error message if invoked without required optional modules. - This decorator is to give better error message to users rather - than raising ``NameError: name 'module' is not defined`` at random places. - """ - missing = [m for m in modules if not is_module_available(m)] - - if not missing: - # fall through. If all the modules are available, no need to decorate - def decorator(func): - return func - - else: - req = f"module: {missing[0]}" if len( - missing) == 1 else f"modules: {missing}" - - def decorator(func): - @wraps(func) - def wrapped(*args, **kwargs): - raise RuntimeError( - f"{func.__module__}.{func.__name__} requires {req}") - - return wrapped - - return decorator - - -def deprecated(direction: str, version: Optional[str]=None): - """Decorator to add deprecation message - Args: - direction (str): Migration steps to be given to users. - version (str or int): The version when the object will be removed - """ - - def decorator(func): - @wraps(func) - def wrapped(*args, **kwargs): - message = ( - f"{func.__module__}.{func.__name__} has been deprecated " - f'and will be removed from {"future" if version is None else version} release. 
' - f"{direction}") - warnings.warn(message, stacklevel=2) - return func(*args, **kwargs) - - return wrapped - - return decorator - - -def is_kaldi_available(): - return is_module_available("paddlespeech.audio._paddleaudio") - - -def requires_kaldi(): - if is_kaldi_available(): - - def decorator(func): - return func - - else: - - def decorator(func): - @wraps(func) - def wrapped(*args, **kwargs): - raise RuntimeError( - f"{func.__module__}.{func.__name__} requires kaldi") - - return wrapped - - return decorator - - -def _check_soundfile_importable(): - if not is_module_available("soundfile"): - return False - try: - import soundfile # noqa: F401 - - return True - except Exception: - warnings.warn( - "Failed to import soundfile. 'soundfile' backend is not available.") - return False - - -_is_soundfile_importable = _check_soundfile_importable() - - -def is_soundfile_available(): - return _is_soundfile_importable - - -def requires_soundfile(): - if is_soundfile_available(): - - def decorator(func): - return func - else: - - def decorator(func): - @wraps(func) - def wrapped(*args, **kwargs): - raise RuntimeError( - f"{func.__module__}.{func.__name__} requires soundfile") - - return wrapped - - return decorator - - -def is_sox_available(): - return is_module_available("paddlespeech.audio._paddleaudio") - - -def requires_sox(): - if is_sox_available(): - - def decorator(func): - return func - else: - - def decorator(func): - @wraps(func) - def wrapped(*args, **kwargs): - raise RuntimeError( - f"{func.__module__}.{func.__name__} requires sox") - - return wrapped - - return decorator diff --git a/paddlespeech/audio/_ops.py b/paddlespeech/audio/_ops.py deleted file mode 100644 index 6bcf25fe41d0d9e29a3d11255cd6ec28a6635e88..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/_ops.py +++ /dev/null @@ -1,63 +0,0 @@ -import contextlib -import ctypes -import os -import sys -import types - -# Query `hasattr` only once. 
-_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys, - 'setdlopenflags') - - -@contextlib.contextmanager -def dl_open_guard(): - """ - # https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html - Context manager to set the RTLD_GLOBAL dynamic linker flag while we open a - shared library to load custom operators. - """ - if _SET_GLOBAL_FLAGS: - old_flags = sys.getdlopenflags() - sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL) - yield - if _SET_GLOBAL_FLAGS: - sys.setdlopenflags(old_flags) - - -def resolve_library_path(path: str) -> str: - return os.path.realpath(path) - - -class _Ops(types.ModuleType): - __file__ = '_ops.py' - - def __init__(self): - super(_Ops, self).__init__('paddlespeech.ops') - self.loaded_libraries = set() - - def load_library(self, path): - """ - Loads a shared library from the given path into the current process. - This allows dynamically loading custom operators. For this, - you should compile your operator and - the static registration code into a shared library object, and then - call ``paddlespeech.ops.load_library('path/to/libcustom.so')`` to load the - shared object. - After the library is loaded, it is added to the - ``paddlespeech.ops.loaded_libraries`` attribute, a set that may be inspected - for the paths of all libraries loaded using this function. - Args: - path (str): A path to a shared library to load. - """ - path = resolve_library_path(path) - with dl_open_guard(): - # https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries - # Import the shared library into the process, thus running its - # static (global) initialization code in order to register custom - # operators with the JIT. 
- ctypes.CDLL(path) - self.loaded_libraries.add(path) - - -# The ops "namespace" -ops = _Ops() diff --git a/paddlespeech/audio/backends/__init__.py b/paddlespeech/audio/backends/__init__.py deleted file mode 100644 index 6c73ca43c37bb46d5bf77d7e686bf5ceab40c4ce..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/backends/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# flake8: noqa -from . import utils -from .utils import get_audio_backend -from .utils import list_audio_backends -from .utils import set_audio_backend diff --git a/paddlespeech/audio/backends/common.py b/paddlespeech/audio/backends/common.py deleted file mode 100644 index 7ccab1d3389e225c48478d40ba9ef7f85c03617f..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/backends/common.py +++ /dev/null @@ -1,55 +0,0 @@ -# code from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py - -class AudioMetaData: - """Return type of ``torchaudio.info`` function. - - This class is used by :ref:`"sox_io" backend` and - :ref:`"soundfile" backend with the new interface`. - - :ivar int sample_rate: Sample rate - :ivar int num_frames: The number of frames - :ivar int num_channels: The number of channels - :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats, - or when it cannot be accurately inferred. 
- :ivar str encoding: Audio encoding - The values encoding can take are one of the following: - - * ``PCM_S``: Signed integer linear PCM - * ``PCM_U``: Unsigned integer linear PCM - * ``PCM_F``: Floating point linear PCM - * ``FLAC``: Flac, Free Lossless Audio Codec - * ``ULAW``: Mu-law - * ``ALAW``: A-law - * ``MP3`` : MP3, MPEG-1 Audio Layer III - * ``VORBIS``: OGG Vorbis - * ``AMR_WB``: Adaptive Multi-Rate - * ``AMR_NB``: Adaptive Multi-Rate Wideband - * ``OPUS``: Opus - * ``HTK``: Single channel 16-bit PCM - * ``UNKNOWN`` : None of above - """ - - def __init__( - self, - sample_rate: int, - num_frames: int, - num_channels: int, - bits_per_sample: int, - encoding: str, - ): - self.sample_rate = sample_rate - self.num_frames = num_frames - self.num_channels = num_channels - self.bits_per_sample = bits_per_sample - self.encoding = encoding - - def __str__(self): - return ( - f"AudioMetaData(" - f"sample_rate={self.sample_rate}, " - f"num_frames={self.num_frames}, " - f"num_channels={self.num_channels}, " - f"bits_per_sample={self.bits_per_sample}, " - f"encoding={self.encoding}" - f")" - ) diff --git a/paddlespeech/audio/backends/no_backend.py b/paddlespeech/audio/backends/no_backend.py deleted file mode 100644 index 157536f46e73c1b8911ed61f40ecb730a6af41dc..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/backends/no_backend.py +++ /dev/null @@ -1,32 +0,0 @@ -from pathlib import Path -from typing import Callable -from typing import Optional -from typing import Tuple -from typing import Union - -from paddle import Tensor - -#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py - - -def load( - filepath: Union[str, Path], - out: Optional[Tensor]=None, - normalization: Union[bool, float, Callable]=True, - channels_first: bool=True, - num_frames: int=0, - offset: int=0, - filetype: Optional[str]=None, ) -> Tuple[Tensor, int]: - raise RuntimeError("No audio I/O backend is available.") - - -def save(filepath: str, - 
src: Tensor, - sample_rate: int, - precision: int=16, - channels_first: bool=True) -> None: - raise RuntimeError("No audio I/O backend is available.") - - -def info(filepath: str) -> None: - raise RuntimeError("No audio I/O backend is available.") diff --git a/paddlespeech/audio/backends/soundfile_backend.py b/paddlespeech/audio/backends/soundfile_backend.py deleted file mode 100644 index 57e06e521cb3e4de137e062610b58839d19b6b78..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/backends/soundfile_backend.py +++ /dev/null @@ -1,662 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import warnings -from typing import Optional -from typing import Tuple - -import numpy as np -import paddle -import resampy -import soundfile -from scipy.io import wavfile - -from ..utils import depth_convert -from ..utils import ParameterError -from .common import AudioMetaData - -__all__ = [ - 'resample', - 'to_mono', - 'normalize', - 'save', - 'soundfile_save', - 'load', - 'soundfile_load', - 'info', - 'to_mono' -] -NORMALMIZE_TYPES = ['linear', 'gaussian'] -MERGE_TYPES = ['ch0', 'ch1', 'random', 'average'] -RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast'] -EPS = 1e-8 - - -def resample(y: np.ndarray, - src_sr: int, - target_sr: int, - mode: str='kaiser_fast') -> np.ndarray: - """Audio resampling. - - Args: - y (np.ndarray): Input waveform array in 1D or 2D. 
- src_sr (int): Source sample rate. - target_sr (int): Target sample rate. - mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'. - - Returns: - np.ndarray: `y` resampled to `target_sr` - """ - - if mode == 'kaiser_best': - warnings.warn( - f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. This function is pretty slow, \ - we recommend the mode kaiser_fast in large scale audio trainning') - - if not isinstance(y, np.ndarray): - raise ParameterError( - 'Only support numpy np.ndarray, but received y in {type(y)}') - - if mode not in RESAMPLE_MODES: - raise ParameterError(f'resample mode must in {RESAMPLE_MODES}') - - return resampy.resample(y, src_sr, target_sr, filter=mode) - - -def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray: - """Convert sterior audio to mono. - - Args: - y (np.ndarray): Input waveform array in 1D or 2D. - merge_type (str, optional): Merge type to generate mono waveform. Defaults to 'average'. - - Returns: - np.ndarray: `y` with mono channel. 
- """ - - if merge_type not in MERGE_TYPES: - raise ParameterError( - f'Unsupported merge type {merge_type}, available types are {MERGE_TYPES}' - ) - if y.ndim > 2: - raise ParameterError( - f'Unsupported audio array, y.ndim > 2, the shape is {y.shape}') - if y.ndim == 1: # nothing to merge - return y - - if merge_type == 'ch0': - return y[0] - if merge_type == 'ch1': - return y[1] - if merge_type == 'random': - return y[np.random.randint(0, 2)] - - # need to do averaging according to dtype - - if y.dtype == 'float32': - y_out = (y[0] + y[1]) * 0.5 - elif y.dtype == 'int16': - y_out = y.astype('int32') - y_out = (y_out[0] + y_out[1]) // 2 - y_out = np.clip(y_out, np.iinfo(y.dtype).min, - np.iinfo(y.dtype).max).astype(y.dtype) - - elif y.dtype == 'int8': - y_out = y.astype('int16') - y_out = (y_out[0] + y_out[1]) // 2 - y_out = np.clip(y_out, np.iinfo(y.dtype).min, - np.iinfo(y.dtype).max).astype(y.dtype) - else: - raise ParameterError(f'Unsupported dtype: {y.dtype}') - return y_out - - -def soundfile_load_(file: os.PathLike, - offset: Optional[float]=None, - dtype: str='int16', - duration: Optional[int]=None) -> Tuple[np.ndarray, int]: - """Load audio using soundfile library. This function load audio file using libsndfile. - - Args: - file (os.PathLike): File of waveform. - offset (Optional[float], optional): Offset to the start of waveform. Defaults to None. - dtype (str, optional): Data type of waveform. Defaults to 'int16'. - duration (Optional[int], optional): Duration of waveform to read. Defaults to None. - - Returns: - Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate. 
- """ - with soundfile.SoundFile(file) as sf_desc: - sr_native = sf_desc.samplerate - if offset: - sf_desc.seek(int(offset * sr_native)) - if duration is not None: - frame_duration = int(duration * sr_native) - else: - frame_duration = -1 - y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T - - return y, sf_desc.samplerate - - -def normalize(y: np.ndarray, norm_type: str='linear', - mul_factor: float=1.0) -> np.ndarray: - """Normalize an input audio with additional multiplier. - - Args: - y (np.ndarray): Input waveform array in 1D or 2D. - norm_type (str, optional): Type of normalization. Defaults to 'linear'. - mul_factor (float, optional): Scaling factor. Defaults to 1.0. - - Returns: - np.ndarray: `y` after normalization. - """ - - if norm_type == 'linear': - amax = np.max(np.abs(y)) - factor = 1.0 / (amax + EPS) - y = y * factor * mul_factor - elif norm_type == 'gaussian': - amean = np.mean(y) - astd = np.std(y) - astd = max(astd, EPS) - y = mul_factor * (y - amean) / astd - else: - raise NotImplementedError(f'norm_type should be in {NORMALMIZE_TYPES}') - - return y - - -def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None: - """Save audio file to disk. This function saves audio to disk using scipy.io.wavfile, with additional step to convert input waveform to int16. - - Args: - y (np.ndarray): Input waveform array in 1D or 2D. - sr (int): Sample rate. - file (os.PathLike): Path of auido file to save. 
- """ - if not file.endswith('.wav'): - raise ParameterError( - f'only .wav file supported, but dst file name is: {file}') - - if sr <= 0: - raise ParameterError( - f'Sample rate should be larger than 0, recieved sr = {sr}') - - if y.dtype not in ['int16', 'int8']: - warnings.warn( - f'input data type is {y.dtype}, will convert data to int16 format before saving' - ) - y_out = depth_convert(y, 'int16') - else: - y_out = y - - wavfile.write(file, sr, y_out) - -def soundfile_load( - file: os.PathLike, - sr: Optional[int]=None, - mono: bool=True, - merge_type: str='average', # ch0,ch1,random,average - normal: bool=True, - norm_type: str='linear', - norm_mul_factor: float=1.0, - offset: float=0.0, - duration: Optional[int]=None, - dtype: str='float32', - resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]: - """Load audio file from disk. This function loads audio from disk using using audio beackend. - - Args: - file (os.PathLike): Path of auido file to load. - sr (Optional[int], optional): Sample rate of loaded waveform. Defaults to None. - mono (bool, optional): Return waveform with mono channel. Defaults to True. - merge_type (str, optional): Merge type of multi-channels waveform. Defaults to 'average'. - normal (bool, optional): Waveform normalization. Defaults to True. - norm_type (str, optional): Type of normalization. Defaults to 'linear'. - norm_mul_factor (float, optional): Scaling factor. Defaults to 1.0. - offset (float, optional): Offset to the start of waveform. Defaults to 0.0. - duration (Optional[int], optional): Duration of waveform to read. Defaults to None. - dtype (str, optional): Data type of waveform. Defaults to 'float32'. - resample_mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'. - - Returns: - Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate. 
- """ - - y, r = soundfile_load_(file, offset=offset, dtype=dtype, duration=duration) - - if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)): - raise ParameterError(f'audio file {file} looks empty') - - if mono: - y = to_mono(y, merge_type) - - if sr is not None and sr != r: - y = resample(y, r, sr, mode=resample_mode) - r = sr - - if normal: - y = normalize(y, norm_type, norm_mul_factor) - elif dtype in ['int8', 'int16']: - # still need to do normalization, before depth convertion - y = normalize(y, 'linear', 1.0) - - y = depth_convert(y, dtype) - return y, r - -#the code below is form: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py - -def _get_subtype_for_wav(dtype: paddle.dtype, encoding: str, bits_per_sample: int): - if not encoding: - if not bits_per_sample: - subtype = { - paddle.uint8: "PCM_U8", - paddle.int16: "PCM_16", - paddle.int32: "PCM_32", - paddle.float32: "FLOAT", - paddle.float64: "DOUBLE", - }.get(dtype) - if not subtype: - raise ValueError(f"Unsupported dtype for wav: {dtype}") - return subtype - if bits_per_sample == 8: - return "PCM_U8" - return f"PCM_{bits_per_sample}" - if encoding == "PCM_S": - if not bits_per_sample: - return "PCM_32" - if bits_per_sample == 8: - raise ValueError("wav does not support 8-bit signed PCM encoding.") - return f"PCM_{bits_per_sample}" - if encoding == "PCM_U": - if bits_per_sample in (None, 8): - return "PCM_U8" - raise ValueError("wav only supports 8-bit unsigned PCM encoding.") - if encoding == "PCM_F": - if bits_per_sample in (None, 32): - return "FLOAT" - if bits_per_sample == 64: - return "DOUBLE" - raise ValueError("wav only supports 32/64-bit float PCM encoding.") - if encoding == "ULAW": - if bits_per_sample in (None, 8): - return "ULAW" - raise ValueError("wav only supports 8-bit mu-law encoding.") - if encoding == "ALAW": - if bits_per_sample in (None, 8): - return "ALAW" - raise ValueError("wav only supports 8-bit a-law encoding.") - raise 
ValueError(f"wav does not support {encoding}.") - - -def _get_subtype_for_sphere(encoding: str, bits_per_sample: int): - if encoding in (None, "PCM_S"): - return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32" - if encoding in ("PCM_U", "PCM_F"): - raise ValueError(f"sph does not support {encoding} encoding.") - if encoding == "ULAW": - if bits_per_sample in (None, 8): - return "ULAW" - raise ValueError("sph only supports 8-bit for mu-law encoding.") - if encoding == "ALAW": - return "ALAW" - raise ValueError(f"sph does not support {encoding}.") - - -def _get_subtype(dtype: paddle.dtype, format: str, encoding: str, bits_per_sample: int): - if format == "wav": - return _get_subtype_for_wav(dtype, encoding, bits_per_sample) - if format == "flac": - if encoding: - raise ValueError("flac does not support encoding.") - if not bits_per_sample: - return "PCM_16" - if bits_per_sample > 24: - raise ValueError("flac does not support bits_per_sample > 24.") - return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}" - if format in ("ogg", "vorbis"): - if encoding or bits_per_sample: - raise ValueError("ogg/vorbis does not support encoding/bits_per_sample.") - return "VORBIS" - if format == "sph": - return _get_subtype_for_sphere(encoding, bits_per_sample) - if format in ("nis", "nist"): - return "PCM_16" - raise ValueError(f"Unsupported format: {format}") - -def save( - filepath: str, - src: paddle.Tensor, - sample_rate: int, - channels_first: bool = True, - compression: Optional[float] = None, - format: Optional[str] = None, - encoding: Optional[str] = None, - bits_per_sample: Optional[int] = None, -): - """Save audio data to file. - - Note: - The formats this function can handle depend on the soundfile installation. 
- This function is tested on the following formats; - - * WAV - - * 32-bit floating-point - * 32-bit signed integer - * 16-bit signed integer - * 8-bit unsigned integer - - * FLAC - * OGG/VORBIS - * SPHERE - - Note: - ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts - ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend, - - Args: - filepath (str or pathlib.Path): Path to audio file. - src (paddle.Tensor): Audio data to save. must be 2D tensor. - sample_rate (int): sampling rate - channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`, - otherwise `[time, channel]`. - compression (float of None, optional): Not used. - It is here only for interface compatibility reson with "sox_io" backend. - format (str or None, optional): Override the audio format. - When ``filepath`` argument is path-like object, audio format is - inferred from file extension. If the file extension is missing or - different, you can specify the correct format with this argument. - - When ``filepath`` argument is file-like object, - this argument is required. - - Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``, - ``"flac"`` and ``"sph"``. - encoding (str or None, optional): Changes the encoding for supported formats. - This argument is effective only for supported formats, sush as - ``"wav"``, ``""flac"`` and ``"sph"``. Valid values are; - - - ``"PCM_S"`` (signed integer Linear PCM) - - ``"PCM_U"`` (unsigned integer Linear PCM) - - ``"PCM_F"`` (floating point PCM) - - ``"ULAW"`` (mu-law) - - ``"ALAW"`` (a-law) - - bits_per_sample (int or None, optional): Changes the bit depth for the - supported formats. - When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``, - you can change the bit depth. - Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``. 
- - Supported formats/encodings/bit depth/compression are: - - ``"wav"`` - - 32-bit floating-point PCM - - 32-bit signed integer PCM - - 24-bit signed integer PCM - - 16-bit signed integer PCM - - 8-bit unsigned integer PCM - - 8-bit mu-law - - 8-bit a-law - - Note: - Default encoding/bit depth is determined by the dtype of - the input Tensor. - - ``"flac"`` - - 8-bit - - 16-bit (default) - - 24-bit - - ``"ogg"``, ``"vorbis"`` - - Doesn't accept changing configuration. - - ``"sph"`` - - 8-bit signed integer PCM - - 16-bit signed integer PCM - - 24-bit signed integer PCM - - 32-bit signed integer PCM (default) - - 8-bit mu-law - - 8-bit a-law - - 16-bit a-law - - 24-bit a-law - - 32-bit a-law - - """ - if src.ndim != 2: - raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.") - if compression is not None: - warnings.warn( - '`save` function of "soundfile" backend does not support "compression" parameter. ' - "The argument is silently ignored." - ) - if hasattr(filepath, "write"): - if format is None: - raise RuntimeError("`format` is required when saving to file object.") - ext = format.lower() - else: - ext = str(filepath).split(".")[-1].lower() - - if bits_per_sample not in (None, 8, 16, 24, 32, 64): - raise ValueError("Invalid bits_per_sample.") - if bits_per_sample == 24: - warnings.warn( - "Saving audio with 24 bits per sample might warp samples near -1. " - "Using 16 bits per sample might be able to avoid this." 
- ) - subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample) - - # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format, - # so we extend the extensions manually here - if ext in ["nis", "nist", "sph"] and format is None: - format = "NIST" - - if channels_first: - src = src.t() - - soundfile.write(file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format) - -_SUBTYPE2DTYPE = { - "PCM_S8": "int8", - "PCM_U8": "uint8", - "PCM_16": "int16", - "PCM_32": "int32", - "FLOAT": "float32", - "DOUBLE": "float64", -} - -def load( - filepath: str, - frame_offset: int = 0, - num_frames: int = -1, - normalize: bool = True, - channels_first: bool = True, - format: Optional[str] = None, -) -> Tuple[paddle.Tensor, int]: - """Load audio data from file. - - Note: - The formats this function can handle depend on the soundfile installation. - This function is tested on the following formats; - - * WAV - - * 32-bit floating-point - * 32-bit signed integer - * 16-bit signed integer - * 8-bit unsigned integer - - * FLAC - * OGG/VORBIS - * SPHERE - - By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with - ``float32`` dtype and the shape of `[channel, time]`. - The samples are normalized to fit in the range of ``[-1.0, 1.0]``. - - When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit - signed integer and 8-bit unsigned integer (24-bit signed integer is not supported), - by providing ``normalize=False``, this function can return integer Tensor, where the samples - are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor - for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM. - - ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as - ``flac`` and ``mp3``. 
- For these formats, this function always returns ``float32`` Tensor with values normalized to - ``[-1.0, 1.0]``. - - Note: - ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts - ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend. - - Args: - filepath (path-like object or file-like object): - Source of audio data. - frame_offset (int, optional): - Number of frames to skip before start reading data. - num_frames (int, optional): - Maximum number of frames to read. ``-1`` reads all the remaining samples, - starting from ``frame_offset``. - This function may return the less number of frames if there is not enough - frames in the given file. - normalize (bool, optional): - When ``True``, this function always return ``float32``, and sample values are - normalized to ``[-1.0, 1.0]``. - If input file is integer WAV, giving ``False`` will change the resulting Tensor type to - integer type. - This argument has no effect for formats other than integer WAV type. - channels_first (bool, optional): - When True, the returned Tensor has dimension `[channel, time]`. - Otherwise, the returned Tensor's dimension is `[time, channel]`. - format (str or None, optional): - Not used. PySoundFile does not accept format hint. - - Returns: - (paddle.Tensor, int): Resulting Tensor and sample rate. - If the input file has integer wav format and normalization is off, then it has - integer type, else ``float32`` type. If ``channels_first=True``, it has - `[channel, time]` else `[time, channel]`. 
- """ - with soundfile.SoundFile(filepath, "r") as file_: - if file_.format != "WAV" or normalize: - dtype = "float32" - elif file_.subtype not in _SUBTYPE2DTYPE: - raise ValueError(f"Unsupported subtype: {file_.subtype}") - else: - dtype = _SUBTYPE2DTYPE[file_.subtype] - - frames = file_._prepare_read(frame_offset, None, num_frames) - waveform = file_.read(frames, dtype, always_2d=True) - sample_rate = file_.samplerate - - waveform = paddle.to_tensor(waveform) - if channels_first: - waveform = paddle.transpose(waveform, perm=[1,0]) - return waveform, sample_rate - - -# Mapping from soundfile subtype to number of bits per sample. -# This is mostly heuristical and the value is set to 0 when it is irrelevant -# (lossy formats) or when it can't be inferred. -# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard: -# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony, -# the default seems to be 8 bits but it can be compressed further to 4 bits. -# The dict is inspired from -# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94 -_SUBTYPE_TO_BITS_PER_SAMPLE = { - "PCM_S8": 8, # Signed 8 bit data - "PCM_16": 16, # Signed 16 bit data - "PCM_24": 24, # Signed 24 bit data - "PCM_32": 32, # Signed 32 bit data - "PCM_U8": 8, # Unsigned 8 bit data (WAV and RAW only) - "FLOAT": 32, # 32 bit float data - "DOUBLE": 64, # 64 bit float data - "ULAW": 8, # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types - "ALAW": 8, # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types - "IMA_ADPCM": 0, # IMA ADPCM. - "MS_ADPCM": 0, # Microsoft ADPCM. - "GSM610": 0, # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate) - "VOX_ADPCM": 0, # OKI / Dialogix ADPCM - "G721_32": 0, # 32kbs G721 ADPCM encoding. - "G723_24": 0, # 24kbs G723 ADPCM encoding. 
- "G723_40": 0, # 40kbs G723 ADPCM encoding. - "DWVW_12": 12, # 12 bit Delta Width Variable Word encoding. - "DWVW_16": 16, # 16 bit Delta Width Variable Word encoding. - "DWVW_24": 24, # 24 bit Delta Width Variable Word encoding. - "DWVW_N": 0, # N bit Delta Width Variable Word encoding. - "DPCM_8": 8, # 8 bit differential PCM (XI only) - "DPCM_16": 16, # 16 bit differential PCM (XI only) - "VORBIS": 0, # Xiph Vorbis encoding. (lossy) - "ALAC_16": 16, # Apple Lossless Audio Codec (16 bit). - "ALAC_20": 20, # Apple Lossless Audio Codec (20 bit). - "ALAC_24": 24, # Apple Lossless Audio Codec (24 bit). - "ALAC_32": 32, # Apple Lossless Audio Codec (32 bit). -} - -def _get_bit_depth(subtype): - if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE: - warnings.warn( - f"The {subtype} subtype is unknown to PaddleAudio. As a result, the bits_per_sample " - "attribute will be set to 0. If you are seeing this warning, please " - "report by opening an issue on github (after checking for existing/closed ones). " - "You may otherwise ignore this warning." - ) - return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0) - -_SUBTYPE_TO_ENCODING = { - "PCM_S8": "PCM_S", - "PCM_16": "PCM_S", - "PCM_24": "PCM_S", - "PCM_32": "PCM_S", - "PCM_U8": "PCM_U", - "FLOAT": "PCM_F", - "DOUBLE": "PCM_F", - "ULAW": "ULAW", - "ALAW": "ALAW", - "VORBIS": "VORBIS", -} - -def _get_encoding(format: str, subtype: str): - if format == "FLAC": - return "FLAC" - return _SUBTYPE_TO_ENCODING.get(subtype, "UNKNOWN") - -def info(filepath: str, format: Optional[str] = None) -> AudioMetaData: - """Get signal information of an audio file. - - Note: - ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts - ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend, - - Args: - filepath (path-like object or file-like object): - Source of audio data. - format (str or None, optional): - Not used. PySoundFile does not accept format hint. 
- - Returns: - AudioMetaData: meta data of the given audio. - - """ - sinfo = soundfile.info(filepath) - return AudioMetaData( - sinfo.samplerate, - sinfo.frames, - sinfo.channels, - bits_per_sample=_get_bit_depth(sinfo.subtype), - encoding=_get_encoding(sinfo.format, sinfo.subtype), - ) diff --git a/paddlespeech/audio/backends/sox_io_backend.py b/paddlespeech/audio/backends/sox_io_backend.py deleted file mode 100644 index fff9e2069678475f311cc88d00135ef051a22643..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/backends/sox_io_backend.py +++ /dev/null @@ -1,101 +0,0 @@ -from pathlib import Path -from typing import Callable -from typing import Optional, Tuple, Union - -import paddle -from paddle import Tensor -from .common import AudioMetaData -import os - -from paddlespeech.audio._internal import module_utils as _mod_utils -from paddlespeech.audio import _paddleaudio as paddleaudio - -#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py - -def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData: - raise RuntimeError("Failed to fetch metadata from {}".format(filepath)) - - -def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioMetaData: - raise RuntimeError("Failed to fetch metadata from {}".format(fileobj)) - - -# Note: need to comply TorchScript syntax -- need annotation and no f-string -def _fail_load( - filepath: str, - frame_offset: int = 0, - num_frames: int = -1, - normalize: bool = True, - channels_first: bool = True, - format: Optional[str] = None, -) -> Tuple[Tensor, int]: - raise RuntimeError("Failed to load audio from {}".format(filepath)) - - -def _fail_load_fileobj(fileobj, *args, **kwargs): - raise RuntimeError(f"Failed to load audio from {fileobj}") - -_fallback_info = _fail_info -_fallback_info_fileobj = _fail_info_fileobj -_fallback_load = _fail_load -_fallback_load_filebj = _fail_load_fileobj - -@_mod_utils.requires_sox() -def load( - filepath: str, - frame_offset: int = 0, - 
num_frames: int=-1, - normalize: bool = True, - channels_first: bool = True, - format: Optional[str]=None, ) -> Tuple[Tensor, int]: - if hasattr(filepath, "read"): - ret = paddleaudio.load_audio_fileobj( - filepath, frame_offset, num_frames, normalize, channels_first, format - ) - if ret is not None: - audio_tensor = paddle.to_tensor(ret[0]) - return (audio_tensor, ret[1]) - return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) - filepath = os.fspath(filepath) - ret = paddleaudio.sox_io_load_audio_file( - filepath, frame_offset, num_frames, normalize, channels_first, format - ) - if ret is not None: - audio_tensor = paddle.to_tensor(ret[0]) - return (audio_tensor, ret[1]) - return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format) - - -@_mod_utils.requires_sox() -def save(filepath: str, - src: Tensor, - sample_rate: int, - channels_first: bool = True, - compression: Optional[float] = None, - format: Optional[str] = None, - encoding: Optional[str] = None, - bits_per_sample: Optional[int] = None, -): - src_arr = src.numpy() - if hasattr(filepath, "write"): - paddleaudio.save_audio_fileobj( - filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample - ) - return - filepath = os.fspath(filepath) - paddleaudio.sox_io_save_audio_file( - filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample - ) - -@_mod_utils.requires_sox() -def info(filepath: str, format: Optional[str] = None,) -> AudioMetaData: - if hasattr(filepath, "read"): - sinfo = paddleaudio.get_info_fileobj(filepath, format) - if sinfo is not None: - return AudioMetaData(*sinfo) - return _fallback_info_fileobj(filepath, format) - filepath = os.fspath(filepath) - sinfo = paddleaudio.get_info_file(filepath, format) - if sinfo is not None: - return AudioMetaData(*sinfo) - return _fallback_info(filepath, format) diff --git 
a/paddlespeech/audio/backends/utils.py b/paddlespeech/audio/backends/utils.py deleted file mode 100644 index 9ea2eaca750eeae8499e94b54c90d5a0cf8065ed..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/backends/utils.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Defines utilities for switching audio backends""" -#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/utils.py - -import warnings -from typing import List -from typing import Optional - -import paddlespeech.audio -from paddlespeech.audio._internal import module_utils as _mod_utils - -from . import no_backend, soundfile_backend, sox_io_backend - -__all__ = [ - "list_audio_backends", - "get_audio_backend", - "set_audio_backend", -] - - -def list_audio_backends() -> List[str]: - """List available backends - - Returns: - List[str]: The list of available backends. - """ - backends = [] - if _mod_utils.is_module_available("soundfile"): - backends.append("soundfile") - if _mod_utils.is_sox_available(): - backends.append("sox_io") - return backends - - -def set_audio_backend(backend: Optional[str]): - """Set the backend for I/O operation - - Args: - backend (str or None): Name of the backend. - One of ``"sox_io"`` or ``"soundfile"`` based on availability - of the system. If ``None`` is provided the current backend is unassigned. 
- """ - if backend is not None and backend not in list_audio_backends(): - raise RuntimeError(f'Backend "{backend}" is not one of ' - f"available backends: {list_audio_backends()}.") - - if backend is None: - module = no_backend - elif backend == "sox_io": - module = sox_io_backend - elif backend == "soundfile": - module = soundfile_backend - else: - raise NotImplementedError(f'Unexpected backend "{backend}"') - - for func in ["save", "load", "info"]: - setattr(paddlespeech.audio, func, getattr(module, func)) - - -# def _init_audio_backend(): -# backends = list_audio_backends() -# if "sox_io" in backends: -# set_audio_backend("sox_io") -# elif "soundfile" in backends: -# set_audio_backend("soundfile") -# else: -# warnings.warn("No audio backend is available.") -# set_audio_backend(None) - - -def _init_audio_backend(): - backends = list_audio_backends() - if "soundfile" in backends: - set_audio_backend("soundfile") - elif "sox_io" in backends: - set_audio_backend("sox_io") - else: - warnings.warn("No audio backend is available.") - set_audio_backend(None) - - -def get_audio_backend() -> Optional[str]: - """Get the name of the current backend - - Returns: - Optional[str]: The name of the current backend or ``None`` if no backend is assigned. - """ - if paddlespeech.audio.load == no_backend.load: - return None - if paddlespeech.audio.load == sox_io_backend.load: - return "sox_io" - if paddlespeech.audio.load == soundfile_backend.load: - return "soundfile" - raise ValueError("Unknown backend.") diff --git a/paddlespeech/audio/compliance/__init__.py b/paddlespeech/audio/compliance/__init__.py deleted file mode 100644 index c08f9ab11ea7b6e71eb62f095b9404e4d4331e91..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/compliance/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from . import kaldi -from . import librosa diff --git a/paddlespeech/audio/compliance/kaldi.py b/paddlespeech/audio/compliance/kaldi.py deleted file mode 100644 index 538be019619441bee7c135b1e8666a806bc31fa2..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/compliance/kaldi.py +++ /dev/null @@ -1,638 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from torchaudio(https://github.com/pytorch/audio) -import math -from typing import Tuple - -import paddle -from paddle import Tensor - -from ..functional import create_dct -from ..functional.window import get_window - -__all__ = [ - 'spectrogram', - 'fbank', - 'mfcc', -] - -# window types -HANNING = 'hann' -HAMMING = 'hamming' -POVEY = 'povey' -RECTANGULAR = 'rect' -BLACKMAN = 'blackman' - - -def _get_epsilon(dtype): - return paddle.to_tensor(1e-07, dtype=dtype) - - -def _next_power_of_2(x: int) -> int: - return 1 if x == 0 else 2**(x - 1).bit_length() - - -def _get_strided(waveform: Tensor, - window_size: int, - window_shift: int, - snip_edges: bool) -> Tensor: - assert waveform.dim() == 1 - num_samples = waveform.shape[0] - - if snip_edges: - if num_samples < window_size: - return paddle.empty((0, 0), dtype=waveform.dtype) - else: - m = 1 + (num_samples - window_size) // window_shift - else: - reversed_waveform = paddle.flip(waveform, [0]) - m = (num_samples + (window_shift // 2)) // window_shift - pad = window_size // 2 - window_shift // 2 - pad_right = reversed_waveform - if pad > 0: - pad_left = reversed_waveform[-pad:] - waveform = paddle.concat((pad_left, waveform, pad_right), axis=0) - else: - waveform = paddle.concat((waveform[-pad:], pad_right), axis=0) - - return paddle.signal.frame(waveform, window_size, window_shift)[:, :m].T - - -def _feature_window_function( - window_type: str, - window_size: int, - blackman_coeff: float, - dtype: int, ) -> Tensor: - if window_type == HANNING: - return get_window('hann', window_size, fftbins=False, dtype=dtype) - elif window_type == HAMMING: - return get_window('hamming', window_size, fftbins=False, dtype=dtype) - elif window_type == POVEY: - return get_window( - 'hann', window_size, fftbins=False, dtype=dtype).pow(0.85) - elif window_type == RECTANGULAR: - return paddle.ones([window_size], dtype=dtype) - elif window_type == BLACKMAN: - a = 2 * math.pi / (window_size - 1) - window_function = 
paddle.arange(window_size, dtype=dtype) - return (blackman_coeff - 0.5 * paddle.cos(a * window_function) + - (0.5 - blackman_coeff) * paddle.cos(2 * a * window_function) - ).astype(dtype) - else: - raise Exception('Invalid window type ' + window_type) - - -def _get_log_energy(strided_input: Tensor, epsilon: Tensor, - energy_floor: float) -> Tensor: - log_energy = paddle.maximum(strided_input.pow(2).sum(1), epsilon).log() - if energy_floor == 0.0: - return log_energy - return paddle.maximum( - log_energy, - paddle.to_tensor(math.log(energy_floor), dtype=strided_input.dtype)) - - -def _get_waveform_and_window_properties( - waveform: Tensor, - channel: int, - sr: int, - frame_shift: float, - frame_length: float, - round_to_power_of_two: bool, - preemphasis_coefficient: float) -> Tuple[Tensor, int, int, int]: - channel = max(channel, 0) - assert channel < waveform.shape[0], ( - 'Invalid channel {} for size {}'.format(channel, waveform.shape[0])) - waveform = waveform[channel, :] # size (n) - window_shift = int( - sr * frame_shift * - 0.001) # pass frame_shift and frame_length in milliseconds - window_size = int(sr * frame_length * 0.001) - padded_window_size = _next_power_of_2( - window_size) if round_to_power_of_two else window_size - - assert 2 <= window_size <= len(waveform), ( - 'choose a window size {} that is [2, {}]'.format(window_size, - len(waveform))) - assert 0 < window_shift, '`window_shift` must be greater than 0' - assert padded_window_size % 2 == 0, 'the padded `window_size` must be divisible by two.' \ - ' use `round_to_power_of_two` or change `frame_length`' - assert 0. 
<= preemphasis_coefficient <= 1.0, '`preemphasis_coefficient` must be between [0,1]' - assert sr > 0, '`sr` must be greater than zero' - return waveform, window_shift, window_size, padded_window_size - - -def _get_window(waveform: Tensor, - padded_window_size: int, - window_size: int, - window_shift: int, - window_type: str, - blackman_coeff: float, - snip_edges: bool, - raw_energy: bool, - energy_floor: float, - dither: float, - remove_dc_offset: bool, - preemphasis_coefficient: float) -> Tuple[Tensor, Tensor]: - dtype = waveform.dtype - epsilon = _get_epsilon(dtype) - - # (m, window_size) - strided_input = _get_strided(waveform, window_size, window_shift, - snip_edges) - - if dither != 0.0: - x = paddle.maximum(epsilon, - paddle.rand(strided_input.shape, dtype=dtype)) - rand_gauss = paddle.sqrt(-2 * x.log()) * paddle.cos(2 * math.pi * x) - strided_input = strided_input + rand_gauss * dither - - if remove_dc_offset: - row_means = paddle.mean(strided_input, axis=1).unsqueeze(1) # (m, 1) - strided_input = strided_input - row_means - - if raw_energy: - signal_log_energy = _get_log_energy(strided_input, epsilon, - energy_floor) # (m) - - if preemphasis_coefficient != 0.0: - offset_strided_input = paddle.nn.functional.pad( - strided_input.unsqueeze(0), (1, 0), - data_format='NCL', - mode='replicate').squeeze(0) # (m, window_size + 1) - strided_input = strided_input - preemphasis_coefficient * offset_strided_input[:, : - -1] - - window_function = _feature_window_function( - window_type, window_size, blackman_coeff, - dtype).unsqueeze(0) # (1, window_size) - strided_input = strided_input * window_function # (m, window_size) - - # (m, padded_window_size) - if padded_window_size != window_size: - padding_right = padded_window_size - window_size - strided_input = paddle.nn.functional.pad( - strided_input.unsqueeze(0), (0, padding_right), - data_format='NCL', - mode='constant', - value=0).squeeze(0) - - if not raw_energy: - signal_log_energy = _get_log_energy(strided_input, 
epsilon, - energy_floor) # size (m) - - return strided_input, signal_log_energy - - -def _subtract_column_mean(tensor: Tensor, subtract_mean: bool) -> Tensor: - if subtract_mean: - col_means = paddle.mean(tensor, axis=0).unsqueeze(0) - tensor = tensor - col_means - return tensor - - -def spectrogram(waveform: Tensor, - blackman_coeff: float=0.42, - channel: int=-1, - dither: float=0.0, - energy_floor: float=1.0, - frame_length: float=25.0, - frame_shift: float=10.0, - preemphasis_coefficient: float=0.97, - raw_energy: bool=True, - remove_dc_offset: bool=True, - round_to_power_of_two: bool=True, - sr: int=16000, - snip_edges: bool=True, - subtract_mean: bool=False, - window_type: str=POVEY) -> Tensor: - """Compute and return a spectrogram from a waveform. The output is identical to Kaldi's. - - Args: - waveform (Tensor): A waveform tensor with shape `(C, T)`. - blackman_coeff (float, optional): Coefficient for Blackman window.. Defaults to 0.42. - channel (int, optional): Select the channel of waveform. Defaults to -1. - dither (float, optional): Dithering constant . Defaults to 0.0. - energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0. - frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0. - frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0. - preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97. - raw_energy (bool, optional): Whether to compute before preemphasis and windowing. Defaults to True. - remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True. - round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input - to FFT. Defaults to True. - sr (int, optional): Sample rate of input waveform. Defaults to 16000. 
- snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it - is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. - subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. - window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY. - - Returns: - Tensor: A spectrogram tensor with shape `(m, padded_window_size // 2 + 1)` where m is the number of frames - depends on frame_length and frame_shift. - """ - dtype = waveform.dtype - epsilon = _get_epsilon(dtype) - - waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties( - waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two, - preemphasis_coefficient) - - strided_input, signal_log_energy = _get_window( - waveform, padded_window_size, window_size, window_shift, window_type, - blackman_coeff, snip_edges, raw_energy, energy_floor, dither, - remove_dc_offset, preemphasis_coefficient) - - # (m, padded_window_size // 2 + 1, 2) - fft = paddle.fft.rfft(strided_input) - - power_spectrum = paddle.maximum( - fft.abs().pow(2.), epsilon).log() # (m, padded_window_size // 2 + 1) - power_spectrum[:, 0] = signal_log_energy - - power_spectrum = _subtract_column_mean(power_spectrum, subtract_mean) - return power_spectrum - - -def _inverse_mel_scale_scalar(mel_freq: float) -> float: - return 700.0 * (math.exp(mel_freq / 1127.0) - 1.0) - - -def _inverse_mel_scale(mel_freq: Tensor) -> Tensor: - return 700.0 * ((mel_freq / 1127.0).exp() - 1.0) - - -def _mel_scale_scalar(freq: float) -> float: - return 1127.0 * math.log(1.0 + freq / 700.0) - - -def _mel_scale(freq: Tensor) -> Tensor: - return 1127.0 * (1.0 + freq / 700.0).log() - - -def _vtln_warp_freq(vtln_low_cutoff: float, - vtln_high_cutoff: float, - low_freq: float, - high_freq: float, - vtln_warp_factor: float, - freq: Tensor) -> Tensor: - assert vtln_low_cutoff > low_freq, 'be 
sure to set the vtln_low option higher than low_freq' - assert vtln_high_cutoff < high_freq, 'be sure to set the vtln_high option lower than high_freq [or negative]' - l = vtln_low_cutoff * max(1.0, vtln_warp_factor) - h = vtln_high_cutoff * min(1.0, vtln_warp_factor) - scale = 1.0 / vtln_warp_factor - Fl = scale * l - Fh = scale * h - assert l > low_freq and h < high_freq - scale_left = (Fl - low_freq) / (l - low_freq) - scale_right = (high_freq - Fh) / (high_freq - h) - res = paddle.empty_like(freq) - - outside_low_high_freq = paddle.less_than(freq, paddle.to_tensor(low_freq)) \ - | paddle.greater_than(freq, paddle.to_tensor(high_freq)) - before_l = paddle.less_than(freq, paddle.to_tensor(l)) - before_h = paddle.less_than(freq, paddle.to_tensor(h)) - after_h = paddle.greater_equal(freq, paddle.to_tensor(h)) - - res[after_h] = high_freq + scale_right * (freq[after_h] - high_freq) - res[before_h] = scale * freq[before_h] - res[before_l] = low_freq + scale_left * (freq[before_l] - low_freq) - res[outside_low_high_freq] = freq[outside_low_high_freq] - - return res - - -def _vtln_warp_mel_freq(vtln_low_cutoff: float, - vtln_high_cutoff: float, - low_freq, - high_freq: float, - vtln_warp_factor: float, - mel_freq: Tensor) -> Tensor: - return _mel_scale( - _vtln_warp_freq(vtln_low_cutoff, vtln_high_cutoff, low_freq, high_freq, - vtln_warp_factor, _inverse_mel_scale(mel_freq))) - - -def _get_mel_banks(num_bins: int, - window_length_padded: int, - sample_freq: float, - low_freq: float, - high_freq: float, - vtln_low: float, - vtln_high: float, - vtln_warp_factor: float) -> Tuple[Tensor, Tensor]: - assert num_bins > 3, 'Must have at least 3 mel bins' - assert window_length_padded % 2 == 0 - num_fft_bins = window_length_padded / 2 - nyquist = 0.5 * sample_freq - - if high_freq <= 0.0: - high_freq += nyquist - - assert (0.0 <= low_freq < nyquist) and (0.0 < high_freq <= nyquist) and (low_freq < high_freq), \ - ('Bad values in options: low-freq {} and high-freq {} vs. 
nyquist {}'.format(low_freq, high_freq, nyquist)) - - fft_bin_width = sample_freq / window_length_padded - mel_low_freq = _mel_scale_scalar(low_freq) - mel_high_freq = _mel_scale_scalar(high_freq) - - mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1) - - if vtln_high < 0.0: - vtln_high += nyquist - - assert vtln_warp_factor == 1.0 or ((low_freq < vtln_low < high_freq) and - (0.0 < vtln_high < high_freq) and (vtln_low < vtln_high)), \ - ('Bad values in options: vtln-low {} and vtln-high {}, versus ' - 'low-freq {} and high-freq {}'.format(vtln_low, vtln_high, low_freq, high_freq)) - - bin = paddle.arange(num_bins).unsqueeze(1) - left_mel = mel_low_freq + bin * mel_freq_delta # (num_bins, 1) - center_mel = mel_low_freq + (bin + 1.0) * mel_freq_delta # (num_bins, 1) - right_mel = mel_low_freq + (bin + 2.0) * mel_freq_delta # (num_bins, 1) - - if vtln_warp_factor != 1.0: - left_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq, - vtln_warp_factor, left_mel) - center_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, - high_freq, vtln_warp_factor, - center_mel) - right_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, - high_freq, vtln_warp_factor, right_mel) - - center_freqs = _inverse_mel_scale(center_mel) # (num_bins) - # (1, num_fft_bins) - mel = _mel_scale(fft_bin_width * paddle.arange(num_fft_bins)).unsqueeze(0) - - # (num_bins, num_fft_bins) - up_slope = (mel - left_mel) / (center_mel - left_mel) - down_slope = (right_mel - mel) / (right_mel - center_mel) - - if vtln_warp_factor == 1.0: - bins = paddle.maximum( - paddle.zeros([1]), paddle.minimum(up_slope, down_slope)) - else: - bins = paddle.zeros_like(up_slope) - up_idx = paddle.greater_than(mel, left_mel) & paddle.less_than( - mel, center_mel) - down_idx = paddle.greater_than(mel, center_mel) & paddle.less_than( - mel, right_mel) - bins[up_idx] = up_slope[up_idx] - bins[down_idx] = down_slope[down_idx] - - return bins, center_freqs - - -def fbank(waveform: Tensor, - 
blackman_coeff: float=0.42, - channel: int=-1, - dither: float=0.0, - energy_floor: float=1.0, - frame_length: float=25.0, - frame_shift: float=10.0, - high_freq: float=0.0, - htk_compat: bool=False, - low_freq: float=20.0, - n_mels: int=23, - preemphasis_coefficient: float=0.97, - raw_energy: bool=True, - remove_dc_offset: bool=True, - round_to_power_of_two: bool=True, - sr: int=16000, - snip_edges: bool=True, - subtract_mean: bool=False, - use_energy: bool=False, - use_log_fbank: bool=True, - use_power: bool=True, - vtln_high: float=-500.0, - vtln_low: float=100.0, - vtln_warp: float=1.0, - window_type: str=POVEY) -> Tensor: - """Compute and return filter banks from a waveform. The output is identical to Kaldi's. - - Args: - waveform (Tensor): A waveform tensor with shape `(C, T)`. - blackman_coeff (float, optional): Coefficient for Blackman window.. Defaults to 0.42. - channel (int, optional): Select the channel of waveform. Defaults to -1. - dither (float, optional): Dithering constant . Defaults to 0.0. - energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0. - frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0. - frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0. - high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0. - htk_compat (bool, optional): Put energy to the last when it is set True. Defaults to False. - low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0. - n_mels (int, optional): Number of output mel bins. Defaults to 23. - preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97. - raw_energy (bool, optional): Whether to compute before preemphasis and windowing. Defaults to True. - remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True. 
- round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input - to FFT. Defaults to True. - sr (int, optional): Sample rate of input waveform. Defaults to 16000. - snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it - is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. - subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. - use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False. - use_log_fbank (bool, optional): Return log fbank when it is set True. Defaults to True. - use_power (bool, optional): Whether to use power instead of magnitude. Defaults to True. - vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function. Defaults to -500.0. - vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function. Defaults to 100.0. - vtln_warp (float, optional): Vtln warp factor. Defaults to 1.0. - window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY. - - Returns: - Tensor: A filter banks tensor with shape `(m, n_mels)`. - """ - dtype = waveform.dtype - - waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties( - waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two, - preemphasis_coefficient) - - strided_input, signal_log_energy = _get_window( - waveform, padded_window_size, window_size, window_shift, window_type, - blackman_coeff, snip_edges, raw_energy, energy_floor, dither, - remove_dc_offset, preemphasis_coefficient) - - # (m, padded_window_size // 2 + 1) - spectrum = paddle.fft.rfft(strided_input).abs() - if use_power: - spectrum = spectrum.pow(2.) 
- - # (n_mels, padded_window_size // 2) - mel_energies, _ = _get_mel_banks(n_mels, padded_window_size, sr, low_freq, - high_freq, vtln_low, vtln_high, vtln_warp) - mel_energies = mel_energies.astype(dtype) - - # (n_mels, padded_window_size // 2 + 1) - mel_energies = paddle.nn.functional.pad( - mel_energies.unsqueeze(0), (0, 1), - data_format='NCL', - mode='constant', - value=0).squeeze(0) - - # (m, n_mels) - mel_energies = paddle.mm(spectrum, mel_energies.T) - if use_log_fbank: - mel_energies = paddle.maximum(mel_energies, _get_epsilon(dtype)).log() - - if use_energy: - signal_log_energy = signal_log_energy.unsqueeze(1) - if htk_compat: - mel_energies = paddle.concat( - (mel_energies, signal_log_energy), axis=1) - else: - mel_energies = paddle.concat( - (signal_log_energy, mel_energies), axis=1) - - # (m, n_mels + 1) - mel_energies = _subtract_column_mean(mel_energies, subtract_mean) - return mel_energies - - -def _get_dct_matrix(n_mfcc: int, n_mels: int) -> Tensor: - dct_matrix = create_dct(n_mels, n_mels, 'ortho') - dct_matrix[:, 0] = math.sqrt(1 / float(n_mels)) - dct_matrix = dct_matrix[:, :n_mfcc] # (n_mels, n_mfcc) - return dct_matrix - - -def _get_lifter_coeffs(n_mfcc: int, cepstral_lifter: float) -> Tensor: - i = paddle.arange(n_mfcc) - return 1.0 + 0.5 * cepstral_lifter * paddle.sin(math.pi * i / - cepstral_lifter) - - -def mfcc(waveform: Tensor, - blackman_coeff: float=0.42, - cepstral_lifter: float=22.0, - channel: int=-1, - dither: float=0.0, - energy_floor: float=1.0, - frame_length: float=25.0, - frame_shift: float=10.0, - high_freq: float=0.0, - htk_compat: bool=False, - low_freq: float=20.0, - n_mfcc: int=13, - n_mels: int=23, - preemphasis_coefficient: float=0.97, - raw_energy: bool=True, - remove_dc_offset: bool=True, - round_to_power_of_two: bool=True, - sr: int=16000, - snip_edges: bool=True, - subtract_mean: bool=False, - use_energy: bool=False, - vtln_high: float=-500.0, - vtln_low: float=100.0, - vtln_warp: float=1.0, - window_type: 
str=POVEY) -> Tensor: - """Compute and return mel frequency cepstral coefficients from a waveform. The output is - identical to Kaldi's. - - Args: - waveform (Tensor): A waveform tensor with shape `(C, T)`. - blackman_coeff (float, optional): Coefficient for Blackman window.. Defaults to 0.42. - cepstral_lifter (float, optional): Scaling of output mfccs. Defaults to 22.0. - channel (int, optional): Select the channel of waveform. Defaults to -1. - dither (float, optional): Dithering constant . Defaults to 0.0. - energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0. - frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0. - frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0. - high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0. - htk_compat (bool, optional): Put energy to the last when it is set True. Defaults to False. - low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0. - n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 13. - n_mels (int, optional): Number of output mel bins. Defaults to 23. - preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97. - raw_energy (bool, optional): Whether to compute before preemphasis and windowing. Defaults to True. - remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True. - round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input - to FFT. Defaults to True. - sr (int, optional): Sample rate of input waveform. Defaults to 16000. - snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it - is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. - subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. 
- use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False. - vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function. Defaults to -500.0. - vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function. Defaults to 100.0. - vtln_warp (float, optional): Vtln warp factor. Defaults to 1.0. - window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY. - - Returns: - Tensor: A mel frequency cepstral coefficients tensor with shape `(m, n_mfcc)`. - """ - assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % ( - n_mfcc, n_mels) - - dtype = waveform.dtype - - # (m, n_mels + use_energy) - feature = fbank( - waveform=waveform, - blackman_coeff=blackman_coeff, - channel=channel, - dither=dither, - energy_floor=energy_floor, - frame_length=frame_length, - frame_shift=frame_shift, - high_freq=high_freq, - htk_compat=htk_compat, - low_freq=low_freq, - n_mels=n_mels, - preemphasis_coefficient=preemphasis_coefficient, - raw_energy=raw_energy, - remove_dc_offset=remove_dc_offset, - round_to_power_of_two=round_to_power_of_two, - sr=sr, - snip_edges=snip_edges, - subtract_mean=False, - use_energy=use_energy, - use_log_fbank=True, - use_power=True, - vtln_high=vtln_high, - vtln_low=vtln_low, - vtln_warp=vtln_warp, - window_type=window_type) - - if use_energy: - # (m) - signal_log_energy = feature[:, n_mels if htk_compat else 0] - mel_offset = int(not htk_compat) - feature = feature[:, mel_offset:(n_mels + mel_offset)] - - # (n_mels, n_mfcc) - dct_matrix = _get_dct_matrix(n_mfcc, n_mels).astype(dtype=dtype) - - # (m, n_mfcc) - feature = feature.matmul(dct_matrix) - - if cepstral_lifter != 0.0: - # (1, n_mfcc) - lifter_coeffs = _get_lifter_coeffs(n_mfcc, cepstral_lifter).unsqueeze(0) - feature *= lifter_coeffs.astype(dtype=dtype) - - if use_energy: - feature[:, 0] = signal_log_energy - - if htk_compat: - energy = 
feature[:, 0].unsqueeze(1) # (m, 1) - feature = feature[:, 1:] # (m, n_mfcc - 1) - if not use_energy: - energy *= math.sqrt(2) - - feature = paddle.concat((feature, energy), axis=1) - - feature = _subtract_column_mean(feature, subtract_mean) - return feature diff --git a/paddlespeech/audio/compliance/librosa.py b/paddlespeech/audio/compliance/librosa.py deleted file mode 100644 index 17ad51b41d92ebcdf933a5866f72a68ffa65b41b..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/compliance/librosa.py +++ /dev/null @@ -1,788 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Modified from librosa(https://github.com/librosa/librosa) -import warnings -from typing import List -from typing import Optional -from typing import Union - -import numpy as np -import scipy -from numpy.lib.stride_tricks import as_strided -from scipy import signal - -from ..utils import depth_convert -from ..utils import ParameterError - -__all__ = [ - # dsp - 'stft', - 'mfcc', - 'hz_to_mel', - 'mel_to_hz', - 'mel_frequencies', - 'power_to_db', - 'compute_fbank_matrix', - 'melspectrogram', - 'spectrogram', - 'mu_encode', - 'mu_decode', - # augmentation - 'depth_augment', - 'spect_augment', - 'random_crop1d', - 'random_crop2d', - 'adaptive_spect_augment', -] - - -def _pad_center(data: np.ndarray, size: int, axis: int=-1, - **kwargs) -> np.ndarray: - """Pad an array to a target length along a target axis. 
- - This differs from `np.pad` by centering the data prior to padding, - analogous to `str.center` - """ - - kwargs.setdefault("mode", "constant") - n = data.shape[axis] - lpad = int((size - n) // 2) - lengths = [(0, 0)] * data.ndim - lengths[axis] = (lpad, int(size - n - lpad)) - - if lpad < 0: - raise ParameterError(("Target size ({size:d}) must be " - "at least input size ({n:d})")) - - return np.pad(data, lengths, **kwargs) - - -def _split_frames(x: np.ndarray, - frame_length: int, - hop_length: int, - axis: int=-1) -> np.ndarray: - """Slice a data array into (overlapping) frames. - - This function is aligned with librosa.frame - """ - - if not isinstance(x, np.ndarray): - raise ParameterError( - f"Input must be of type numpy.ndarray, given type(x)={type(x)}") - - if x.shape[axis] < frame_length: - raise ParameterError(f"Input is too short (n={x.shape[axis]:d})" - f" for frame_length={frame_length:d}") - - if hop_length < 1: - raise ParameterError(f"Invalid hop_length: {hop_length:d}") - - if axis == -1 and not x.flags["F_CONTIGUOUS"]: - warnings.warn(f"librosa.util.frame called with axis={axis} " - "on a non-contiguous input. This will result in a copy.") - x = np.asfortranarray(x) - elif axis == 0 and not x.flags["C_CONTIGUOUS"]: - warnings.warn(f"librosa.util.frame called with axis={axis} " - "on a non-contiguous input. 
This will result in a copy.") - x = np.ascontiguousarray(x) - - n_frames = 1 + (x.shape[axis] - frame_length) // hop_length - strides = np.asarray(x.strides) - - new_stride = np.prod(strides[strides > 0] // x.itemsize) * x.itemsize - - if axis == -1: - shape = list(x.shape)[:-1] + [frame_length, n_frames] - strides = list(strides) + [hop_length * new_stride] - - elif axis == 0: - shape = [n_frames, frame_length] + list(x.shape)[1:] - strides = [hop_length * new_stride] + list(strides) - - else: - raise ParameterError(f"Frame axis={axis} must be either 0 or -1") - - return as_strided(x, shape=shape, strides=strides) - - -def _check_audio(y, mono=True) -> bool: - """Determine whether a variable contains valid audio data. - - The audio y must be a np.ndarray, ether 1-channel or two channel - """ - if not isinstance(y, np.ndarray): - raise ParameterError("Audio data must be of type numpy.ndarray") - if y.ndim > 2: - raise ParameterError( - f"Invalid shape for audio ndim={y.ndim:d}, shape={y.shape}") - - if mono and y.ndim == 2: - raise ParameterError( - f"Invalid shape for mono audio ndim={y.ndim:d}, shape={y.shape}") - - if (mono and len(y) == 0) or (not mono and y.shape[1] < 0): - raise ParameterError(f"Audio is empty ndim={y.ndim:d}, shape={y.shape}") - - if not np.issubdtype(y.dtype, np.floating): - raise ParameterError("Audio data must be floating-point") - - if not np.isfinite(y).all(): - raise ParameterError("Audio buffer is not finite everywhere") - - return True - - -def hz_to_mel(frequencies: Union[float, List[float], np.ndarray], - htk: bool=False) -> np.ndarray: - """Convert Hz to Mels. - - Args: - frequencies (Union[float, List[float], np.ndarray]): Frequencies in Hz. - htk (bool, optional): Use htk scaling. Defaults to False. - - Returns: - np.ndarray: Frequency in mels. 
- """ - freq = np.asanyarray(frequencies) - - if htk: - return 2595.0 * np.log10(1.0 + freq / 700.0) - - # Fill in the linear part - f_min = 0.0 - f_sp = 200.0 / 3 - - mels = (freq - f_min) / f_sp - - # Fill in the log-scale part - - min_log_hz = 1000.0 # beginning of log region (Hz) - min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) - logstep = np.log(6.4) / 27.0 # step size for log region - - if freq.ndim: - # If we have array data, vectorize - log_t = freq >= min_log_hz - mels[log_t] = min_log_mel + \ - np.log(freq[log_t] / min_log_hz) / logstep - elif freq >= min_log_hz: - # If we have scalar data, heck directly - mels = min_log_mel + np.log(freq / min_log_hz) / logstep - - return mels - - -def mel_to_hz(mels: Union[float, List[float], np.ndarray], - htk: int=False) -> np.ndarray: - """Convert mel bin numbers to frequencies. - - Args: - mels (Union[float, List[float], np.ndarray]): Frequency in mels. - htk (bool, optional): Use htk scaling. Defaults to False. - - Returns: - np.ndarray: Frequencies in Hz. - """ - mel_array = np.asanyarray(mels) - - if htk: - return 700.0 * (10.0**(mel_array / 2595.0) - 1.0) - - # Fill in the linear scale - f_min = 0.0 - f_sp = 200.0 / 3 - freqs = f_min + f_sp * mel_array - - # And now the nonlinear scale - min_log_hz = 1000.0 # beginning of log region (Hz) - min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) - logstep = np.log(6.4) / 27.0 # step size for log region - - if mel_array.ndim: - # If we have vector data, vectorize - log_t = mel_array >= min_log_mel - freqs[log_t] = min_log_hz * \ - np.exp(logstep * (mel_array[log_t] - min_log_mel)) - elif mel_array >= min_log_mel: - # If we have scalar data, check directly - freqs = min_log_hz * np.exp(logstep * (mel_array - min_log_mel)) - - return freqs - - -def mel_frequencies(n_mels: int=128, - fmin: float=0.0, - fmax: float=11025.0, - htk: bool=False) -> np.ndarray: - """Compute mel frequencies. - - Args: - n_mels (int, optional): Number of mel bins. Defaults to 128. 
- fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0. - fmax (float, optional): Maximum frequency in Hz. Defaults to 11025.0. - htk (bool, optional): Use htk scaling. Defaults to False. - - Returns: - np.ndarray: Vector of n_mels frequencies in Hz with shape `(n_mels,)`. - """ - # 'Center freqs' of mel bands - uniformly spaced between limits - min_mel = hz_to_mel(fmin, htk=htk) - max_mel = hz_to_mel(fmax, htk=htk) - - mels = np.linspace(min_mel, max_mel, n_mels) - - return mel_to_hz(mels, htk=htk) - - -def fft_frequencies(sr: int, n_fft: int) -> np.ndarray: - """Compute fourier frequencies. - - Args: - sr (int): Sample rate. - n_fft (int): FFT size. - - Returns: - np.ndarray: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`. - """ - return np.linspace(0, float(sr) / 2, int(1 + n_fft // 2), endpoint=True) - - -def compute_fbank_matrix(sr: int, - n_fft: int, - n_mels: int=128, - fmin: float=0.0, - fmax: Optional[float]=None, - htk: bool=False, - norm: str="slaney", - dtype: type=np.float32) -> np.ndarray: - """Compute fbank matrix. - - Args: - sr (int): Sample rate. - n_fft (int): FFT size. - n_mels (int, optional): Number of mel bins. Defaults to 128. - fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0. - fmax (Optional[float], optional): Maximum frequency in Hz. Defaults to None. - htk (bool, optional): Use htk scaling. Defaults to False. - norm (str, optional): Type of normalization. Defaults to "slaney". - dtype (type, optional): Data type. Defaults to np.float32. - - - Returns: - np.ndarray: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`. 
- """ - if norm != "slaney": - raise ParameterError('norm must set to slaney') - - if fmax is None: - fmax = float(sr) / 2 - - # Initialize the weights - n_mels = int(n_mels) - weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype) - - # Center freqs of each FFT bin - fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft) - - # 'Center freqs' of mel bands - uniformly spaced between limits - mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk) - - fdiff = np.diff(mel_f) - ramps = np.subtract.outer(mel_f, fftfreqs) - - for i in range(n_mels): - # lower and upper slopes for all bins - lower = -ramps[i] / fdiff[i] - upper = ramps[i + 2] / fdiff[i + 1] - - # .. then intersect them with each other and zero - weights[i] = np.maximum(0, np.minimum(lower, upper)) - - if norm == "slaney": - # Slaney-style mel is scaled to be approx constant energy per channel - enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels]) - weights *= enorm[:, np.newaxis] - - # Only check weights if f_mel[0] is positive - if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)): - # This means we have an empty channel somewhere - warnings.warn("Empty filters detected in mel frequency basis. " - "Some channels will produce empty responses. " - "Try increasing your sampling rate (and fmax) or " - "reducing n_mels.") - - return weights - - -def stft(x: np.ndarray, - n_fft: int=2048, - hop_length: Optional[int]=None, - win_length: Optional[int]=None, - window: str="hann", - center: bool=True, - dtype: type=np.complex64, - pad_mode: str="reflect") -> np.ndarray: - """Short-time Fourier transform (STFT). - - Args: - x (np.ndarray): Input waveform in one dimension. - n_fft (int, optional): FFT size. Defaults to 2048. - hop_length (Optional[int], optional): Number of steps to advance between adjacent windows. Defaults to None. - win_length (Optional[int], optional): The size of window. Defaults to None. - window (str, optional): A string of window specification. Defaults to "hann". 
- center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. - dtype (type, optional): Data type of STFT results. Defaults to np.complex64. - pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect". - - Returns: - np.ndarray: The complex STFT output with shape `(n_fft//2 + 1, num_frames)`. - """ - _check_audio(x) - - # By default, use the entire frame - if win_length is None: - win_length = n_fft - - # Set the default hop, if it's not already specified - if hop_length is None: - hop_length = int(win_length // 4) - - fft_window = signal.get_window(window, win_length, fftbins=True) - - # Pad the window out to n_fft size - fft_window = _pad_center(fft_window, n_fft) - - # Reshape so that the window can be broadcast - fft_window = fft_window.reshape((-1, 1)) - - # Pad the time series so that frames are centered - if center: - if n_fft > x.shape[-1]: - warnings.warn( - f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}" - ) - x = np.pad(x, int(n_fft // 2), mode=pad_mode) - - elif n_fft > x.shape[-1]: - raise ParameterError( - f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}" - ) - - # Window the time series. - x_frames = _split_frames(x, frame_length=n_fft, hop_length=hop_length) - # Pre-allocate the STFT matrix - stft_matrix = np.empty( - (int(1 + n_fft // 2), x_frames.shape[1]), dtype=dtype, order="F") - fft = np.fft # use numpy fft as default - # Constrain STFT block sizes to 256 KB - MAX_MEM_BLOCK = 2**8 * 2**10 - # how many columns can we fit within MAX_MEM_BLOCK? 
- n_columns = MAX_MEM_BLOCK // (stft_matrix.shape[0] * stft_matrix.itemsize) - n_columns = max(n_columns, 1) - - for bl_s in range(0, stft_matrix.shape[1], n_columns): - bl_t = min(bl_s + n_columns, stft_matrix.shape[1]) - stft_matrix[:, bl_s:bl_t] = fft.rfft( - fft_window * x_frames[:, bl_s:bl_t], axis=0) - - return stft_matrix - - -def power_to_db(spect: np.ndarray, - ref: float=1.0, - amin: float=1e-10, - top_db: Optional[float]=80.0) -> np.ndarray: - """Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way. - - Args: - spect (np.ndarray): STFT power spectrogram of an input waveform. - ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0. - amin (float, optional): Minimum threshold. Defaults to 1e-10. - top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to 80.0. - - Returns: - np.ndarray: Power spectrogram in db scale. - """ - spect = np.asarray(spect) - - if amin <= 0: - raise ParameterError("amin must be strictly positive") - - if np.issubdtype(spect.dtype, np.complexfloating): - warnings.warn( - "power_to_db was called on complex input so phase " - "information will be discarded. 
To suppress this warning, " - "call power_to_db(np.abs(D)**2) instead.") - magnitude = np.abs(spect) - else: - magnitude = spect - - if callable(ref): - # User supplied a function to calculate reference power - ref_value = ref(magnitude) - else: - ref_value = np.abs(ref) - - log_spec = 10.0 * np.log10(np.maximum(amin, magnitude)) - log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value)) - - if top_db is not None: - if top_db < 0: - raise ParameterError("top_db must be non-negative") - log_spec = np.maximum(log_spec, log_spec.max() - top_db) - - return log_spec - - -def mfcc(x: np.ndarray, - sr: int=16000, - spect: Optional[np.ndarray]=None, - n_mfcc: int=20, - dct_type: int=2, - norm: str="ortho", - lifter: int=0, - **kwargs) -> np.ndarray: - """Mel-frequency cepstral coefficients (MFCCs) - - Args: - x (np.ndarray): Input waveform in one dimension. - sr (int, optional): Sample rate. Defaults to 16000. - spect (Optional[np.ndarray], optional): Input log-power Mel spectrogram. Defaults to None. - n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 20. - dct_type (int, optional): Discrete cosine transform (DCT) type. Defaults to 2. - norm (str, optional): Type of normalization. Defaults to "ortho". - lifter (int, optional): Cepstral filtering. Defaults to 0. - - Returns: - np.ndarray: Mel frequency cepstral coefficients array with shape `(n_mfcc, num_frames)`. 
- """ - if spect is None: - spect = melspectrogram(x, sr=sr, **kwargs) - - M = scipy.fftpack.dct(spect, axis=0, type=dct_type, norm=norm)[:n_mfcc] - - if lifter > 0: - factor = np.sin(np.pi * np.arange(1, 1 + n_mfcc, dtype=M.dtype) / - lifter) - return M * factor[:, np.newaxis] - elif lifter == 0: - return M - else: - raise ParameterError( - f"MFCC lifter={lifter} must be a non-negative number") - - -def melspectrogram(x: np.ndarray, - sr: int=16000, - window_size: int=512, - hop_length: int=320, - n_mels: int=64, - fmin: float=50.0, - fmax: Optional[float]=None, - window: str='hann', - center: bool=True, - pad_mode: str='reflect', - power: float=2.0, - to_db: bool=True, - ref: float=1.0, - amin: float=1e-10, - top_db: Optional[float]=None) -> np.ndarray: - """Compute mel-spectrogram. - - Args: - x (np.ndarray): Input waveform in one dimension. - sr (int, optional): Sample rate. Defaults to 16000. - window_size (int, optional): Size of FFT and window length. Defaults to 512. - hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320. - n_mels (int, optional): Number of mel bins. Defaults to 64. - fmin (float, optional): Minimum frequency in Hz. Defaults to 50.0. - fmax (Optional[float], optional): Maximum frequency in Hz. Defaults to None. - window (str, optional): A string of window specification. Defaults to "hann". - center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. - pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect". - power (float, optional): Exponent for the magnitude melspectrogram. Defaults to 2.0. - to_db (bool, optional): Enable db scale. Defaults to True. - ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0. - amin (float, optional): Minimum threshold. 
Defaults to 1e-10. - top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None. - - Returns: - np.ndarray: The mel-spectrogram in power scale or db scale with shape `(n_mels, num_frames)`. - """ - _check_audio(x, mono=True) - if len(x) <= 0: - raise ParameterError('The input waveform is empty') - - if fmax is None: - fmax = sr // 2 - if fmin < 0 or fmin >= fmax: - raise ParameterError('fmin and fmax must statisfy 0 np.ndarray: - """Compute spectrogram. - - Args: - x (np.ndarray): Input waveform in one dimension. - sr (int, optional): Sample rate. Defaults to 16000. - window_size (int, optional): Size of FFT and window length. Defaults to 512. - hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320. - window (str, optional): A string of window specification. Defaults to "hann". - center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. - pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect". - power (float, optional): Exponent for the magnitude melspectrogram. Defaults to 2.0. - - Returns: - np.ndarray: The STFT spectrogram in power scale `(n_fft//2 + 1, num_frames)`. - """ - - s = stft( - x, - n_fft=window_size, - hop_length=hop_length, - win_length=window_size, - window=window, - center=center, - pad_mode=pad_mode) - - return np.abs(s)**power - - -def mu_encode(x: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray: - """Mu-law encoding. Encode waveform based on mu-law companding. When quantized is True, the result will be converted to integer in range `[0,mu-1]`. Otherwise, the resulting waveform is in range `[-1,1]`. - - Args: - x (np.ndarray): The input waveform to encode. - mu (int, optional): The endoceding parameter. Defaults to 255. - quantized (bool, optional): If `True`, quantize the encoded values into `1 + mu` distinct integer values. 
Defaults to True. - - Returns: - np.ndarray: The mu-law encoded waveform. - """ - mu = 255 - y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu) - if quantized: - y = np.floor((y + 1) / 2 * mu + 0.5) # convert to [0 , mu-1] - return y - - -def mu_decode(y: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray: - """Mu-law decoding. Compute the mu-law decoding given an input code. It assumes that the input `y` is in range `[0,mu-1]` when quantize is True and `[-1,1]` otherwise. - - Args: - y (np.ndarray): The encoded waveform. - mu (int, optional): The endoceding parameter. Defaults to 255. - quantized (bool, optional): If `True`, the input is assumed to be quantized to `1 + mu` distinct integer values. Defaults to True. - - Returns: - np.ndarray: The mu-law decoded waveform. - """ - if mu < 1: - raise ParameterError('mu is typically set as 2**k-1, k=1, 2, 3,...') - - mu = mu - 1 - if quantized: # undo the quantization - y = y * 2 / mu - 1 - x = np.sign(y) / mu * ((1 + mu)**np.abs(y) - 1) - return x - - -def _randint(high: int) -> int: - """Generate one random integer in range [0 high) - - This is a helper function for random data augmentaiton - """ - return int(np.random.randint(0, high=high)) - - -def depth_augment(y: np.ndarray, - choices: List=['int8', 'int16'], - probs: List[float]=[0.5, 0.5]) -> np.ndarray: - """ Audio depth augmentation. Do audio depth augmentation to simulate the distortion brought by quantization. - - Args: - y (np.ndarray): Input waveform array in 1D or 2D. - choices (List, optional): A list of data type to depth conversion. Defaults to ['int8', 'int16']. - probs (List[float], optional): Probabilities to depth conversion. Defaults to [0.5, 0.5]. - - Returns: - np.ndarray: The augmented waveform. 
- """ - assert len(probs) == len( - choices - ), 'number of choices {} must be equal to size of probs {}'.format( - len(choices), len(probs)) - depth = np.random.choice(choices, p=probs) - src_depth = y.dtype - y1 = depth_convert(y, depth) - y2 = depth_convert(y1, src_depth) - - return y2 - - -def adaptive_spect_augment(spect: np.ndarray, - tempo_axis: int=0, - level: float=0.1) -> np.ndarray: - """Do adpative spectrogram augmentation. The level of the augmentation is gowern by the paramter level, ranging from 0 to 1, with 0 represents no augmentation. - - Args: - spect (np.ndarray): Input spectrogram. - tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0. - level (float, optional): The level factor of masking. Defaults to 0.1. - - Returns: - np.ndarray: The augmented spectrogram. - """ - assert spect.ndim == 2., 'only supports 2d tensor or numpy array' - if tempo_axis == 0: - nt, nf = spect.shape - else: - nf, nt = spect.shape - - time_mask_width = int(nt * level * 0.5) - freq_mask_width = int(nf * level * 0.5) - - num_time_mask = int(10 * level) - num_freq_mask = int(10 * level) - - if tempo_axis == 0: - for _ in range(num_time_mask): - start = _randint(nt - time_mask_width) - spect[start:start + time_mask_width, :] = 0 - for _ in range(num_freq_mask): - start = _randint(nf - freq_mask_width) - spect[:, start:start + freq_mask_width] = 0 - else: - for _ in range(num_time_mask): - start = _randint(nt - time_mask_width) - spect[:, start:start + time_mask_width] = 0 - for _ in range(num_freq_mask): - start = _randint(nf - freq_mask_width) - spect[start:start + freq_mask_width, :] = 0 - - return spect - - -def spect_augment(spect: np.ndarray, - tempo_axis: int=0, - max_time_mask: int=3, - max_freq_mask: int=3, - max_time_mask_width: int=30, - max_freq_mask_width: int=20) -> np.ndarray: - """Do spectrogram augmentation in both time and freq axis. - - Args: - spect (np.ndarray): Input spectrogram. - tempo_axis (int, optional): Indicate the tempo axis. 
Defaults to 0. - max_time_mask (int, optional): Maximum number of time masking. Defaults to 3. - max_freq_mask (int, optional): Maximum number of frenquence masking. Defaults to 3. - max_time_mask_width (int, optional): Maximum width of time masking. Defaults to 30. - max_freq_mask_width (int, optional): Maximum width of frenquence masking. Defaults to 20. - - Returns: - np.ndarray: The augmented spectrogram. - """ - assert spect.ndim == 2., 'only supports 2d tensor or numpy array' - if tempo_axis == 0: - nt, nf = spect.shape - else: - nf, nt = spect.shape - - num_time_mask = _randint(max_time_mask) - num_freq_mask = _randint(max_freq_mask) - - time_mask_width = _randint(max_time_mask_width) - freq_mask_width = _randint(max_freq_mask_width) - - if tempo_axis == 0: - for _ in range(num_time_mask): - start = _randint(nt - time_mask_width) - spect[start:start + time_mask_width, :] = 0 - for _ in range(num_freq_mask): - start = _randint(nf - freq_mask_width) - spect[:, start:start + freq_mask_width] = 0 - else: - for _ in range(num_time_mask): - start = _randint(nt - time_mask_width) - spect[:, start:start + time_mask_width] = 0 - for _ in range(num_freq_mask): - start = _randint(nf - freq_mask_width) - spect[start:start + freq_mask_width, :] = 0 - - return spect - - -def random_crop1d(y: np.ndarray, crop_len: int) -> np.ndarray: - """ Random cropping on a input waveform. - - Args: - y (np.ndarray): Input waveform array in 1D. - crop_len (int): Length of waveform to crop. - - Returns: - np.ndarray: The cropped waveform. - """ - if y.ndim != 1: - 'only accept 1d tensor or numpy array' - n = len(y) - idx = _randint(n - crop_len) - return y[idx:idx + crop_len] - - -def random_crop2d(s: np.ndarray, crop_len: int, - tempo_axis: int=0) -> np.ndarray: - """ Random cropping on a spectrogram. - - Args: - s (np.ndarray): Input spectrogram in 2D. - crop_len (int): Length of spectrogram to crop. - tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0. 
- - Returns: - np.ndarray: The cropped spectrogram. - """ - if tempo_axis >= s.ndim: - raise ParameterError('axis out of range') - - n = s.shape[tempo_axis] - idx = _randint(high=n - crop_len) - sli = [slice(None) for i in range(s.ndim)] - sli[tempo_axis] = slice(idx, idx + crop_len) - out = s[tuple(sli)] - return out diff --git a/paddlespeech/audio/datasets/__init__.py b/paddlespeech/audio/datasets/__init__.py deleted file mode 100644 index f95fad3054de8d19f24f881b69b682ae6def5b5b..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/datasets/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from .esc50 import ESC50 -from .gtzan import GTZAN -from .hey_snips import HeySnips -from .rirs_noises import OpenRIRNoise -from .tess import TESS -from .urban_sound import UrbanSound8K -from .voxceleb import VoxCeleb diff --git a/paddlespeech/audio/datasets/dataset.py b/paddlespeech/audio/datasets/dataset.py deleted file mode 100644 index 81e6bdf5efd632578167bd762415b4c4896d4f13..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/datasets/dataset.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import List - -import numpy as np -import paddle - -from ..compliance.kaldi import fbank as kaldi_fbank -from ..compliance.kaldi import mfcc as kaldi_mfcc -from ..compliance.librosa import melspectrogram -from ..compliance.librosa import mfcc - -feat_funcs = { - 'raw': None, - 'melspectrogram': melspectrogram, - 'mfcc': mfcc, - 'kaldi_fbank': kaldi_fbank, - 'kaldi_mfcc': kaldi_mfcc, -} - - -class AudioClassificationDataset(paddle.io.Dataset): - """ - Base class of audio classification dataset. - """ - - def __init__(self, - files: List[str], - labels: List[int], - feat_type: str='raw', - sample_rate: int=None, - **kwargs): - """ - Ags: - files (:obj:`List[str]`): A list of absolute path of audio files. - labels (:obj:`List[int]`): Labels of audio files. - feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. 
- """ - super(AudioClassificationDataset, self).__init__() - - if feat_type not in feat_funcs.keys(): - raise RuntimeError( - f"Unknown feat_type: {feat_type}, it must be one in {list(feat_funcs.keys())}" - ) - - self.files = files - self.labels = labels - - self.feat_type = feat_type - self.sample_rate = sample_rate - self.feat_config = kwargs # Pass keyword arguments to customize feature config - - def _get_data(self, input_file: str): - raise NotImplementedError - - def _convert_to_record(self, idx): - file, label = self.files[idx], self.labels[idx] - - if self.sample_rate is None: - waveform, sample_rate = paddlespeech.audio.load(file) - else: - waveform, sample_rate = paddlespeech.audio.load( - file, sr=self.sample_rate) - - feat_func = feat_funcs[self.feat_type] - - record = {} - if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']: - waveform = paddle.to_tensor(waveform).unsqueeze(0) # (C, T) - record['feat'] = feat_func( - waveform=waveform, sr=self.sample_rate, **self.feat_config) - else: - record['feat'] = feat_func( - waveform, sample_rate, - **self.feat_config) if feat_func else waveform - record['label'] = label - return record - - def __getitem__(self, idx): - record = self._convert_to_record(idx) - if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']: - return self.keys[idx], record['feat'], record['label'] - else: - return np.array(record['feat']).transpose(), np.array( - record['label'], dtype=np.int64) - - def __len__(self): - return len(self.files) diff --git a/paddlespeech/audio/datasets/esc50.py b/paddlespeech/audio/datasets/esc50.py deleted file mode 100644 index f5c7050f3fcc75dcb8fd53e7927cf92c22fb40e2..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/datasets/esc50.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import collections -import os -from typing import List -from typing import Tuple - -from ..utils import DATA_HOME -from ..utils.download import download_and_decompress -from .dataset import AudioClassificationDataset - -__all__ = ['ESC50'] - - -class ESC50(AudioClassificationDataset): - """ - The ESC-50 dataset is a labeled collection of 2000 environmental audio recordings - suitable for benchmarking methods of environmental sound classification. The dataset - consists of 5-second-long recordings organized into 50 semantical classes (with - 40 examples per class) - - Reference: - ESC: Dataset for Environmental Sound Classification - http://dx.doi.org/10.1145/2733373.2806390 - """ - - archieves = [ - { - 'url': - 'https://paddleaudio.bj.bcebos.com/datasets/ESC-50-master.zip', - 'md5': '7771e4b9d86d0945acce719c7a59305a', - }, - ] - label_list = [ - # Animals - 'Dog', - 'Rooster', - 'Pig', - 'Cow', - 'Frog', - 'Cat', - 'Hen', - 'Insects (flying)', - 'Sheep', - 'Crow', - # Natural soundscapes & water sounds - 'Rain', - 'Sea waves', - 'Crackling fire', - 'Crickets', - 'Chirping birds', - 'Water drops', - 'Wind', - 'Pouring water', - 'Toilet flush', - 'Thunderstorm', - # Human, non-speech sounds - 'Crying baby', - 'Sneezing', - 'Clapping', - 'Breathing', - 'Coughing', - 'Footsteps', - 'Laughing', - 'Brushing teeth', - 'Snoring', - 'Drinking, sipping', - # Interior/domestic sounds - 'Door knock', - 'Mouse click', - 'Keyboard typing', - 'Door, wood creaks', - 'Can opening', - 'Washing machine', - 'Vacuum cleaner', - 'Clock alarm', - 'Clock tick', - 'Glass 
breaking', - # Exterior/urban noises - 'Helicopter', - 'Chainsaw', - 'Siren', - 'Car horn', - 'Engine', - 'Train', - 'Church bells', - 'Airplane', - 'Fireworks', - 'Hand saw', - ] - meta = os.path.join('ESC-50-master', 'meta', 'esc50.csv') - meta_info = collections.namedtuple( - 'META_INFO', - ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take')) - audio_path = os.path.join('ESC-50-master', 'audio') - - def __init__(self, - mode: str='train', - split: int=1, - feat_type: str='raw', - **kwargs): - """ - Ags: - mode (:obj:`str`, `optional`, defaults to `train`): - It identifies the dataset mode (train or dev). - split (:obj:`int`, `optional`, defaults to 1): - It specify the fold of dev dataset. - feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. - """ - files, labels = self._get_data(mode, split) - super(ESC50, self).__init__( - files=files, labels=labels, feat_type=feat_type, **kwargs) - - def _get_meta_info(self) -> List[collections.namedtuple]: - ret = [] - with open(os.path.join(DATA_HOME, self.meta), 'r') as rf: - for line in rf.readlines()[1:]: - ret.append(self.meta_info(*line.strip().split(','))) - return ret - - def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]: - if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \ - not os.path.isfile(os.path.join(DATA_HOME, self.meta)): - download_and_decompress(self.archieves, DATA_HOME) - - meta_info = self._get_meta_info() - - files = [] - labels = [] - for sample in meta_info: - filename, fold, target, _, _, _, _ = sample - if mode == 'train' and int(fold) != split: - files.append(os.path.join(DATA_HOME, self.audio_path, filename)) - labels.append(int(target)) - - if mode != 'train' and int(fold) == split: - files.append(os.path.join(DATA_HOME, self.audio_path, filename)) - labels.append(int(target)) - - return files, labels diff --git a/paddlespeech/audio/datasets/gtzan.py 
b/paddlespeech/audio/datasets/gtzan.py deleted file mode 100644 index 1f6835a5a8ea89161c14bda4d619c2a68f45779f..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/datasets/gtzan.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import collections -import os -import random -from typing import List -from typing import Tuple - -from ..utils import DATA_HOME -from ..utils.download import download_and_decompress -from .dataset import AudioClassificationDataset - -__all__ = ['GTZAN'] - - -class GTZAN(AudioClassificationDataset): - """ - The GTZAN dataset consists of 1000 audio tracks each 30 seconds long. It contains 10 genres, - each represented by 100 tracks. The dataset is the most-used public dataset for evaluation - in machine listening research for music genre recognition (MGR). 
- - Reference: - Musical genre classification of audio signals - https://ieeexplore.ieee.org/document/1021072/ - """ - - archieves = [ - { - 'url': 'http://opihi.cs.uvic.ca/sound/genres.tar.gz', - 'md5': '5b3d6dddb579ab49814ab86dba69e7c7', - }, - ] - label_list = [ - 'blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', - 'pop', 'reggae', 'rock' - ] - meta = os.path.join('genres', 'input.mf') - meta_info = collections.namedtuple('META_INFO', ('file_path', 'label')) - audio_path = 'genres' - - def __init__(self, - mode='train', - seed=0, - n_folds=5, - split=1, - feat_type='raw', - **kwargs): - """ - Ags: - mode (:obj:`str`, `optional`, defaults to `train`): - It identifies the dataset mode (train or dev). - seed (:obj:`int`, `optional`, defaults to 0): - Set the random seed to shuffle samples. - n_folds (:obj:`int`, `optional`, defaults to 5): - Split the dataset into n folds. 1 fold for dev dataset and n-1 for train dataset. - split (:obj:`int`, `optional`, defaults to 1): - It specify the fold of dev dataset. - feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. 
- """ - assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}' - files, labels = self._get_data(mode, seed, n_folds, split) - super(GTZAN, self).__init__( - files=files, labels=labels, feat_type=feat_type, **kwargs) - - def _get_meta_info(self) -> List[collections.namedtuple]: - ret = [] - with open(os.path.join(DATA_HOME, self.meta), 'r') as rf: - for line in rf.readlines(): - ret.append(self.meta_info(*line.strip().split('\t'))) - return ret - - def _get_data(self, mode, seed, n_folds, - split) -> Tuple[List[str], List[int]]: - if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \ - not os.path.isfile(os.path.join(DATA_HOME, self.meta)): - download_and_decompress(self.archieves, DATA_HOME) - - meta_info = self._get_meta_info() - random.seed(seed) # shuffle samples to split data - random.shuffle( - meta_info - ) # make sure using the same seed to create train and dev dataset - - files = [] - labels = [] - n_samples_per_fold = len(meta_info) // n_folds - for idx, sample in enumerate(meta_info): - file_path, label = sample - filename = os.path.basename(file_path) - target = self.label_list.index(label) - fold = idx // n_samples_per_fold + 1 - - if mode == 'train' and int(fold) != split: - files.append( - os.path.join(DATA_HOME, self.audio_path, label, filename)) - labels.append(target) - - if mode != 'train' and int(fold) == split: - files.append( - os.path.join(DATA_HOME, self.audio_path, label, filename)) - labels.append(target) - - return files, labels diff --git a/paddlespeech/audio/datasets/hey_snips.py b/paddlespeech/audio/datasets/hey_snips.py deleted file mode 100644 index 7a67b843bb4dca8bea4f49c69cd7dd2105e2618d..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/datasets/hey_snips.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import collections -import json -import os -from typing import List -from typing import Tuple - -from .dataset import AudioClassificationDataset - -__all__ = ['HeySnips'] - - -class HeySnips(AudioClassificationDataset): - meta_info = collections.namedtuple('META_INFO', - ('key', 'label', 'duration', 'wav')) - - def __init__(self, - data_dir: os.PathLike, - mode: str='train', - feat_type: str='kaldi_fbank', - sample_rate: int=16000, - **kwargs): - self.data_dir = data_dir - files, labels = self._get_data(mode) - super(HeySnips, self).__init__( - files=files, - labels=labels, - feat_type=feat_type, - sample_rate=sample_rate, - **kwargs) - - def _get_meta_info(self, mode) -> List[collections.namedtuple]: - ret = [] - with open(os.path.join(self.data_dir, '{}.json'.format(mode)), - 'r') as f: - data = json.load(f) - for item in data: - sample = collections.OrderedDict() - if item['duration'] > 0: - sample['key'] = item['id'] - sample['label'] = 0 if item['is_hotword'] == 1 else -1 - sample['duration'] = item['duration'] - sample['wav'] = os.path.join(self.data_dir, - item['audio_file_path']) - ret.append(self.meta_info(*sample.values())) - return ret - - def _get_data(self, mode: str) -> Tuple[List[str], List[int]]: - meta_info = self._get_meta_info(mode) - - files = [] - labels = [] - self.keys = [] - self.durations = [] - for sample in meta_info: - key, target, duration, wav = sample - files.append(wav) - 
labels.append(int(target)) - self.keys.append(key) - self.durations.append(float(duration)) - - return files, labels diff --git a/paddlespeech/audio/datasets/rirs_noises.py b/paddlespeech/audio/datasets/rirs_noises.py deleted file mode 100644 index 61bbf72a25aff962cd7e693789a9bfa1055d7302..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/datasets/rirs_noises.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import collections -import csv -import os -import random -from typing import List - -from paddle.io import Dataset -from tqdm import tqdm - -from ..utils import DATA_HOME -from ..utils.download import download_and_decompress -from .dataset import feat_funcs - -__all__ = ['OpenRIRNoise'] - - -class OpenRIRNoise(Dataset): - archieves = [ - { - 'url': 'http://www.openslr.org/resources/28/rirs_noises.zip', - 'md5': 'e6f48e257286e05de56413b4779d8ffb', - }, - ] - - sample_rate = 16000 - meta_info = collections.namedtuple('META_INFO', ('id', 'duration', 'wav')) - base_path = os.path.join(DATA_HOME, 'open_rir_noise') - wav_path = os.path.join(base_path, 'RIRS_NOISES') - csv_path = os.path.join(base_path, 'csv') - subsets = ['rir', 'noise'] - - def __init__(self, - subset: str='rir', - feat_type: str='raw', - target_dir=None, - random_chunk: bool=True, - chunk_duration: float=3.0, - seed: int=0, - **kwargs): - - assert subset in self.subsets, \ - 'Dataset subset must be one in {}, but got {}'.format(self.subsets, subset) - - self.subset = subset - self.feat_type = feat_type - self.feat_config = kwargs - self.random_chunk = random_chunk - self.chunk_duration = chunk_duration - - OpenRIRNoise.csv_path = os.path.join( - target_dir, "open_rir_noise", - "csv") if target_dir else self.csv_path - self._data = self._get_data() - super(OpenRIRNoise, self).__init__() - - # Set up a seed to reproduce training or predicting result. - # random.seed(seed) - - def _get_data(self): - # Download audio files. - print(f"rirs noises base path: {self.base_path}") - if not os.path.isdir(self.base_path): - download_and_decompress( - self.archieves, self.base_path, decompress=True) - else: - print( - f"{self.base_path} already exists, we will not download and decompress again" - ) - - # Data preparation. 
- print(f"prepare the csv to {self.csv_path}") - if not os.path.isdir(self.csv_path): - os.makedirs(self.csv_path) - self.prepare_data() - - data = [] - with open(os.path.join(self.csv_path, f'{self.subset}.csv'), 'r') as rf: - for line in rf.readlines()[1:]: - audio_id, duration, wav = line.strip().split(',') - data.append(self.meta_info(audio_id, float(duration), wav)) - - random.shuffle(data) - return data - - def _convert_to_record(self, idx: int): - sample = self._data[idx] - - record = {} - # To show all fields in a namedtuple: `type(sample)._fields` - for field in type(sample)._fields: - record[field] = getattr(sample, field) - - waveform, sr = paddlespeech.audio.load(record['wav']) - - assert self.feat_type in feat_funcs.keys(), \ - f"Unknown feat_type: {self.feat_type}, it must be one in {list(feat_funcs.keys())}" - feat_func = feat_funcs[self.feat_type] - feat = feat_func( - waveform, sr=sr, **self.feat_config) if feat_func else waveform - - record.update({'feat': feat}) - return record - - @staticmethod - def _get_chunks(seg_dur, audio_id, audio_duration): - num_chunks = int(audio_duration / seg_dur) # all in milliseconds - - chunk_lst = [ - audio_id + "_" + str(i * seg_dur) + "_" + str(i * seg_dur + seg_dur) - for i in range(num_chunks) - ] - return chunk_lst - - def _get_audio_info(self, wav_file: str, - split_chunks: bool) -> List[List[str]]: - waveform, sr = paddlespeech.audio.load(wav_file) - audio_id = wav_file.split("/open_rir_noise/")[-1].split(".")[0] - audio_duration = waveform.shape[0] / sr - - ret = [] - if split_chunks and audio_duration > self.chunk_duration: # Split into pieces of self.chunk_duration seconds. 
- uniq_chunks_list = self._get_chunks(self.chunk_duration, audio_id, - audio_duration) - - for idx, chunk in enumerate(uniq_chunks_list): - s, e = chunk.split("_")[-2:] # Timestamps of start and end - start_sample = int(float(s) * sr) - end_sample = int(float(e) * sr) - new_wav_file = os.path.join(self.base_path, - audio_id + f'_chunk_{idx+1:02}.wav') - paddlespeech.audio.save(waveform[start_sample:end_sample], sr, - new_wav_file) - # id, duration, new_wav - ret.append([chunk, self.chunk_duration, new_wav_file]) - else: # Keep whole audio. - ret.append([audio_id, audio_duration, wav_file]) - return ret - - def generate_csv(self, - wav_files: List[str], - output_file: str, - split_chunks: bool=True): - print(f'Generating csv: {output_file}') - header = ["id", "duration", "wav"] - - infos = list( - tqdm( - map(self._get_audio_info, wav_files, [split_chunks] * len( - wav_files)), - total=len(wav_files))) - - csv_lines = [] - for info in infos: - csv_lines.extend(info) - - with open(output_file, mode="w") as csv_f: - csv_writer = csv.writer( - csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL) - csv_writer.writerow(header) - for line in csv_lines: - csv_writer.writerow(line) - - def prepare_data(self): - rir_list = os.path.join(self.wav_path, "real_rirs_isotropic_noises", - "rir_list") - rir_files = [] - with open(rir_list, 'r') as f: - for line in f.readlines(): - rir_file = line.strip().split(' ')[-1] - rir_files.append(os.path.join(self.base_path, rir_file)) - - noise_list = os.path.join(self.wav_path, "pointsource_noises", - "noise_list") - noise_files = [] - with open(noise_list, 'r') as f: - for line in f.readlines(): - noise_file = line.strip().split(' ')[-1] - noise_files.append(os.path.join(self.base_path, noise_file)) - - self.generate_csv(rir_files, os.path.join(self.csv_path, 'rir.csv')) - self.generate_csv(noise_files, os.path.join(self.csv_path, 'noise.csv')) - - def __getitem__(self, idx): - return self._convert_to_record(idx) - - def 
__len__(self): - return len(self._data) diff --git a/paddlespeech/audio/datasets/tess.py b/paddlespeech/audio/datasets/tess.py deleted file mode 100644 index 1469fa5e2456166ef30977ac4ac44bb302be7eb2..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/datasets/tess.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import collections -import os -import random -from typing import List -from typing import Tuple - -from ..utils import DATA_HOME -from ..utils.download import download_and_decompress -from .dataset import AudioClassificationDataset - -__all__ = ['TESS'] - - -class TESS(AudioClassificationDataset): - """ - TESS is a set of 200 target words were spoken in the carrier phrase - "Say the word _____' by two actresses (aged 26 and 64 years) and - recordings were made of the set portraying each of seven emotions(anger, - disgust, fear, happiness, pleasant surprise, sadness, and neutral). - There are 2800 stimuli in total. 
- - Reference: - Toronto emotional speech set (TESS) - https://doi.org/10.5683/SP2/E8H2MF - """ - - archieves = [ - { - 'url': - 'https://bj.bcebos.com/paddleaudio/datasets/TESS_Toronto_emotional_speech_set.zip', - 'md5': - '1465311b24d1de704c4c63e4ccc470c7', - }, - ] - label_list = [ - 'angry', - 'disgust', - 'fear', - 'happy', - 'neutral', - 'ps', # pleasant surprise - 'sad', - ] - meta_info = collections.namedtuple('META_INFO', - ('speaker', 'word', 'emotion')) - audio_path = 'TESS_Toronto_emotional_speech_set' - - def __init__(self, - mode='train', - seed=0, - n_folds=5, - split=1, - feat_type='raw', - **kwargs): - """ - Ags: - mode (:obj:`str`, `optional`, defaults to `train`): - It identifies the dataset mode (train or dev). - seed (:obj:`int`, `optional`, defaults to 0): - Set the random seed to shuffle samples. - n_folds (:obj:`int`, `optional`, defaults to 5): - Split the dataset into n folds. 1 fold for dev dataset and n-1 for train dataset. - split (:obj:`int`, `optional`, defaults to 1): - It specify the fold of dev dataset. - feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. 
- """ - assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}' - files, labels = self._get_data(mode, seed, n_folds, split) - super(TESS, self).__init__( - files=files, labels=labels, feat_type=feat_type, **kwargs) - - def _get_meta_info(self, files) -> List[collections.namedtuple]: - ret = [] - for file in files: - basename_without_extend = os.path.basename(file)[:-4] - ret.append(self.meta_info(*basename_without_extend.split('_'))) - return ret - - def _get_data(self, mode, seed, n_folds, - split) -> Tuple[List[str], List[int]]: - if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)): - download_and_decompress(self.archieves, DATA_HOME) - - wav_files = [] - for root, _, files in os.walk(os.path.join(DATA_HOME, self.audio_path)): - for file in files: - if file.endswith('.wav'): - wav_files.append(os.path.join(root, file)) - - random.seed(seed) # shuffle samples to split data - random.shuffle( - wav_files - ) # make sure using the same seed to create train and dev dataset - meta_info = self._get_meta_info(wav_files) - - files = [] - labels = [] - n_samples_per_fold = len(meta_info) // n_folds - for idx, sample in enumerate(meta_info): - _, _, emotion = sample - target = self.label_list.index(emotion) - fold = idx // n_samples_per_fold + 1 - - if mode == 'train' and int(fold) != split: - files.append(wav_files[idx]) - labels.append(target) - - if mode != 'train' and int(fold) == split: - files.append(wav_files[idx]) - labels.append(target) - - return files, labels diff --git a/paddlespeech/audio/datasets/urban_sound.py b/paddlespeech/audio/datasets/urban_sound.py deleted file mode 100644 index 0389cd5f97471e5f01e738d288f47d82d33650dc..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/datasets/urban_sound.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import collections -import os -from typing import List -from typing import Tuple - -from ..utils import DATA_HOME -from ..utils.download import download_and_decompress -from .dataset import AudioClassificationDataset - -__all__ = ['UrbanSound8K'] - - -class UrbanSound8K(AudioClassificationDataset): - """ - UrbanSound8K dataset contains 8732 labeled sound excerpts (<=4s) of urban - sounds from 10 classes: air_conditioner, car_horn, children_playing, dog_bark, - drilling, enginge_idling, gun_shot, jackhammer, siren, and street_music. The - classes are drawn from the urban sound taxonomy. 
- - Reference: - A Dataset and Taxonomy for Urban Sound Research - https://dl.acm.org/doi/10.1145/2647868.2655045 - """ - - archieves = [ - { - 'url': - 'https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz', - 'md5': '9aa69802bbf37fb986f71ec1483a196e', - }, - ] - label_list = [ - "air_conditioner", "car_horn", "children_playing", "dog_bark", - "drilling", "engine_idling", "gun_shot", "jackhammer", "siren", - "street_music" - ] - meta = os.path.join('UrbanSound8K', 'metadata', 'UrbanSound8K.csv') - meta_info = collections.namedtuple( - 'META_INFO', ('filename', 'fsid', 'start', 'end', 'salience', 'fold', - 'class_id', 'label')) - audio_path = os.path.join('UrbanSound8K', 'audio') - - def __init__(self, - mode: str='train', - split: int=1, - feat_type: str='raw', - **kwargs): - files, labels = self._get_data(mode, split) - super(UrbanSound8K, self).__init__( - files=files, labels=labels, feat_type=feat_type, **kwargs) - """ - Ags: - mode (:obj:`str`, `optional`, defaults to `train`): - It identifies the dataset mode (train or dev). - split (:obj:`int`, `optional`, defaults to 1): - It specify the fold of dev dataset. - feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. 
- """ - - def _get_meta_info(self): - ret = [] - with open(os.path.join(DATA_HOME, self.meta), 'r') as rf: - for line in rf.readlines()[1:]: - ret.append(self.meta_info(*line.strip().split(','))) - return ret - - def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]: - if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \ - not os.path.isfile(os.path.join(DATA_HOME, self.meta)): - download_and_decompress(self.archieves, DATA_HOME) - - meta_info = self._get_meta_info() - - files = [] - labels = [] - for sample in meta_info: - filename, _, _, _, _, fold, target, _ = sample - if mode == 'train' and int(fold) != split: - files.append( - os.path.join(DATA_HOME, self.audio_path, f'fold{fold}', - filename)) - labels.append(int(target)) - - if mode != 'train' and int(fold) == split: - files.append( - os.path.join(DATA_HOME, self.audio_path, f'fold{fold}', - filename)) - labels.append(int(target)) - - return files, labels diff --git a/paddlespeech/audio/datasets/voxceleb.py b/paddlespeech/audio/datasets/voxceleb.py deleted file mode 100644 index e1a8aa38b9ccf3b9db83f76f6d7ecbb9ec56d486..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/datasets/voxceleb.py +++ /dev/null @@ -1,355 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import collections -import csv -import glob -import os -import random -from multiprocessing import cpu_count -from typing import List - -from paddle.io import Dataset -from pathos.multiprocessing import Pool -from tqdm import tqdm - -from ..utils import DATA_HOME -from ..utils import decompress -from ..utils.download import download_and_decompress -from .dataset import feat_funcs - -__all__ = ['VoxCeleb'] - - -class VoxCeleb(Dataset): - source_url = 'https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/' - archieves_audio_dev = [ - { - 'url': source_url + 'vox1_dev_wav_partaa', - 'md5': 'e395d020928bc15670b570a21695ed96', - }, - { - 'url': source_url + 'vox1_dev_wav_partab', - 'md5': 'bbfaaccefab65d82b21903e81a8a8020', - }, - { - 'url': source_url + 'vox1_dev_wav_partac', - 'md5': '017d579a2a96a077f40042ec33e51512', - }, - { - 'url': source_url + 'vox1_dev_wav_partad', - 'md5': '7bb1e9f70fddc7a678fa998ea8b3ba19', - }, - ] - archieves_audio_test = [ - { - 'url': source_url + 'vox1_test_wav.zip', - 'md5': '185fdc63c3c739954633d50379a3d102', - }, - ] - archieves_meta = [ - { - 'url': - 'https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt', - 'md5': - 'b73110731c9223c1461fe49cb48dddfc', - }, - ] - - num_speakers = 1211 # 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41 - sample_rate = 16000 - meta_info = collections.namedtuple( - 'META_INFO', ('id', 'duration', 'wav', 'start', 'stop', 'spk_id')) - base_path = os.path.join(DATA_HOME, 'vox1') - wav_path = os.path.join(base_path, 'wav') - meta_path = os.path.join(base_path, 'meta') - veri_test_file = os.path.join(meta_path, 'veri_test2.txt') - csv_path = os.path.join(base_path, 'csv') - subsets = ['train', 'dev', 'enroll', 'test'] - - def __init__( - self, - subset: str='train', - feat_type: str='raw', - random_chunk: bool=True, - chunk_duration: float=3.0, # seconds - split_ratio: float=0.9, # train split ratio - seed: int=0, - target_dir: str=None, - vox2_base_path=None, - **kwargs): - """VoxCeleb 
data prepare and get the specific dataset audio info - - Args: - subset (str, optional): dataset name, such as train, dev, enroll or test. Defaults to 'train'. - feat_type (str, optional): feat type, such raw, melspectrogram(fbank) or mfcc . Defaults to 'raw'. - random_chunk (bool, optional): random select a duration from audio. Defaults to True. - chunk_duration (float, optional): chunk duration if random_chunk flag is set. Defaults to 3.0. - target_dir (str, optional): data dir, audio info will be stored in this directory. Defaults to None. - vox2_base_path (_type_, optional): vox2 directory. vox2 data must be converted from m4a to wav. Defaults to None. - """ - assert subset in self.subsets, \ - 'Dataset subset must be one in {}, but got {}'.format(self.subsets, subset) - - self.subset = subset - self.spk_id2label = {} - self.feat_type = feat_type - self.feat_config = kwargs - self.random_chunk = random_chunk - self.chunk_duration = chunk_duration - self.split_ratio = split_ratio - self.target_dir = target_dir if target_dir else VoxCeleb.base_path - self.vox2_base_path = vox2_base_path - - # if we set the target dir, we will change the vox data info data from base path to target dir - VoxCeleb.csv_path = os.path.join( - target_dir, "voxceleb", 'csv') if target_dir else VoxCeleb.csv_path - VoxCeleb.meta_path = os.path.join( - target_dir, "voxceleb", - 'meta') if target_dir else VoxCeleb.meta_path - VoxCeleb.veri_test_file = os.path.join(VoxCeleb.meta_path, - 'veri_test2.txt') - # self._data = self._get_data()[:1000] # KP: Small dataset test. - self._data = self._get_data() - super(VoxCeleb, self).__init__() - - # Set up a seed to reproduce training or predicting result. - # random.seed(seed) - - def _get_data(self): - # Download audio files. 
- # We need the users to decompress all vox1/dev/wav and vox1/test/wav/ to vox1/wav/ dir - # so, we check the vox1/wav dir status - print(f"wav base path: {self.wav_path}") - if not os.path.isdir(self.wav_path): - print("start to download the voxceleb1 dataset") - download_and_decompress( # multi-zip parts concatenate to vox1_dev_wav.zip - self.archieves_audio_dev, - self.base_path, - decompress=False) - download_and_decompress( # download the vox1_test_wav.zip and unzip - self.archieves_audio_test, - self.base_path, - decompress=True) - - # Download all parts and concatenate the files into one zip file. - dev_zipfile = os.path.join(self.base_path, 'vox1_dev_wav.zip') - print(f'Concatenating all parts to: {dev_zipfile}') - os.system( - f'cat {os.path.join(self.base_path, "vox1_dev_wav_parta*")} > {dev_zipfile}' - ) - - # Extract all audio files of dev and test set. - decompress(dev_zipfile, self.base_path) - - # Download meta files. - if not os.path.isdir(self.meta_path): - print("prepare the meta data") - download_and_decompress( - self.archieves_meta, self.meta_path, decompress=False) - - # Data preparation. 
- if not os.path.isdir(self.csv_path): - os.makedirs(self.csv_path) - self.prepare_data() - - data = [] - print( - f"read the {self.subset} from {os.path.join(self.csv_path, f'{self.subset}.csv')}" - ) - with open(os.path.join(self.csv_path, f'{self.subset}.csv'), 'r') as rf: - for line in rf.readlines()[1:]: - audio_id, duration, wav, start, stop, spk_id = line.strip( - ).split(',') - data.append( - self.meta_info(audio_id, - float(duration), wav, - int(start), int(stop), spk_id)) - - with open(os.path.join(self.meta_path, 'spk_id2label.txt'), 'r') as f: - for line in f.readlines(): - spk_id, label = line.strip().split(' ') - self.spk_id2label[spk_id] = int(label) - - return data - - def _convert_to_record(self, idx: int): - sample = self._data[idx] - - record = {} - # To show all fields in a namedtuple: `type(sample)._fields` - for field in type(sample)._fields: - record[field] = getattr(sample, field) - - waveform, sr = paddlespeech.audio.load(record['wav']) - - # random select a chunk audio samples from the audio - if self.random_chunk: - num_wav_samples = waveform.shape[0] - num_chunk_samples = int(self.chunk_duration * sr) - start = random.randint(0, num_wav_samples - num_chunk_samples - 1) - stop = start + num_chunk_samples - else: - start = record['start'] - stop = record['stop'] - - waveform = waveform[start:stop] - - assert self.feat_type in feat_funcs.keys(), \ - f"Unknown feat_type: {self.feat_type}, it must be one in {list(feat_funcs.keys())}" - feat_func = feat_funcs[self.feat_type] - feat = feat_func( - waveform, sr=sr, **self.feat_config) if feat_func else waveform - - record.update({'feat': feat}) - if self.subset in ['train', - 'dev']: # Labels are available in train and dev. 
- record.update({'label': self.spk_id2label[record['spk_id']]}) - - return record - - @staticmethod - def _get_chunks(seg_dur, audio_id, audio_duration): - num_chunks = int(audio_duration / seg_dur) # all in milliseconds - - chunk_lst = [ - audio_id + "_" + str(i * seg_dur) + "_" + str(i * seg_dur + seg_dur) - for i in range(num_chunks) - ] - return chunk_lst - - def _get_audio_info(self, wav_file: str, - split_chunks: bool) -> List[List[str]]: - waveform, sr = paddlespeech.audio.load(wav_file) - spk_id, sess_id, utt_id = wav_file.split("/")[-3:] - audio_id = '-'.join([spk_id, sess_id, utt_id.split(".")[0]]) - audio_duration = waveform.shape[0] / sr - - ret = [] - if split_chunks: # Split into pieces of self.chunk_duration seconds. - uniq_chunks_list = self._get_chunks(self.chunk_duration, audio_id, - audio_duration) - - for chunk in uniq_chunks_list: - s, e = chunk.split("_")[-2:] # Timestamps of start and end - start_sample = int(float(s) * sr) - end_sample = int(float(e) * sr) - # id, duration, wav, start, stop, spk_id - ret.append([ - chunk, audio_duration, wav_file, start_sample, end_sample, - spk_id - ]) - else: # Keep whole audio. 
- ret.append([ - audio_id, audio_duration, wav_file, 0, waveform.shape[0], spk_id - ]) - return ret - - def generate_csv(self, - wav_files: List[str], - output_file: str, - split_chunks: bool=True): - print(f'Generating csv: {output_file}') - header = ["id", "duration", "wav", "start", "stop", "spk_id"] - # Note: this may occurs c++ execption, but the program will execute fine - # so we can ignore the execption - with Pool(cpu_count()) as p: - infos = list( - tqdm( - p.imap(lambda x: self._get_audio_info(x, split_chunks), - wav_files), - total=len(wav_files))) - - csv_lines = [] - for info in infos: - csv_lines.extend(info) - - with open(output_file, mode="w") as csv_f: - csv_writer = csv.writer( - csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL) - csv_writer.writerow(header) - for line in csv_lines: - csv_writer.writerow(line) - - def prepare_data(self): - # Audio of speakers in veri_test_file should not be included in training set. - print("start to prepare the data csv file") - enroll_files = set() - test_files = set() - # get the enroll and test audio file path - with open(self.veri_test_file, 'r') as f: - for line in f.readlines(): - _, enrol_file, test_file = line.strip().split(' ') - enroll_files.add(os.path.join(self.wav_path, enrol_file)) - test_files.add(os.path.join(self.wav_path, test_file)) - enroll_files = sorted(enroll_files) - test_files = sorted(test_files) - - # get the enroll and test speakers - test_spks = set() - for file in (enroll_files + test_files): - spk = file.split('/wav/')[1].split('/')[0] - test_spks.add(spk) - - # get all the train and dev audios file path - audio_files = [] - speakers = set() - print("Getting file list...") - for path in [self.wav_path, self.vox2_base_path]: - # if vox2 directory is not set and vox2 is not a directory - # we will not process this directory - if not path or not os.path.exists(path): - print(f"{path} is an invalid path, please check again, " - "and we will ignore the vox2 base path") - 
continue - for file in glob.glob( - os.path.join(path, "**", "*.wav"), recursive=True): - spk = file.split('/wav/')[1].split('/')[0] - if spk in test_spks: - continue - speakers.add(spk) - audio_files.append(file) - - print( - f"start to generate the {os.path.join(self.meta_path, 'spk_id2label.txt')}" - ) - # encode the train and dev speakers label to spk_id2label.txt - with open(os.path.join(self.meta_path, 'spk_id2label.txt'), 'w') as f: - for label, spk_id in enumerate( - sorted(speakers)): # 1211 vox1, 5994 vox2, 7205 vox1+2 - f.write(f'{spk_id} {label}\n') - - audio_files = sorted(audio_files) - random.shuffle(audio_files) - split_idx = int(self.split_ratio * len(audio_files)) - # split_ratio to train - train_files, dev_files = audio_files[:split_idx], audio_files[ - split_idx:] - - self.generate_csv(train_files, os.path.join(self.csv_path, 'train.csv')) - self.generate_csv(dev_files, os.path.join(self.csv_path, 'dev.csv')) - - self.generate_csv( - enroll_files, - os.path.join(self.csv_path, 'enroll.csv'), - split_chunks=False) - self.generate_csv( - test_files, - os.path.join(self.csv_path, 'test.csv'), - split_chunks=False) - - def __getitem__(self, idx): - return self._convert_to_record(idx) - - def __len__(self): - return len(self._data) diff --git a/paddlespeech/audio/features/__init__.py b/paddlespeech/audio/features/__init__.py deleted file mode 100644 index 00781397f3d35cd995db38741e93db65228bde0a..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/features/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from .layers import LogMelSpectrogram -from .layers import MelSpectrogram -from .layers import MFCC -from .layers import Spectrogram diff --git a/paddlespeech/audio/features/layers.py b/paddlespeech/audio/features/layers.py deleted file mode 100644 index 292363e64d276f53369ea151209972fcc9489e01..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/features/layers.py +++ /dev/null @@ -1,328 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from functools import partial -from typing import Optional -from typing import Union - -import paddle -import paddle.nn as nn -from paddle import Tensor - -from ..functional import compute_fbank_matrix -from ..functional import create_dct -from ..functional import power_to_db -from ..functional.window import get_window - -__all__ = [ - 'Spectrogram', - 'MelSpectrogram', - 'LogMelSpectrogram', - 'MFCC', -] - - -class Spectrogram(nn.Layer): - """Compute spectrogram of given signals, typically audio waveforms. 
- The spectorgram is defined as the complex norm of the short-time Fourier transformation. - - Args: - n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512. - hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None. - win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None. - window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'. - power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0. - center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. - pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'. - dtype (str, optional): Data type of input and window. Defaults to 'float32'. - """ - - def __init__(self, - n_fft: int=512, - hop_length: Optional[int]=None, - win_length: Optional[int]=None, - window: str='hann', - power: float=2.0, - center: bool=True, - pad_mode: str='reflect', - dtype: str='float32') -> None: - super(Spectrogram, self).__init__() - - assert power > 0, 'Power of spectrogram must be > 0.' 
- self.power = power - - if win_length is None: - win_length = n_fft - - self.fft_window = get_window( - window, win_length, fftbins=True, dtype=dtype) - self._stft = partial( - paddle.signal.stft, - n_fft=n_fft, - hop_length=hop_length, - win_length=win_length, - window=self.fft_window, - center=center, - pad_mode=pad_mode) - self.register_buffer('fft_window', self.fft_window) - - def forward(self, x: Tensor) -> Tensor: - """ - Args: - x (Tensor): Tensor of waveforms with shape `(N, T)` - - Returns: - Tensor: Spectrograms with shape `(N, n_fft//2 + 1, num_frames)`. - """ - stft = self._stft(x) - spectrogram = paddle.pow(paddle.abs(stft), self.power) - return spectrogram - - -class MelSpectrogram(nn.Layer): - """Compute the melspectrogram of given signals, typically audio waveforms. It is computed by multiplying spectrogram with Mel filter bank matrix. - - Args: - sr (int, optional): Sample rate. Defaults to 22050. - n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512. - hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None. - win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None. - window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'. - power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0. - center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. - pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'. - n_mels (int, optional): Number of mel bins. Defaults to 64. 
- f_min (float, optional): Minimum frequency in Hz. Defaults to 50.0. - f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None. - htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False. - norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'. - dtype (str, optional): Data type of input and window. Defaults to 'float32'. - """ - - def __init__(self, - sr: int=22050, - n_fft: int=512, - hop_length: Optional[int]=None, - win_length: Optional[int]=None, - window: str='hann', - power: float=2.0, - center: bool=True, - pad_mode: str='reflect', - n_mels: int=64, - f_min: float=50.0, - f_max: Optional[float]=None, - htk: bool=False, - norm: Union[str, float]='slaney', - dtype: str='float32') -> None: - super(MelSpectrogram, self).__init__() - - self._spectrogram = Spectrogram( - n_fft=n_fft, - hop_length=hop_length, - win_length=win_length, - window=window, - power=power, - center=center, - pad_mode=pad_mode, - dtype=dtype) - self.n_mels = n_mels - self.f_min = f_min - self.f_max = f_max - self.htk = htk - self.norm = norm - if f_max is None: - f_max = sr // 2 - self.fbank_matrix = compute_fbank_matrix( - sr=sr, - n_fft=n_fft, - n_mels=n_mels, - f_min=f_min, - f_max=f_max, - htk=htk, - norm=norm, - dtype=dtype) # float64 for better numerical results - self.register_buffer('fbank_matrix', self.fbank_matrix) - - def forward(self, x: Tensor) -> Tensor: - """ - Args: - x (Tensor): Tensor of waveforms with shape `(N, T)` - - Returns: - Tensor: Mel spectrograms with shape `(N, n_mels, num_frames)`. - """ - spect_feature = self._spectrogram(x) - mel_feature = paddle.matmul(self.fbank_matrix, spect_feature) - return mel_feature - - -class LogMelSpectrogram(nn.Layer): - """Compute log-mel-spectrogram feature of given signals, typically audio waveforms. 
- - Args: - sr (int, optional): Sample rate. Defaults to 22050. - n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512. - hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None. - win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None. - window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'. - power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0. - center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. - pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'. - n_mels (int, optional): Number of mel bins. Defaults to 64. - f_min (float, optional): Minimum frequency in Hz. Defaults to 50.0. - f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None. - htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False. - norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'. - ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0. - amin (float, optional): The minimum value of input magnitude. Defaults to 1e-10. - top_db (Optional[float], optional): The maximum db value of spectrogram. Defaults to None. - dtype (str, optional): Data type of input and window. Defaults to 'float32'. 
- """ - - def __init__(self, - sr: int=22050, - n_fft: int=512, - hop_length: Optional[int]=None, - win_length: Optional[int]=None, - window: str='hann', - power: float=2.0, - center: bool=True, - pad_mode: str='reflect', - n_mels: int=64, - f_min: float=50.0, - f_max: Optional[float]=None, - htk: bool=False, - norm: Union[str, float]='slaney', - ref_value: float=1.0, - amin: float=1e-10, - top_db: Optional[float]=None, - dtype: str='float32') -> None: - super(LogMelSpectrogram, self).__init__() - - self._melspectrogram = MelSpectrogram( - sr=sr, - n_fft=n_fft, - hop_length=hop_length, - win_length=win_length, - window=window, - power=power, - center=center, - pad_mode=pad_mode, - n_mels=n_mels, - f_min=f_min, - f_max=f_max, - htk=htk, - norm=norm, - dtype=dtype) - - self.ref_value = ref_value - self.amin = amin - self.top_db = top_db - - def forward(self, x: Tensor) -> Tensor: - """ - Args: - x (Tensor): Tensor of waveforms with shape `(N, T)` - - Returns: - Tensor: Log mel spectrograms with shape `(N, n_mels, num_frames)`. - """ - mel_feature = self._melspectrogram(x) - log_mel_feature = power_to_db( - mel_feature, - ref_value=self.ref_value, - amin=self.amin, - top_db=self.top_db) - return log_mel_feature - - -class MFCC(nn.Layer): - """Compute mel frequency cepstral coefficients(MFCCs) feature of given waveforms. - - Args: - sr (int, optional): Sample rate. Defaults to 22050. - n_mfcc (int, optional): [description]. Defaults to 40. - n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512. - hop_length (Optional[int], optional): The hop length of the short time FFT. If `None`, it is set to `win_length//4`. Defaults to None. - win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None. - window (str, optional): The window function applied to the signal before the Fourier transform. 
Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'. - power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0. - center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. - pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'. - n_mels (int, optional): Number of mel bins. Defaults to 64. - f_min (float, optional): Minimum frequency in Hz. Defaults to 50.0. - f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None. - htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False. - norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'. - ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0. - amin (float, optional): The minimum value of input magnitude. Defaults to 1e-10. - top_db (Optional[float], optional): The maximum db value of spectrogram. Defaults to None. - dtype (str, optional): Data type of input and window. Defaults to 'float32'. 
- """ - - def __init__(self, - sr: int=22050, - n_mfcc: int=40, - n_fft: int=512, - hop_length: Optional[int]=None, - win_length: Optional[int]=None, - window: str='hann', - power: float=2.0, - center: bool=True, - pad_mode: str='reflect', - n_mels: int=64, - f_min: float=50.0, - f_max: Optional[float]=None, - htk: bool=False, - norm: Union[str, float]='slaney', - ref_value: float=1.0, - amin: float=1e-10, - top_db: Optional[float]=None, - dtype: str=paddle.float32) -> None: - super(MFCC, self).__init__() - assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % ( - n_mfcc, n_mels) - self._log_melspectrogram = LogMelSpectrogram( - sr=sr, - n_fft=n_fft, - hop_length=hop_length, - win_length=win_length, - window=window, - power=power, - center=center, - pad_mode=pad_mode, - n_mels=n_mels, - f_min=f_min, - f_max=f_max, - htk=htk, - norm=norm, - ref_value=ref_value, - amin=amin, - top_db=top_db, - dtype=dtype) - self.dct_matrix = create_dct(n_mfcc=n_mfcc, n_mels=n_mels, dtype=dtype) - self.register_buffer('dct_matrix', self.dct_matrix) - - def forward(self, x: Tensor) -> Tensor: - """ - Args: - x (Tensor): Tensor of waveforms with shape `(N, T)` - - Returns: - Tensor: Mel frequency cepstral coefficients with shape `(N, n_mfcc, num_frames)`. - """ - log_mel_feature = self._log_melspectrogram(x) - mfcc = paddle.matmul( - log_mel_feature.transpose((0, 2, 1)), self.dct_matrix).transpose( - (0, 2, 1)) # (B, n_mels, L) - return mfcc diff --git a/paddlespeech/audio/functional/__init__.py b/paddlespeech/audio/functional/__init__.py deleted file mode 100644 index c85232df199e9e888b786a991f8b1d290d38e9d0..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/functional/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from .functional import compute_fbank_matrix -from .functional import create_dct -from .functional import fft_frequencies -from .functional import hz_to_mel -from .functional import mel_frequencies -from .functional import mel_to_hz -from .functional import power_to_db diff --git a/paddlespeech/audio/functional/functional.py b/paddlespeech/audio/functional/functional.py deleted file mode 100644 index 19c63a9aef23c65c94b5de34bebc3974e61be736..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/functional/functional.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# Modified from librosa(https://github.com/librosa/librosa) -import math -from typing import Optional -from typing import Union - -import paddle -from paddle import Tensor - -__all__ = [ - 'hz_to_mel', - 'mel_to_hz', - 'mel_frequencies', - 'fft_frequencies', - 'compute_fbank_matrix', - 'power_to_db', - 'create_dct', -] - - -def hz_to_mel(freq: Union[Tensor, float], - htk: bool=False) -> Union[Tensor, float]: - """Convert Hz to Mels. - - Args: - freq (Union[Tensor, float]): The input tensor with arbitrary shape. - htk (bool, optional): Use htk scaling. Defaults to False. - - Returns: - Union[Tensor, float]: Frequency in mels. - """ - - if htk: - if isinstance(freq, Tensor): - return 2595.0 * paddle.log10(1.0 + freq / 700.0) - else: - return 2595.0 * math.log10(1.0 + freq / 700.0) - - # Fill in the linear part - f_min = 0.0 - f_sp = 200.0 / 3 - - mels = (freq - f_min) / f_sp - - # Fill in the log-scale part - - min_log_hz = 1000.0 # beginning of log region (Hz) - min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) - logstep = math.log(6.4) / 27.0 # step size for log region - - if isinstance(freq, Tensor): - target = min_log_mel + paddle.log( - freq / min_log_hz + 1e-10) / logstep # prevent nan with 1e-10 - mask = (freq > min_log_hz).astype(freq.dtype) - mels = target * mask + mels * ( - 1 - mask) # will replace by masked_fill OP in future - else: - if freq >= min_log_hz: - mels = min_log_mel + math.log(freq / min_log_hz + 1e-10) / logstep - - return mels - - -def mel_to_hz(mel: Union[float, Tensor], - htk: bool=False) -> Union[float, Tensor]: - """Convert mel bin numbers to frequencies. - - Args: - mel (Union[float, Tensor]): The mel frequency represented as a tensor with arbitrary shape. - htk (bool, optional): Use htk scaling. Defaults to False. - - Returns: - Union[float, Tensor]: Frequencies in Hz. 
- """ - if htk: - return 700.0 * (10.0**(mel / 2595.0) - 1.0) - - f_min = 0.0 - f_sp = 200.0 / 3 - freqs = f_min + f_sp * mel - # And now the nonlinear scale - min_log_hz = 1000.0 # beginning of log region (Hz) - min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) - logstep = math.log(6.4) / 27.0 # step size for log region - if isinstance(mel, Tensor): - target = min_log_hz * paddle.exp(logstep * (mel - min_log_mel)) - mask = (mel > min_log_mel).astype(mel.dtype) - freqs = target * mask + freqs * ( - 1 - mask) # will replace by masked_fill OP in future - else: - if mel >= min_log_mel: - freqs = min_log_hz * math.exp(logstep * (mel - min_log_mel)) - - return freqs - - -def mel_frequencies(n_mels: int=64, - f_min: float=0.0, - f_max: float=11025.0, - htk: bool=False, - dtype: str='float32') -> Tensor: - """Compute mel frequencies. - - Args: - n_mels (int, optional): Number of mel bins. Defaults to 64. - f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0. - fmax (float, optional): Maximum frequency in Hz. Defaults to 11025.0. - htk (bool, optional): Use htk scaling. Defaults to False. - dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'. - - Returns: - Tensor: Tensor of n_mels frequencies in Hz with shape `(n_mels,)`. - """ - # 'Center freqs' of mel bands - uniformly spaced between limits - min_mel = hz_to_mel(f_min, htk=htk) - max_mel = hz_to_mel(f_max, htk=htk) - mels = paddle.linspace(min_mel, max_mel, n_mels, dtype=dtype) - freqs = mel_to_hz(mels, htk=htk) - return freqs - - -def fft_frequencies(sr: int, n_fft: int, dtype: str='float32') -> Tensor: - """Compute fourier frequencies. - - Args: - sr (int): Sample rate. - n_fft (int): Number of fft bins. - dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'. - - Returns: - Tensor: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`. 
- """ - return paddle.linspace(0, float(sr) / 2, int(1 + n_fft // 2), dtype=dtype) - - -def compute_fbank_matrix(sr: int, - n_fft: int, - n_mels: int=64, - f_min: float=0.0, - f_max: Optional[float]=None, - htk: bool=False, - norm: Union[str, float]='slaney', - dtype: str='float32') -> Tensor: - """Compute fbank matrix. - - Args: - sr (int): Sample rate. - n_fft (int): Number of fft bins. - n_mels (int, optional): Number of mel bins. Defaults to 64. - f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0. - f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None. - htk (bool, optional): Use htk scaling. Defaults to False. - norm (Union[str, float], optional): Type of normalization. Defaults to 'slaney'. - dtype (str, optional): The data type of the return matrix. Defaults to 'float32'. - - Returns: - Tensor: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`. - """ - - if f_max is None: - f_max = float(sr) / 2 - - # Initialize the weights - weights = paddle.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype) - - # Center freqs of each FFT bin - fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype) - - # 'Center freqs' of mel bands - uniformly spaced between limits - mel_f = mel_frequencies( - n_mels + 2, f_min=f_min, f_max=f_max, htk=htk, dtype=dtype) - - fdiff = mel_f[1:] - mel_f[:-1] #np.diff(mel_f) - ramps = mel_f.unsqueeze(1) - fftfreqs.unsqueeze(0) - #ramps = np.subtract.outer(mel_f, fftfreqs) - - for i in range(n_mels): - # lower and upper slopes for all bins - lower = -ramps[i] / fdiff[i] - upper = ramps[i + 2] / fdiff[i + 1] - - # .. 
then intersect them with each other and zero - weights[i] = paddle.maximum( - paddle.zeros_like(lower), paddle.minimum(lower, upper)) - - # Slaney-style mel is scaled to be approx constant energy per channel - if norm == 'slaney': - enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels]) - weights *= enorm.unsqueeze(1) - elif isinstance(norm, int) or isinstance(norm, float): - weights = paddle.nn.functional.normalize(weights, p=norm, axis=-1) - - return weights - - -def power_to_db(spect: Tensor, - ref_value: float=1.0, - amin: float=1e-10, - top_db: Optional[float]=None) -> Tensor: - """Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way. - - Args: - spect (Tensor): STFT power spectrogram. - ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0. - amin (float, optional): Minimum threshold. Defaults to 1e-10. - top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None. - - Returns: - Tensor: Power spectrogram in db scale. - """ - if amin <= 0: - raise Exception("amin must be strictly positive") - - if ref_value <= 0: - raise Exception("ref_value must be strictly positive") - - ones = paddle.ones_like(spect) - log_spec = 10.0 * paddle.log10(paddle.maximum(ones * amin, spect)) - log_spec -= 10.0 * math.log10(max(ref_value, amin)) - - if top_db is not None: - if top_db < 0: - raise Exception("top_db must be non-negative") - log_spec = paddle.maximum(log_spec, ones * (log_spec.max() - top_db)) - - return log_spec - - -def create_dct(n_mfcc: int, - n_mels: int, - norm: Optional[str]='ortho', - dtype: str='float32') -> Tensor: - """Create a discrete cosine transform(DCT) matrix. - - Args: - n_mfcc (int): Number of mel frequency cepstral coefficients. - n_mels (int): Number of mel filterbanks. 
- norm (Optional[str], optional): Normalizaiton type. Defaults to 'ortho'. - dtype (str, optional): The data type of the return matrix. Defaults to 'float32'. - - Returns: - Tensor: The DCT matrix with shape `(n_mels, n_mfcc)`. - """ - n = paddle.arange(n_mels, dtype=dtype) - k = paddle.arange(n_mfcc, dtype=dtype).unsqueeze(1) - dct = paddle.cos(math.pi / float(n_mels) * (n + 0.5) * - k) # size (n_mfcc, n_mels) - if norm is None: - dct *= 2.0 - else: - assert norm == "ortho" - dct[0] *= 1.0 / math.sqrt(2.0) - dct *= math.sqrt(2.0 / float(n_mels)) - return dct.T diff --git a/paddlespeech/audio/functional/window.py b/paddlespeech/audio/functional/window.py deleted file mode 100644 index c99d50462e355d4f1c199f74d737fc13c339d630..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/functional/window.py +++ /dev/null @@ -1,337 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -import math -from typing import List -from typing import Tuple -from typing import Union - -import paddle -from paddle import Tensor - -__all__ = [ - 'get_window', -] - - -def _cat(x: List[Tensor], data_type: str) -> Tensor: - l = [paddle.to_tensor(_, data_type) for _ in x] - return paddle.concat(l) - - -def _acosh(x: Union[Tensor, float]) -> Tensor: - if isinstance(x, float): - return math.log(x + math.sqrt(x**2 - 1)) - return paddle.log(x + paddle.sqrt(paddle.square(x) - 1)) - - -def _extend(M: int, sym: bool) -> bool: - """Extend window by 1 sample if needed for DFT-even symmetry. """ - if not sym: - return M + 1, True - else: - return M, False - - -def _len_guards(M: int) -> bool: - """Handle small or incorrect window lengths. """ - if int(M) != M or M < 0: - raise ValueError('Window length M must be a non-negative integer') - - return M <= 1 - - -def _truncate(w: Tensor, needed: bool) -> Tensor: - """Truncate window by 1 sample if needed for DFT-even symmetry. """ - if needed: - return w[:-1] - else: - return w - - -def _general_gaussian(M: int, p, sig, sym: bool=True, - dtype: str='float64') -> Tensor: - """Compute a window with a generalized Gaussian shape. - This function is consistent with scipy.signal.windows.general_gaussian(). - """ - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - M, needs_trunc = _extend(M, sym) - - n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0 - w = paddle.exp(-0.5 * paddle.abs(n / sig)**(2 * p)) - - return _truncate(w, needs_trunc) - - -def _general_cosine(M: int, a: float, sym: bool=True, - dtype: str='float64') -> Tensor: - """Compute a generic weighted sum of cosine terms window. - This function is consistent with scipy.signal.windows.general_cosine(). 
- """ - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - M, needs_trunc = _extend(M, sym) - fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype) - w = paddle.zeros((M, ), dtype=dtype) - for k in range(len(a)): - w += a[k] * paddle.cos(k * fac) - return _truncate(w, needs_trunc) - - -def _general_hamming(M: int, alpha: float, sym: bool=True, - dtype: str='float64') -> Tensor: - """Compute a generalized Hamming window. - This function is consistent with scipy.signal.windows.general_hamming() - """ - return _general_cosine(M, [alpha, 1. - alpha], sym, dtype=dtype) - - -def _taylor(M: int, - nbar=4, - sll=30, - norm=True, - sym: bool=True, - dtype: str='float64') -> Tensor: - """Compute a Taylor window. - The Taylor window taper function approximates the Dolph-Chebyshev window's - constant sidelobe level for a parameterized number of near-in sidelobes. - """ - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - M, needs_trunc = _extend(M, sym) - # Original text uses a negative sidelobe level parameter and then negates - # it in the calculation of B. To keep consistent with other methods we - # assume the sidelobe level parameter to be positive. - B = 10**(sll / 20) - A = _acosh(B) / math.pi - s2 = nbar**2 / (A**2 + (nbar - 0.5)**2) - ma = paddle.arange(1, nbar, dtype=dtype) - - Fm = paddle.empty((nbar - 1, ), dtype=dtype) - signs = paddle.empty_like(ma) - signs[::2] = 1 - signs[1::2] = -1 - m2 = ma * ma - for mi in range(len(ma)): - numer = signs[mi] * paddle.prod(1 - m2[mi] / s2 / (A**2 + (ma - 0.5)**2 - )) - if mi == 0: - denom = 2 * paddle.prod(1 - m2[mi] / m2[mi + 1:]) - elif mi == len(ma) - 1: - denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi]) - else: - denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi]) * paddle.prod(1 - m2[ - mi] / m2[mi + 1:]) - - Fm[mi] = numer / denom - - def W(n): - return 1 + 2 * paddle.matmul( - Fm.unsqueeze(0), - paddle.cos(2 * math.pi * ma.unsqueeze(1) * (n - M / 2. 
+ 0.5) / M)) - - w = W(paddle.arange(0, M, dtype=dtype)) - - # normalize (Note that this is not described in the original text [1]) - if norm: - scale = 1.0 / W((M - 1) / 2) - w *= scale - w = w.squeeze() - return _truncate(w, needs_trunc) - - -def _hamming(M: int, sym: bool=True, dtype: str='float64') -> Tensor: - """Compute a Hamming window. - The Hamming window is a taper formed by using a raised cosine with - non-zero endpoints, optimized to minimize the nearest side lobe. - """ - return _general_hamming(M, 0.54, sym, dtype=dtype) - - -def _hann(M: int, sym: bool=True, dtype: str='float64') -> Tensor: - """Compute a Hann window. - The Hann window is a taper formed by using a raised cosine or sine-squared - with ends that touch zero. - """ - return _general_hamming(M, 0.5, sym, dtype=dtype) - - -def _tukey(M: int, alpha=0.5, sym: bool=True, dtype: str='float64') -> Tensor: - """Compute a Tukey window. - The Tukey window is also known as a tapered cosine window. - """ - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - - if alpha <= 0: - return paddle.ones((M, ), dtype=dtype) - elif alpha >= 1.0: - return hann(M, sym=sym) - - M, needs_trunc = _extend(M, sym) - - n = paddle.arange(0, M, dtype=dtype) - width = int(alpha * (M - 1) / 2.0) - n1 = n[0:width + 1] - n2 = n[width + 1:M - width - 1] - n3 = n[M - width - 1:] - - w1 = 0.5 * (1 + paddle.cos(math.pi * (-1 + 2.0 * n1 / alpha / (M - 1)))) - w2 = paddle.ones(n2.shape, dtype=dtype) - w3 = 0.5 * (1 + paddle.cos(math.pi * (-2.0 / alpha + 1 + 2.0 * n3 / alpha / - (M - 1)))) - w = paddle.concat([w1, w2, w3]) - - return _truncate(w, needs_trunc) - - -def _kaiser(M: int, beta: float, sym: bool=True, - dtype: str='float64') -> Tensor: - """Compute a Kaiser window. - The Kaiser window is a taper formed by using a Bessel function. - """ - raise NotImplementedError() - - -def _gaussian(M: int, std: float, sym: bool=True, - dtype: str='float64') -> Tensor: - """Compute a Gaussian window. 
- The Gaussian widows has a Gaussian shape defined by the standard deviation(std). - """ - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - M, needs_trunc = _extend(M, sym) - - n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0 - sig2 = 2 * std * std - w = paddle.exp(-n**2 / sig2) - - return _truncate(w, needs_trunc) - - -def _exponential(M: int, - center=None, - tau=1., - sym: bool=True, - dtype: str='float64') -> Tensor: - """Compute an exponential (or Poisson) window. """ - if sym and center is not None: - raise ValueError("If sym==True, center must be None.") - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - M, needs_trunc = _extend(M, sym) - - if center is None: - center = (M - 1) / 2 - - n = paddle.arange(0, M, dtype=dtype) - w = paddle.exp(-paddle.abs(n - center) / tau) - - return _truncate(w, needs_trunc) - - -def _triang(M: int, sym: bool=True, dtype: str='float64') -> Tensor: - """Compute a triangular window. - """ - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - M, needs_trunc = _extend(M, sym) - - n = paddle.arange(1, (M + 1) // 2 + 1, dtype=dtype) - if M % 2 == 0: - w = (2 * n - 1.0) / M - w = paddle.concat([w, w[::-1]]) - else: - w = 2 * n / (M + 1.0) - w = paddle.concat([w, w[-2::-1]]) - - return _truncate(w, needs_trunc) - - -def _bohman(M: int, sym: bool=True, dtype: str='float64') -> Tensor: - """Compute a Bohman window. - The Bohman window is the autocorrelation of a cosine window. - """ - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - M, needs_trunc = _extend(M, sym) - - fac = paddle.abs(paddle.linspace(-1, 1, M, dtype=dtype)[1:-1]) - w = (1 - fac) * paddle.cos(math.pi * fac) + 1.0 / math.pi * paddle.sin( - math.pi * fac) - w = _cat([0, w, 0], dtype) - - return _truncate(w, needs_trunc) - - -def _blackman(M: int, sym: bool=True, dtype: str='float64') -> Tensor: - """Compute a Blackman window. 
- The Blackman window is a taper formed by using the first three terms of - a summation of cosines. It was designed to have close to the minimal - leakage possible. It is close to optimal, only slightly worse than a - Kaiser window. - """ - return _general_cosine(M, [0.42, 0.50, 0.08], sym, dtype=dtype) - - -def _cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor: - """Compute a window with a simple cosine shape. - """ - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - M, needs_trunc = _extend(M, sym) - w = paddle.sin(math.pi / M * (paddle.arange(0, M, dtype=dtype) + .5)) - - return _truncate(w, needs_trunc) - - -def get_window(window: Union[str, Tuple[str, float]], - win_length: int, - fftbins: bool=True, - dtype: str='float64') -> Tensor: - """Return a window of a given length and type. - - Args: - window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. - win_length (int): Number of samples. - fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True. - dtype (str, optional): The data type of the return window. Defaults to 'float64'. - - Returns: - Tensor: The window represented as a tensor. - """ - sym = not fftbins - - args = () - if isinstance(window, tuple): - winstr = window[0] - if len(window) > 1: - args = window[1:] - elif isinstance(window, str): - if window in ['gaussian', 'exponential']: - raise ValueError("The '" + window + "' window needs one or " - "more parameters -- pass a tuple.") - else: - winstr = window - else: - raise ValueError("%s as window type is not supported." 
% - str(type(window))) - - try: - winfunc = eval('_' + winstr) - except KeyError as e: - raise ValueError("Unknown window type.") from e - - params = (win_length, ) + args - kwargs = {'sym': sym} - return winfunc(*params, dtype=dtype, **kwargs) diff --git a/paddlespeech/audio/io/__init__.py b/paddlespeech/audio/io/__init__.py deleted file mode 100644 index 185a92b8d94d3426d616c0624f0f2ee04339349e..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/io/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/paddlespeech/audio/kaldi/__init__.py b/paddlespeech/audio/kaldi/__init__.py deleted file mode 100644 index f951e280a58b123965c46e6951e610740cd19bb4..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/kaldi/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -from .kaldi import fbank -from .kaldi import pitch diff --git a/paddlespeech/audio/kaldi/kaldi.py b/paddlespeech/audio/kaldi/kaldi.py deleted file mode 100644 index ff0fd8d9db7669b018ee88366e8ad8c1be31f378..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/kaldi/kaldi.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import paddlespeech -from paddlespeech.audio._internal import module_utils - -__all__ = [ - 'fbank', - 'pitch', -] - - -@module_utils.requires_kaldi() -def fbank( - wav, - samp_freq: int=16000, - frame_shift_ms: float=10.0, - frame_length_ms: float=25.0, - dither: float=0.0, - preemph_coeff: float=0.97, - remove_dc_offset: bool=True, - window_type: str='povey', - round_to_power_of_two: bool=True, - blackman_coeff: float=0.42, - snip_edges: bool=True, - allow_downsample: bool=False, - allow_upsample: bool=False, - max_feature_vectors: int=-1, - num_bins: int=23, - low_freq: float=20, - high_freq: float=0, - vtln_low: float=100, - vtln_high: float=-500, - debug_mel: bool=False, - htk_mode: bool=False, - use_energy: bool=False, # fbank opts - energy_floor: float=0.0, - raw_energy: bool=True, - htk_compat: bool=False, - use_log_fbank: bool=True, - use_power: bool=True): - frame_opts = paddlespeech.audio._paddleaudio.FrameExtractionOptions() - mel_opts = paddlespeech.audio._paddleaudio.MelBanksOptions() - fbank_opts = paddlespeech.audio._paddleaudio.FbankOptions() - frame_opts.samp_freq = samp_freq - frame_opts.frame_shift_ms = frame_shift_ms - frame_opts.frame_length_ms = frame_length_ms - frame_opts.dither = dither - frame_opts.preemph_coeff = preemph_coeff - frame_opts.remove_dc_offset = remove_dc_offset - frame_opts.window_type = window_type - frame_opts.round_to_power_of_two = round_to_power_of_two - frame_opts.blackman_coeff = blackman_coeff - frame_opts.snip_edges = snip_edges - frame_opts.allow_downsample = allow_downsample - frame_opts.allow_upsample = allow_upsample - frame_opts.max_feature_vectors = max_feature_vectors - - mel_opts.num_bins = num_bins - mel_opts.low_freq = low_freq - mel_opts.high_freq = high_freq - mel_opts.vtln_low = vtln_low - mel_opts.vtln_high = vtln_high - mel_opts.debug_mel = debug_mel - mel_opts.htk_mode = htk_mode - - fbank_opts.use_energy = use_energy - fbank_opts.energy_floor = energy_floor - fbank_opts.raw_energy = raw_energy - 
fbank_opts.htk_compat = htk_compat - fbank_opts.use_log_fbank = use_log_fbank - fbank_opts.use_power = use_power - feat = paddlespeech.audio._paddleaudio.ComputeFbank(frame_opts, mel_opts, fbank_opts, wav) - return feat - - -@module_utils.requires_kaldi() -def pitch(wav, - samp_freq: int=16000, - frame_shift_ms: float=10.0, - frame_length_ms: float=25.0, - preemph_coeff: float=0.0, - min_f0: int=50, - max_f0: int=400, - soft_min_f0: float=10.0, - penalty_factor: float=0.1, - lowpass_cutoff: int=1000, - resample_freq: int=4000, - delta_pitch: float=0.005, - nccf_ballast: int=7000, - lowpass_filter_width: int=1, - upsample_filter_width: int=5, - max_frames_latency: int=0, - frames_per_chunk: int=0, - simulate_first_pass_online: bool=False, - recompute_frame: int=500, - nccf_ballast_online: bool=False, - snip_edges: bool=True): - pitch_opts = paddlespeech.audio._paddleaudio.PitchExtractionOptions() - pitch_opts.samp_freq = samp_freq - pitch_opts.frame_shift_ms = frame_shift_ms - pitch_opts.frame_length_ms = frame_length_ms - pitch_opts.preemph_coeff = preemph_coeff - pitch_opts.min_f0 = min_f0 - pitch_opts.max_f0 = max_f0 - pitch_opts.soft_min_f0 = soft_min_f0 - pitch_opts.penalty_factor = penalty_factor - pitch_opts.lowpass_cutoff = lowpass_cutoff - pitch_opts.resample_freq = resample_freq - pitch_opts.delta_pitch = delta_pitch - pitch_opts.nccf_ballast = nccf_ballast - pitch_opts.lowpass_filter_width = lowpass_filter_width - pitch_opts.upsample_filter_width = upsample_filter_width - pitch_opts.max_frames_latency = max_frames_latency - pitch_opts.frames_per_chunk = frames_per_chunk - pitch_opts.simulate_first_pass_online = simulate_first_pass_online - pitch_opts.recompute_frame = recompute_frame - pitch_opts.nccf_ballast_online = nccf_ballast_online - pitch_opts.snip_edges = snip_edges - pitch = paddlespeech.audio._paddleaudio.ComputeKaldiPitch(pitch_opts, wav) - return pitch diff --git a/paddlespeech/audio/lib/.gitignore b/paddlespeech/audio/lib/.gitignore deleted 
file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/paddlespeech/audio/metric/__init__.py b/paddlespeech/audio/metric/__init__.py deleted file mode 100644 index 7ce6f5cfffda1f475c2cc6b2734c98027957d123..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/metric/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from .eer import compute_eer -from .eer import compute_minDCF diff --git a/paddlespeech/audio/metric/eer.py b/paddlespeech/audio/metric/eer.py deleted file mode 100644 index a1166d3f93a2135b692ad822aae8134ffd1f5295..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/metric/eer.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from typing import List - -import numpy as np -import paddle -from sklearn.metrics import roc_curve - - -def compute_eer(labels: np.ndarray, scores: np.ndarray) -> List[float]: - """Compute EER and return score threshold. - - Args: - labels (np.ndarray): the trial label, shape: [N], one-dimention, N refer to the samples num - scores (np.ndarray): the trial scores, shape: [N], one-dimention, N refer to the samples num - - Returns: - List[float]: eer and the specific threshold - """ - fpr, tpr, threshold = roc_curve(y_true=labels, y_score=scores) - fnr = 1 - tpr - eer_threshold = threshold[np.nanargmin(np.absolute((fnr - fpr)))] - eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] - return eer, eer_threshold - - -def compute_minDCF(positive_scores, - negative_scores, - c_miss=1.0, - c_fa=1.0, - p_target=0.01): - """ - This is modified from SpeechBrain - https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/utils/metric_stats.py#L509 - Computes the minDCF metric normally used to evaluate speaker verification - systems. The min_DCF is the minimum of the following C_det function computed - within the defined threshold range: - - C_det = c_miss * p_miss * p_target + c_fa * p_fa * (1 -p_target) - - where p_miss is the missing probability and p_fa is the probability of having - a false alarm. - - Args: - positive_scores (Paddle.Tensor): The scores from entries of the same class. - negative_scores (Paddle.Tensor): The scores from entries of different classes. - c_miss (float, optional): Cost assigned to a missing error (default 1.0). - c_fa (float, optional): Cost assigned to a false alarm (default 1.0). - p_target (float, optional): Prior probability of having a target (default 0.01). 
- - Returns: - List[float]: min dcf and the specific threshold - """ - # Computing candidate thresholds - if len(positive_scores.shape) > 1: - positive_scores = positive_scores.squeeze() - - if len(negative_scores.shape) > 1: - negative_scores = negative_scores.squeeze() - - thresholds = paddle.sort(paddle.concat([positive_scores, negative_scores])) - thresholds = paddle.unique(thresholds) - - # Adding intermediate thresholds - interm_thresholds = (thresholds[0:-1] + thresholds[1:]) / 2 - thresholds = paddle.sort(paddle.concat([thresholds, interm_thresholds])) - - # Computing False Rejection Rate (miss detection) - positive_scores = paddle.concat( - len(thresholds) * [positive_scores.unsqueeze(0)]) - pos_scores_threshold = positive_scores.transpose(perm=[1, 0]) <= thresholds - p_miss = (pos_scores_threshold.sum(0) - ).astype("float32") / positive_scores.shape[1] - del positive_scores - del pos_scores_threshold - - # Computing False Acceptance Rate (false alarm) - negative_scores = paddle.concat( - len(thresholds) * [negative_scores.unsqueeze(0)]) - neg_scores_threshold = negative_scores.transpose(perm=[1, 0]) > thresholds - p_fa = (neg_scores_threshold.sum(0) - ).astype("float32") / negative_scores.shape[1] - del negative_scores - del neg_scores_threshold - - c_det = c_miss * p_miss * p_target + c_fa * p_fa * (1 - p_target) - c_min = paddle.min(c_det, axis=0) - min_index = paddle.argmin(c_det, axis=0) - return float(c_min), float(thresholds[min_index]) diff --git a/paddlespeech/audio/sox_effects/__init__.py b/paddlespeech/audio/sox_effects/__init__.py deleted file mode 100644 index d68158776830d05195131300886f8f8a43be7cff..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/sox_effects/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -from paddlespeech.audio._internal import module_utils as _mod_utils - -from .sox_effects import ( - apply_effects_file, - apply_effects_tensor, - effect_names, - init_sox_effects, - shutdown_sox_effects, -) - - -if 
_mod_utils.is_sox_available(): - import atexit - - init_sox_effects() - atexit.register(shutdown_sox_effects) - -__all__ = [ - "init_sox_effects", - "shutdown_sox_effects", - "effect_names", - "apply_effects_tensor", - "apply_effects_file", -] - diff --git a/paddlespeech/audio/sox_effects/sox_effects.py b/paddlespeech/audio/sox_effects/sox_effects.py deleted file mode 100644 index e9b839c1ad2f07977a173406feff9932fc578957..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/sox_effects/sox_effects.py +++ /dev/null @@ -1,238 +0,0 @@ -import os -from typing import List, Optional, Tuple -import paddle -import numpy - -from paddlespeech.audio._internal import module_utils as _mod_utils -from paddlespeech.audio.utils.sox_utils import list_effects -from paddlespeech.audio import _paddleaudio as paddleaudio - -#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/sox_effects/sox_effects.py - -@_mod_utils.requires_sox() -def init_sox_effects(): - """Initialize resources required to use sox effects. - - Note: - You do not need to call this function manually. It is called automatically. - - Once initialized, you do not need to call this function again across the multiple uses of - sox effects though it is safe to do so as long as :func:`shutdown_sox_effects` is not called yet. - Once :func:`shutdown_sox_effects` is called, you can no longer use SoX effects and initializing - again will result in error. - """ - paddleaudio.sox_effects_initialize_sox_effects() - - -@_mod_utils.requires_sox() -def shutdown_sox_effects(): - """Clean up resources required to use sox effects. - - Note: - You do not need to call this function manually. It is called automatically. - - It is safe to call this function multiple times. - Once :py:func:`shutdown_sox_effects` is called, you can no longer use SoX effects and - initializing again will result in error. 
- """ - paddleaudio.sox_effects_shutdown_sox_effects() - - -@_mod_utils.requires_sox() -def effect_names() -> List[str]: - """Gets list of valid sox effect names - - Returns: - List[str]: list of available effect names. - - Example - >>> paddleaudio.sox_effects.effect_names() - ['allpass', 'band', 'bandpass', ... ] - """ - return list(list_effects().keys()) - - -@_mod_utils.requires_sox() -def apply_effects_tensor( - tensor: paddle.Tensor, - sample_rate: int, - effects: List[List[str]], - channels_first: bool = True, -) -> Tuple[paddle.Tensor, int]: - """Apply sox effects to given Tensor - - .. devices:: CPU - - Note: - This function only works on CPU Tensors. - This function works in the way very similar to ``sox`` command, however there are slight - differences. For example, ``sox`` command adds certain effects automatically (such as - ``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function does - only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also - need to give ``rate`` effect with desired sampling rate.). - - Args: - tensor (paddle.Tensor): Input 2D CPU Tensor. - sample_rate (int): Sample rate - effects (List[List[str]]): List of effects. - channels_first (bool, optional): Indicates if the input Tensor's dimension is - `[channels, time]` or `[time, channels]` - - Returns: - (Tensor, int): Resulting Tensor and sample rate. - The resulting Tensor has the same ``dtype`` as the input Tensor, and - the same channels order. The shape of the Tensor can be different based on the - effects applied. Sample rate can also be different based on the effects applied. - - Example - Basic usage - >>> - >>> # Defines the effects to apply - >>> effects = [ - ... ['gain', '-n'], # normalises to 0dB - ... ['pitch', '5'], # 5 cent pitch shift - ... ['rate', '8000'], # resample to 8000 Hz - ... 
] - >>> - >>> # Generate pseudo wave: - >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second - >>> sample_rate = 16000 - >>> waveform = 2 * paddle.rand([2, sample_rate * 1]) - 1 - >>> waveform.shape - paddle.Size([2, 16000]) - >>> waveform - tensor([[ 0.3138, 0.7620, -0.9019, ..., -0.7495, -0.4935, 0.5442], - [-0.0832, 0.0061, 0.8233, ..., -0.5176, -0.9140, -0.2434]]) - >>> - >>> # Apply effects - >>> waveform, sample_rate = apply_effects_tensor( - ... wave_form, sample_rate, effects, channels_first=True) - >>> - >>> # Check the result - >>> # The new waveform is sampling rate 8000, 1 second. - >>> # normalization and channel order are preserved - >>> waveform.shape - paddle.Size([2, 8000]) - >>> waveform - tensor([[ 0.5054, -0.5518, -0.4800, ..., -0.0076, 0.0096, -0.0110], - [ 0.1331, 0.0436, -0.3783, ..., -0.0035, 0.0012, 0.0008]]) - >>> sample_rate - 8000 - - """ - tensor_np = tensor.numpy() - ret = paddleaudio.sox_effects_apply_effects_tensor(tensor_np, sample_rate, effects, channels_first) - if ret is not None: - return (paddle.to_tensor(ret[0]), ret[1]) - raise RuntimeError("Failed to apply sox effect") - - -@_mod_utils.requires_sox() -def apply_effects_file( - path: str, - effects: List[List[str]], - normalize: bool = True, - channels_first: bool = True, - format: Optional[str] = None, -) -> Tuple[paddle.Tensor, int]: - """Apply sox effects to the audio file and load the resulting data as Tensor - - Note: - This function works in the way very similar to ``sox`` command, however there are slight - differences. For example, ``sox`` commnad adds certain effects automatically (such as - ``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given - effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate`` - effect with desired sampling rate, because internally, ``speed`` effects only alter sampling - rate and leave samples untouched. 
- - Args: - path (path-like object or file-like object): - effects (List[List[str]]): List of effects. - normalize (bool, optional): - When ``True``, this function always return ``float32``, and sample values are - normalized to ``[-1.0, 1.0]``. - If input file is integer WAV, giving ``False`` will change the resulting Tensor type to - integer type. This argument has no effect for formats other - than integer WAV type. - channels_first (bool, optional): When True, the returned Tensor has dimension `[channel, time]`. - Otherwise, the returned Tensor's dimension is `[time, channel]`. - format (str or None, optional): - Override the format detection with the given format. - Providing the argument might help when libsox can not infer the format - from header or extension, - - Returns: - (Tensor, int): Resulting Tensor and sample rate. - If ``normalize=True``, the resulting Tensor is always ``float32`` type. - If ``normalize=False`` and the input audio file is of integer WAV file, then the - resulting Tensor has corresponding integer type. (Note 24 bit integer type is not supported) - If ``channels_first=True``, the resulting Tensor has dimension `[channel, time]`, - otherwise `[time, channel]`. - - Example - Basic usage - >>> - >>> # Defines the effects to apply - >>> effects = [ - ... ['gain', '-n'], # normalises to 0dB - ... ['pitch', '5'], # 5 cent pitch shift - ... ['rate', '8000'], # resample to 8000 Hz - ... 
] - >>> - >>> # Apply effects and load data with channels_first=True - >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True) - >>> - >>> # Check the result - >>> waveform.shape - paddle.Size([2, 8000]) - >>> waveform - tensor([[ 5.1151e-03, 1.8073e-02, 2.2188e-02, ..., 1.0431e-07, - -1.4761e-07, 1.8114e-07], - [-2.6924e-03, 2.1860e-03, 1.0650e-02, ..., 6.4122e-07, - -5.6159e-07, 4.8103e-07]]) - >>> sample_rate - 8000 - - Example - Apply random speed perturbation to dataset - >>> - >>> # Load data from file, apply random speed perturbation - >>> class RandomPerturbationFile(paddle.utils.data.Dataset): - ... \"\"\"Given flist, apply random speed perturbation - ... - ... Suppose all the input files are at least one second long. - ... \"\"\" - ... def __init__(self, flist: List[str], sample_rate: int): - ... super().__init__() - ... self.flist = flist - ... self.sample_rate = sample_rate - ... - ... def __getitem__(self, index): - ... speed = 0.5 + 1.5 * random.randn() - ... effects = [ - ... ['gain', '-n', '-10'], # apply 10 db attenuation - ... ['remix', '-'], # merge all the channels - ... ['speed', f'{speed:.5f}'], # duration is now 0.5 ~ 2.0 seconds. - ... ['rate', f'{self.sample_rate}'], - ... ['pad', '0', '1.5'], # add 1.5 seconds silence at the end - ... ['trim', '0', '2'], # get the first 2 seconds - ... ] - ... waveform, _ = paddleaudio.sox_effects.apply_effects_file( - ... self.flist[index], effects) - ... return waveform - ... - ... def __len__(self): - ... return len(self.flist) - ... 
- >>> dataset = RandomPerturbationFile(file_list, sample_rate=8000) - >>> loader = paddle.utils.data.DataLoader(dataset, batch_size=32) - >>> for batch in loader: - >>> pass - """ - if hasattr(path, "read"): - ret = paddleaudio.apply_effects_fileobj(path, effects, normalize, channels_first, format) - if ret is None: - raise RuntimeError("Failed to load audio from {}".format(path)) - return (paddle.to_tensor(ret[0]), ret[1]) - path = os.fspath(path) - ret = paddleaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format) - if ret is not None: - return (paddle.to_tensor(ret[0]), ret[1]) - raise RuntimeError("Failed to load audio from {}".format(path)) \ No newline at end of file diff --git a/paddlespeech/audio/src/CMakeLists.txt b/paddlespeech/audio/src/CMakeLists.txt deleted file mode 100644 index 4c46fbe2483a850c519ab8cdbda90ca4dac31210..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/CMakeLists.txt +++ /dev/null @@ -1,201 +0,0 @@ -if (MSVC) - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) -endif() - -################################################################################ -# libpaddleaudio -################################################################################ -set( - LIBPADDLEAUDIO_SOURCES - utils.cpp - ) - -set( - LIBPADDLEAUDIO_INCLUDE_DIRS - ${PROJECT_SOURCE_DIR} - ) - -set( - LIBPADDLEAUDIO_LINK_LIBRARIES - ) - -set( - LIBPADDLEAUDIO_COMPILE_DEFINITIONS) - -#------------------------------------------------------------------------------# -# START OF CUSTOMIZATION LOGICS -#------------------------------------------------------------------------------# - -if(BUILD_SOX) - list( - APPEND - LIBPADDLEAUDIO_LINK_LIBRARIES - libsox - ) - list( - APPEND - LIBPADDLEAUDIO_SOURCES - #sox/io.cpp - #sox/utils.cpp - #sox/effects.cpp - #sox/effects_chain.cpp - #sox/types.cpp - ) - list( - APPEND - LIBPADDLEAUDIO_COMPILE_DEFINITIONS - INCLUDE_SOX - ) -endif() - - -if(BUILD_KALDI) - list( - APPEND - 
LIBPADDLEAUDIO_LINK_LIBRARIES - libkaldi - ) - list( - APPEND - LIBPADDLEAUDIO_COMPILE_DEFINITIONS - INCLUDE_KALDI - COMPILE_WITHOUT_OPENFST - ) -endif() - -#------------------------------------------------------------------------------# -# END OF CUSTOMIZATION LOGICS -#------------------------------------------------------------------------------# - -function (define_library name source include_dirs link_libraries compile_defs) - add_library(${name} SHARED ${source}) - target_include_directories(${name} PRIVATE ${include_dirs}) - target_link_libraries(${name} ${link_libraries}) - target_compile_definitions(${name} PRIVATE ${compile_defs}) - set_target_properties(${name} PROPERTIES PREFIX "") - if (MSVC) - set_target_properties(${name} PROPERTIES SUFFIX ".pyd") - endif(MSVC) - install( - TARGETS ${name} - LIBRARY DESTINATION lib - RUNTIME DESTINATION lib # For Windows - ) -endfunction() - - -define_library( - libpaddleaudio - "${LIBPADDLEAUDIO_SOURCES}" - "${LIBPADDLEAUDIO_INCLUDE_DIRS}" - "${LIBPADDLEAUDIO_LINK_LIBRARIES}" - "${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}" -) - -if (APPLE) - set(TORCHAUDIO_LIBRARY libpaddleaudio CACHE INTERNAL "") -else() - set(TORCHAUDIO_LIBRARY -Wl,--no-as-needed libpaddleaudio -Wl,--as-needed CACHE INTERNAL "") -endif() - - ################################################################################ -# _paddleaudio.so -################################################################################ -if (BUILD_PADDLEAUDIO_PYTHON_EXTENSION) -if (WIN32) - find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development) - set(ADDITIONAL_ITEMS Python3::Python) -endif() -function(define_extension name sources include_dirs libraries definitions) - add_library(${name} SHARED ${sources}) - target_compile_definitions(${name} PRIVATE "${definitions}") - target_include_directories( - ${name} PRIVATE ${PROJECT_SOURCE_DIR} ${Python_INCLUDE_DIR} ${pybind11_INCLUDE_DIR} ${include_dirs}) - target_link_libraries( - ${name} - ${libraries} - 
${TORCH_PYTHON_LIBRARY} - ${ADDITIONAL_ITEMS} - ) - set_target_properties(${name} PROPERTIES PREFIX "") - if (MSVC) - set_target_properties(${name} PROPERTIES SUFFIX ".pyd") - endif(MSVC) - if (APPLE) - # https://github.com/facebookarchive/caffe2/issues/854#issuecomment-364538485 - # https://github.com/pytorch/pytorch/commit/73f6715f4725a0723d8171d3131e09ac7abf0666 - set_target_properties(${name} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") - endif() - install( - TARGETS ${name} - LIBRARY DESTINATION . - RUNTIME DESTINATION . # For Windows - ) -endfunction() - -set( - EXTENSION_SOURCES - pybind/pybind.cpp - ) -#----------------------------------------------------------------------------# -# START OF CUSTOMIZATION LOGICS -#----------------------------------------------------------------------------# -if(BUILD_SOX) - list( - APPEND - EXTENSION_SOURCES - pybind/sox/effects.cpp - pybind/sox/effects_chain.cpp - pybind/sox/io.cpp - pybind/sox/types.cpp - pybind/sox/utils.cpp - ) -endif() - -if(BUILD_KALDI) - list( - APPEND - EXTENSION_SOURCES - pybind/kaldi/kaldi_feature_wrapper.cc - pybind/kaldi/kaldi_feature.cc - ) -endif() -#----------------------------------------------------------------------------# -# END OF CUSTOMIZATION LOGICS -#----------------------------------------------------------------------------# -define_extension( - _paddleaudio - "${EXTENSION_SOURCES}" - "" - libpaddleaudio - "${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}" - ) -# if(BUILD_CTC_DECODER) -# set( -# DECODER_EXTENSION_SOURCES -# decoder/bindings/pybind.cpp -# ) -# define_extension( -# _paddleaudio_decoder -# "${DECODER_EXTENSION_SOURCES}" -# "" -# "libpaddleaudio_decoder" -# "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}" -# ) -# endif() -# if(USE_FFMPEG) -# set( -# FFMPEG_EXTENSION_SOURCES -# ffmpeg/pybind/typedefs.cpp -# ffmpeg/pybind/pybind.cpp -# ffmpeg/pybind/stream_reader.cpp -# ) -# define_extension( -# _paddleaudio_ffmpeg -# "${FFMPEG_EXTENSION_SOURCES}" -# "${FFMPEG_INCLUDE_DIRS}" -# 
"libpaddleaudio_ffmpeg" -# "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}" -# ) -# endif() -endif() diff --git a/paddlespeech/audio/src/optional/COPYING b/paddlespeech/audio/src/optional/COPYING deleted file mode 100644 index 0e259d42c996742e9e3cba14c677129b2c1b6311..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/optional/COPYING +++ /dev/null @@ -1,121 +0,0 @@ -Creative Commons Legal Code - -CC0 1.0 Universal - - CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE - LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN - ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS - INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES - REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS - PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM - THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED - HEREUNDER. - -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator -and subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). - -Certain owners wish to permanently relinquish those rights to a Work for -the purpose of contributing to a commons of creative, cultural and -scientific works ("Commons") that the public can reliably and without fear -of later claims of infringement build upon, modify, incorporate in other -works, reuse and redistribute as freely as possible in any form whatsoever -and for any purposes, including without limitation commercial purposes. -These owners may contribute to the Commons to promote the ideal of a free -culture and the further production of creative, cultural and scientific -works, or to gain reputation or greater distribution for their Work in -part through the use and efforts of others. 
- -For these and/or other purposes and motivations, and without any -expectation of additional consideration or compensation, the person -associating CC0 with a Work (the "Affirmer"), to the extent that he or she -is an owner of Copyright and Related Rights in the Work, voluntarily -elects to apply CC0 to the Work and publicly distribute the Work under its -terms, with knowledge of his or her Copyright and Related Rights in the -Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). Copyright and Related Rights include, but are not -limited to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, - communicate, and translate a Work; - ii. moral rights retained by the original author(s) and/or performer(s); -iii. publicity and privacy rights pertaining to a person's image or - likeness depicted in a Work; - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - v. rights protecting the extraction, dissemination, use and reuse of data - in a Work; - vi. database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation - thereof, including any amended or successor version of such - directive); and -vii. other similar, equivalent or corresponding rights throughout the - world based on applicable law or treaty, and any national - implementations thereof. - -2. Waiver. 
To the greatest extent permitted by, but not in contravention -of, applicable law, Affirmer hereby overtly, fully, permanently, -irrevocably and unconditionally waives, abandons, and surrenders all of -Affirmer's Copyright and Related Rights and associated claims and causes -of action, whether now known or unknown (including existing as well as -future claims and causes of action), in the Work (i) in all territories -worldwide, (ii) for the maximum duration provided by applicable law or -treaty (including future time extensions), (iii) in any current or future -medium and for any number of copies, and (iv) for any purpose whatsoever, -including without limitation commercial, advertising or promotional -purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each -member of the public at large and to the detriment of Affirmer's heirs and -successors, fully intending that such Waiver shall not be subject to -revocation, rescission, cancellation, termination, or any other legal or -equitable action to disrupt the quiet enjoyment of the Work by the public -as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. Should any part of the Waiver for any reason -be judged legally invalid or ineffective under applicable law, then the -Waiver shall be preserved to the maximum extent permitted taking into -account Affirmer's express Statement of Purpose. 
In addition, to the -extent the Waiver is so judged Affirmer hereby grants to each affected -person a royalty-free, non transferable, non sublicensable, non exclusive, -irrevocable and unconditional license to exercise Affirmer's Copyright and -Related Rights in the Work (i) in all territories worldwide, (ii) for the -maximum duration provided by applicable law or treaty (including future -time extensions), (iii) in any current or future medium and for any number -of copies, and (iv) for any purpose whatsoever, including without -limitation commercial, advertising or promotional purposes (the -"License"). The License shall be deemed effective as of the date CC0 was -applied by Affirmer to the Work. Should any part of the License for any -reason be judged legally invalid or ineffective under applicable law, such -partial invalidity or ineffectiveness shall not invalidate the remainder -of the License, and in such case Affirmer hereby affirms that he or she -will not (i) exercise any of his or her remaining Copyright and Related -Rights in the Work or (ii) assert any associated claims and causes of -action with respect to the Work, in either case contrary to Affirmer's -express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - b. Affirmer offers the Work as-is and makes no representations or - warranties of any kind concerning the Work, express, implied, - statutory or otherwise, including without limitation warranties of - title, merchantability, fitness for a particular purpose, non - infringement, or the absence of latent or other defects, accuracy, or - the present or absence of errors, whether or not discoverable, all to - the greatest extent permissible under applicable law. - c. 
Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without - limitation any person's Copyright and Related Rights in the Work. - Further, Affirmer disclaims responsibility for obtaining any necessary - consents, permissions or other rights required for any use of the - Work. - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to - this CC0 or use of the Work. diff --git a/paddlespeech/audio/src/optional/optional.hpp b/paddlespeech/audio/src/optional/optional.hpp deleted file mode 100644 index bceb41135712a879bf2c205138d54bf06b4f1209..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/optional/optional.hpp +++ /dev/null @@ -1,2182 +0,0 @@ - -/// -// optional - An implementation of std::optional with extensions -// Written in 2017 by Sy Brand (tartanllama@gmail.com, @TartanLlama) -// -// Documentation available at https://tl.tartanllama.xyz/ -// -// To the extent possible under law, the author(s) have dedicated all -// copyright and related and neighboring rights to this software to the -// public domain worldwide. This software is distributed without any warranty. -// -// You should have received a copy of the CC0 Public Domain Dedication -// along with this software. If not, see -// . 
-// https://github.com/TartanLlama/optional -/// - -#ifndef TL_OPTIONAL_HPP -#define TL_OPTIONAL_HPP - -#define TL_OPTIONAL_VERSION_MAJOR 1 -#define TL_OPTIONAL_VERSION_MINOR 0 -#define TL_OPTIONAL_VERSION_PATCH 0 - -#include -#include -#include -#include -#include - -#if (defined(_MSC_VER) && _MSC_VER == 1900) -#define TL_OPTIONAL_MSVC2015 -#endif - -#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \ - !defined(__clang__)) -#define TL_OPTIONAL_GCC49 -#endif - -#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 4 && \ - !defined(__clang__)) -#define TL_OPTIONAL_GCC54 -#endif - -#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 5 && \ - !defined(__clang__)) -#define TL_OPTIONAL_GCC55 -#endif - -#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \ - !defined(__clang__)) -// GCC < 5 doesn't support overloading on const&& for member functions -#define TL_OPTIONAL_NO_CONSTRR - -// GCC < 5 doesn't support some standard C++11 type traits -#define TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \ - std::has_trivial_copy_constructor::value -#define TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \ - std::has_trivial_copy_assign::value - -// This one will be different for GCC 5.7 if it's ever supported -#define TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T) \ - std::is_trivially_destructible::value - -// GCC 5 < v < 8 has a bug in is_trivially_copy_constructible which breaks -// std::vector -// for non-copyable types -#elif (defined(__GNUC__) && __GNUC__ < 8 && !defined(__clang__)) -#ifndef TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX -#define TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX -namespace tl { -namespace detail { -template -struct is_trivially_copy_constructible - : std::is_trivially_copy_constructible {}; -#ifdef _GLIBCXX_VECTOR -template -struct is_trivially_copy_constructible> - : std::is_trivially_copy_constructible {}; -#endif -} -} -#endif - -#define TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \ - 
tl::detail::is_trivially_copy_constructible::value -#define TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \ - std::is_trivially_copy_assignable::value -#define TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T) \ - std::is_trivially_destructible::value -#else -#define TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \ - std::is_trivially_copy_constructible::value -#define TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \ - std::is_trivially_copy_assignable::value -#define TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T) \ - std::is_trivially_destructible::value -#endif - -#if __cplusplus > 201103L -#define TL_OPTIONAL_CXX14 -#endif - -// constexpr implies const in C++11, not C++14 -#if (__cplusplus == 201103L || defined(TL_OPTIONAL_MSVC2015) || \ - defined(TL_OPTIONAL_GCC49)) -#define TL_OPTIONAL_11_CONSTEXPR -#else -#define TL_OPTIONAL_11_CONSTEXPR constexpr -#endif - -namespace tl { -#ifndef TL_MONOSTATE_INPLACE_MUTEX -#define TL_MONOSTATE_INPLACE_MUTEX -/// Used to represent an optional with no data; essentially a bool -class monostate {}; - -/// A tag type to tell optional to construct its value in-place -struct in_place_t { - explicit in_place_t() = default; -}; -/// A tag to tell optional to construct its value in-place -static constexpr in_place_t in_place{}; -#endif - -template -class optional; - -namespace detail { -#ifndef TL_TRAITS_MUTEX -#define TL_TRAITS_MUTEX -// C++14-style aliases for brevity -template -using remove_const_t = typename std::remove_const::type; -template -using remove_reference_t = typename std::remove_reference::type; -template -using decay_t = typename std::decay::type; -template -using enable_if_t = typename std::enable_if::type; -template -using conditional_t = typename std::conditional::type; - -// std::conjunction from C++17 -template -struct conjunction : std::true_type {}; -template -struct conjunction : B {}; -template -struct conjunction - : std::conditional, B>::type {}; - -#if defined(_LIBCPP_VERSION) && __cplusplus == 201103L -#define 
TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND -#endif - -// In C++11 mode, there's an issue in libc++'s std::mem_fn -// which results in a hard-error when using it in a noexcept expression -// in some cases. This is a check to workaround the common failing case. -#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND -template -struct is_pointer_to_non_const_member_func : std::false_type {}; -template -struct is_pointer_to_non_const_member_func - : std::true_type {}; -template -struct is_pointer_to_non_const_member_func - : std::true_type {}; -template -struct is_pointer_to_non_const_member_func - : std::true_type {}; -template -struct is_pointer_to_non_const_member_func - : std::true_type {}; -template -struct is_pointer_to_non_const_member_func - : std::true_type {}; -template -struct is_pointer_to_non_const_member_func - : std::true_type {}; - -template -struct is_const_or_const_ref : std::false_type {}; -template -struct is_const_or_const_ref : std::true_type {}; -template -struct is_const_or_const_ref : std::true_type {}; -#endif - -// std::invoke from C++17 -// https://stackoverflow.com/questions/38288042/c11-14-invoke-workaround -template < - typename Fn, - typename... Args, -#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND - typename = enable_if_t::value && - is_const_or_const_ref::value)>, -#endif - typename = enable_if_t>::value>, - int = 0> -constexpr auto invoke(Fn &&f, Args &&... args) noexcept( - noexcept(std::mem_fn(f)(std::forward(args)...))) - -> decltype(std::mem_fn(f)(std::forward(args)...)) { - return std::mem_fn(f)(std::forward(args)...); -} - -template >::value>> -constexpr auto invoke(Fn &&f, Args &&... 
args) noexcept( - noexcept(std::forward(f)(std::forward(args)...))) - -> decltype(std::forward(f)(std::forward(args)...)) { - return std::forward(f)(std::forward(args)...); -} - -// std::invoke_result from C++17 -template -struct invoke_result_impl; - -template -struct invoke_result_impl< - F, - decltype(detail::invoke(std::declval(), std::declval()...), void()), - Us...> { - using type = - decltype(detail::invoke(std::declval(), std::declval()...)); -}; - -template -using invoke_result = invoke_result_impl; - -template -using invoke_result_t = typename invoke_result::type; - -#if defined(_MSC_VER) && _MSC_VER <= 1900 -// TODO make a version which works with MSVC 2015 -template -struct is_swappable : std::true_type {}; - -template -struct is_nothrow_swappable : std::true_type {}; -#else -// https://stackoverflow.com/questions/26744589/what-is-a-proper-way-to-implement-is-swappable-to-test-for-the-swappable-concept -namespace swap_adl_tests { -// if swap ADL finds this then it would call std::swap otherwise (same -// signature) -struct tag {}; - -template -tag swap(T &, T &); -template -tag swap(T (&a)[N], T (&b)[N]); - -// helper functions to test if an unqualified swap is possible, and if it -// becomes std::swap -template -std::false_type can_swap(...) 
noexcept(false); -template (), std::declval()))> -std::true_type can_swap(int) noexcept(noexcept(swap(std::declval(), - std::declval()))); - -template -std::false_type uses_std(...); -template -std::is_same(), std::declval())), tag> -uses_std(int); - -template -struct is_std_swap_noexcept - : std::integral_constant::value && - std::is_nothrow_move_assignable::value> {}; - -template -struct is_std_swap_noexcept : is_std_swap_noexcept {}; - -template -struct is_adl_swap_noexcept - : std::integral_constant(0))> {}; -} // namespace swap_adl_tests - -template -struct is_swappable - : std::integral_constant< - bool, - decltype(detail::swap_adl_tests::can_swap(0))::value && - (!decltype(detail::swap_adl_tests::uses_std(0))::value || - (std::is_move_assignable::value && - std::is_move_constructible::value))> {}; - -template -struct is_swappable - : std::integral_constant< - bool, - decltype(detail::swap_adl_tests::can_swap(0))::value && - (!decltype( - detail::swap_adl_tests::uses_std(0))::value || - is_swappable::value)> {}; - -template -struct is_nothrow_swappable - : std::integral_constant< - bool, - is_swappable::value && - ((decltype(detail::swap_adl_tests::uses_std(0))::value - &&detail::swap_adl_tests::is_std_swap_noexcept::value) || - (!decltype(detail::swap_adl_tests::uses_std(0))::value && - detail::swap_adl_tests::is_adl_swap_noexcept::value))> { -}; -#endif -#endif - -// std::void_t from C++17 -template -struct voider { - using type = void; -}; -template -using void_t = typename voider::type; - -// Trait for checking if a type is a tl::optional -template -struct is_optional_impl : std::false_type {}; -template -struct is_optional_impl> : std::true_type {}; -template -using is_optional = is_optional_impl>; - -// Change void to tl::monostate -template -using fixup_void = conditional_t::value, monostate, U>; - -template > -using get_map_return = optional>>; - -// Check if invoking F for some Us returns void -template -struct returns_void_impl; -template -struct 
returns_void_impl>, U...> - : std::is_void> {}; -template -using returns_void = returns_void_impl; - -template -using enable_if_ret_void = enable_if_t::value>; - -template -using disable_if_ret_void = enable_if_t::value>; - -template -using enable_forward_value = - detail::enable_if_t::value && - !std::is_same, in_place_t>::value && - !std::is_same, detail::decay_t>::value>; - -template -using enable_from_other = detail::enable_if_t< - std::is_constructible::value && - !std::is_constructible &>::value && - !std::is_constructible &&>::value && - !std::is_constructible &>::value && - !std::is_constructible &&>::value && - !std::is_convertible &, T>::value && - !std::is_convertible &&, T>::value && - !std::is_convertible &, T>::value && - !std::is_convertible &&, T>::value>; - -template -using enable_assign_forward = detail::enable_if_t< - !std::is_same, detail::decay_t>::value && - !detail::conjunction, - std::is_same>>::value && - std::is_constructible::value && std::is_assignable::value>; - -template -using enable_assign_from_other = detail::enable_if_t< - std::is_constructible::value && - std::is_assignable::value && - !std::is_constructible &>::value && - !std::is_constructible &&>::value && - !std::is_constructible &>::value && - !std::is_constructible &&>::value && - !std::is_convertible &, T>::value && - !std::is_convertible &&, T>::value && - !std::is_convertible &, T>::value && - !std::is_convertible &&, T>::value && - !std::is_assignable &>::value && - !std::is_assignable &&>::value && - !std::is_assignable &>::value && - !std::is_assignable &&>::value>; - -// The storage base manages the actual storage, and correctly propagates -// trivial destruction from T. This case is for when T is not trivially -// destructible. -template ::value> -struct optional_storage_base { - TL_OPTIONAL_11_CONSTEXPR optional_storage_base() noexcept - : m_dummy(), - m_has_value(false) {} - - template - TL_OPTIONAL_11_CONSTEXPR optional_storage_base(in_place_t, U &&... 
u) - : m_value(std::forward(u)...), m_has_value(true) {} - - ~optional_storage_base() { - if (m_has_value) { - m_value.~T(); - m_has_value = false; - } - } - - struct dummy {}; - union { - dummy m_dummy; - T m_value; - }; - - bool m_has_value; -}; - -// This case is for when T is trivially destructible. -template -struct optional_storage_base { - TL_OPTIONAL_11_CONSTEXPR optional_storage_base() noexcept - : m_dummy(), - m_has_value(false) {} - - template - TL_OPTIONAL_11_CONSTEXPR optional_storage_base(in_place_t, U &&... u) - : m_value(std::forward(u)...), m_has_value(true) {} - - // No destructor, so this class is trivially destructible - - struct dummy {}; - union { - dummy m_dummy; - T m_value; - }; - - bool m_has_value = false; -}; - -// This base class provides some handy member functions which can be used in -// further derived classes -template -struct optional_operations_base : optional_storage_base { - using optional_storage_base::optional_storage_base; - - void hard_reset() noexcept { - get().~T(); - this->m_has_value = false; - } - - template - void construct(Args &&... 
args) noexcept { - new (std::addressof(this->m_value)) T(std::forward(args)...); - this->m_has_value = true; - } - - template - void assign(Opt &&rhs) { - if (this->has_value()) { - if (rhs.has_value()) { - this->m_value = std::forward(rhs).get(); - } else { - this->m_value.~T(); - this->m_has_value = false; - } - } - - else if (rhs.has_value()) { - construct(std::forward(rhs).get()); - } - } - - bool has_value() const { return this->m_has_value; } - - TL_OPTIONAL_11_CONSTEXPR T &get() & { return this->m_value; } - TL_OPTIONAL_11_CONSTEXPR const T &get() const & { return this->m_value; } - TL_OPTIONAL_11_CONSTEXPR T &&get() && { return std::move(this->m_value); } -#ifndef TL_OPTIONAL_NO_CONSTRR - constexpr const T &&get() const && { return std::move(this->m_value); } -#endif -}; - -// This class manages conditionally having a trivial copy constructor -// This specialization is for when T is trivially copy constructible -template -struct optional_copy_base : optional_operations_base { - using optional_operations_base::optional_operations_base; -}; - -// This specialization is for when T is not trivially copy constructible -template -struct optional_copy_base : optional_operations_base { - using optional_operations_base::optional_operations_base; - - optional_copy_base() = default; - optional_copy_base(const optional_copy_base &rhs) - : optional_operations_base() { - if (rhs.has_value()) { - this->construct(rhs.get()); - } else { - this->m_has_value = false; - } - } - - optional_copy_base(optional_copy_base &&rhs) = default; - optional_copy_base &operator=(const optional_copy_base &rhs) = default; - optional_copy_base &operator=(optional_copy_base &&rhs) = default; -}; - -// This class manages conditionally having a trivial move constructor -// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it -// doesn't implement an analogue to std::is_trivially_move_constructible. 
We -// have to make do with a non-trivial move constructor even if T is trivially -// move constructible -#ifndef TL_OPTIONAL_GCC49 -template ::value> -struct optional_move_base : optional_copy_base { - using optional_copy_base::optional_copy_base; -}; -#else -template -struct optional_move_base; -#endif -template -struct optional_move_base : optional_copy_base { - using optional_copy_base::optional_copy_base; - - optional_move_base() = default; - optional_move_base(const optional_move_base &rhs) = default; - - optional_move_base(optional_move_base &&rhs) noexcept( - std::is_nothrow_move_constructible::value) { - if (rhs.has_value()) { - this->construct(std::move(rhs.get())); - } else { - this->m_has_value = false; - } - } - optional_move_base &operator=(const optional_move_base &rhs) = default; - optional_move_base &operator=(optional_move_base &&rhs) = default; -}; - -// This class manages conditionally having a trivial copy assignment operator -template -struct optional_copy_assign_base : optional_move_base { - using optional_move_base::optional_move_base; -}; - -template -struct optional_copy_assign_base : optional_move_base { - using optional_move_base::optional_move_base; - - optional_copy_assign_base() = default; - optional_copy_assign_base(const optional_copy_assign_base &rhs) = default; - - optional_copy_assign_base(optional_copy_assign_base &&rhs) = default; - optional_copy_assign_base &operator=(const optional_copy_assign_base &rhs) { - this->assign(rhs); - return *this; - } - optional_copy_assign_base &operator=(optional_copy_assign_base &&rhs) = - default; -}; - -// This class manages conditionally having a trivial move assignment operator -// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it -// doesn't implement an analogue to std::is_trivially_move_assignable. 
We have -// to make do with a non-trivial move assignment operator even if T is trivially -// move assignable -#ifndef TL_OPTIONAL_GCC49 -template ::value - &&std::is_trivially_move_constructible::value - &&std::is_trivially_move_assignable::value> -struct optional_move_assign_base : optional_copy_assign_base { - using optional_copy_assign_base::optional_copy_assign_base; -}; -#else -template -struct optional_move_assign_base; -#endif - -template -struct optional_move_assign_base : optional_copy_assign_base { - using optional_copy_assign_base::optional_copy_assign_base; - - optional_move_assign_base() = default; - optional_move_assign_base(const optional_move_assign_base &rhs) = default; - - optional_move_assign_base(optional_move_assign_base &&rhs) = default; - - optional_move_assign_base &operator=(const optional_move_assign_base &rhs) = - default; - - optional_move_assign_base & - operator=(optional_move_assign_base &&rhs) noexcept( - std::is_nothrow_move_constructible::value - &&std::is_nothrow_move_assignable::value) { - this->assign(std::move(rhs)); - return *this; - } -}; - -// optional_delete_ctor_base will conditionally delete copy and move -// constructors depending on whether T is copy/move constructible -template ::value, - bool EnableMove = std::is_move_constructible::value> -struct optional_delete_ctor_base { - optional_delete_ctor_base() = default; - optional_delete_ctor_base(const optional_delete_ctor_base &) = default; - optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = default; - optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) = - default; - optional_delete_ctor_base &operator=( - optional_delete_ctor_base &&) noexcept = default; -}; - -template -struct optional_delete_ctor_base { - optional_delete_ctor_base() = default; - optional_delete_ctor_base(const optional_delete_ctor_base &) = default; - optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = delete; - optional_delete_ctor_base 
&operator=(const optional_delete_ctor_base &) = - default; - optional_delete_ctor_base &operator=( - optional_delete_ctor_base &&) noexcept = default; -}; - -template -struct optional_delete_ctor_base { - optional_delete_ctor_base() = default; - optional_delete_ctor_base(const optional_delete_ctor_base &) = delete; - optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = default; - optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) = - default; - optional_delete_ctor_base &operator=( - optional_delete_ctor_base &&) noexcept = default; -}; - -template -struct optional_delete_ctor_base { - optional_delete_ctor_base() = default; - optional_delete_ctor_base(const optional_delete_ctor_base &) = delete; - optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = delete; - optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) = - default; - optional_delete_ctor_base &operator=( - optional_delete_ctor_base &&) noexcept = default; -}; - -// optional_delete_assign_base will conditionally delete copy and move -// constructors depending on whether T is copy/move constructible + assignable -template ::value && - std::is_copy_assignable::value), - bool EnableMove = (std::is_move_constructible::value && - std::is_move_assignable::value)> -struct optional_delete_assign_base { - optional_delete_assign_base() = default; - optional_delete_assign_base(const optional_delete_assign_base &) = default; - optional_delete_assign_base(optional_delete_assign_base &&) noexcept = - default; - optional_delete_assign_base &operator=( - const optional_delete_assign_base &) = default; - optional_delete_assign_base &operator=( - optional_delete_assign_base &&) noexcept = default; -}; - -template -struct optional_delete_assign_base { - optional_delete_assign_base() = default; - optional_delete_assign_base(const optional_delete_assign_base &) = default; - optional_delete_assign_base(optional_delete_assign_base &&) noexcept = - default; - 
optional_delete_assign_base &operator=( - const optional_delete_assign_base &) = default; - optional_delete_assign_base &operator=( - optional_delete_assign_base &&) noexcept = delete; -}; - -template -struct optional_delete_assign_base { - optional_delete_assign_base() = default; - optional_delete_assign_base(const optional_delete_assign_base &) = default; - optional_delete_assign_base(optional_delete_assign_base &&) noexcept = - default; - optional_delete_assign_base &operator=( - const optional_delete_assign_base &) = delete; - optional_delete_assign_base &operator=( - optional_delete_assign_base &&) noexcept = default; -}; - -template -struct optional_delete_assign_base { - optional_delete_assign_base() = default; - optional_delete_assign_base(const optional_delete_assign_base &) = default; - optional_delete_assign_base(optional_delete_assign_base &&) noexcept = - default; - optional_delete_assign_base &operator=( - const optional_delete_assign_base &) = delete; - optional_delete_assign_base &operator=( - optional_delete_assign_base &&) noexcept = delete; -}; - -} // namespace detail - -/// A tag type to represent an empty optional -struct nullopt_t { - struct do_not_use {}; - constexpr explicit nullopt_t(do_not_use, do_not_use) noexcept {} -}; -/// Represents an empty optional -static constexpr nullopt_t nullopt{nullopt_t::do_not_use{}, - nullopt_t::do_not_use{}}; - -class bad_optional_access : public std::exception { - public: - bad_optional_access() = default; - const char *what() const noexcept { return "Optional has no value"; } -}; - -/// An optional object is an object that contains the storage for another -/// object and manages the lifetime of this contained object, if any. The -/// contained object may be initialized after the optional object has been -/// initialized, and may be destroyed before the optional object has been -/// destroyed. The initialization state of the contained object is tracked by -/// the optional object. 
-template -class optional : private detail::optional_move_assign_base, - private detail::optional_delete_ctor_base, - private detail::optional_delete_assign_base { - using base = detail::optional_move_assign_base; - - static_assert(!std::is_same::value, - "instantiation of optional with in_place_t is ill-formed"); - static_assert(!std::is_same, nullopt_t>::value, - "instantiation of optional with nullopt_t is ill-formed"); - - public: -// The different versions for C++14 and 11 are needed because deduced return -// types are not SFINAE-safe. This provides better support for things like -// generic lambdas. C.f. -// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0826r0.html -#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \ - !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55) - /// Carries out some operation which returns an optional on the stored - /// object if there is one. - template - TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) & { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } - - template - TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) && { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : result(nullopt); - } - - template - constexpr auto and_then(F &&f) const & { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - constexpr auto and_then(F &&f) const && { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() - ? 
detail::invoke(std::forward(f), std::move(**this)) - : result(nullopt); - } -#endif -#else - /// Carries out some operation which returns an optional on the stored - /// object if there is one. - template - TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t and_then(F &&f) & { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } - - template - TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t and_then( - F &&f) && { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : result(nullopt); - } - - template - constexpr detail::invoke_result_t and_then(F &&f) const & { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - constexpr detail::invoke_result_t and_then(F &&f) const && { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : result(nullopt); - } -#endif -#endif - -#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \ - !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55) - /// Carries out some operation on the stored object if there is one. 
- template - TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) & { - return optional_map_impl(*this, std::forward(f)); - } - - template - TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) && { - return optional_map_impl(std::move(*this), std::forward(f)); - } - - template - constexpr auto map(F &&f) const & { - return optional_map_impl(*this, std::forward(f)); - } - - template - constexpr auto map(F &&f) const && { - return optional_map_impl(std::move(*this), std::forward(f)); - } -#else - /// Carries out some operation on the stored object if there is one. - template - TL_OPTIONAL_11_CONSTEXPR decltype( - optional_map_impl(std::declval(), std::declval())) - map(F &&f) & { - return optional_map_impl(*this, std::forward(f)); - } - - template - TL_OPTIONAL_11_CONSTEXPR decltype( - optional_map_impl(std::declval(), std::declval())) - map(F &&f) && { - return optional_map_impl(std::move(*this), std::forward(f)); - } - - template - constexpr decltype(optional_map_impl(std::declval(), - std::declval())) - map(F &&f) const & { - return optional_map_impl(*this, std::forward(f)); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - constexpr decltype(optional_map_impl(std::declval(), - std::declval())) - map(F &&f) const && { - return optional_map_impl(std::move(*this), std::forward(f)); - } -#endif -#endif - -#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \ - !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55) - /// Carries out some operation on the stored object if there is one. 
- template - TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) & { - return optional_map_impl(*this, std::forward(f)); - } - - template - TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) && { - return optional_map_impl(std::move(*this), std::forward(f)); - } - - template - constexpr auto transform(F &&f) const & { - return optional_map_impl(*this, std::forward(f)); - } - - template - constexpr auto transform(F &&f) const && { - return optional_map_impl(std::move(*this), std::forward(f)); - } -#else - /// Carries out some operation on the stored object if there is one. - template - TL_OPTIONAL_11_CONSTEXPR decltype( - optional_map_impl(std::declval(), std::declval())) - transform(F &&f) & { - return optional_map_impl(*this, std::forward(f)); - } - - template - TL_OPTIONAL_11_CONSTEXPR decltype( - optional_map_impl(std::declval(), std::declval())) - transform(F &&f) && { - return optional_map_impl(std::move(*this), std::forward(f)); - } - - template - constexpr decltype(optional_map_impl(std::declval(), - std::declval())) - transform(F &&f) const & { - return optional_map_impl(*this, std::forward(f)); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - constexpr decltype(optional_map_impl(std::declval(), - std::declval())) - transform(F &&f) const && { - return optional_map_impl(std::move(*this), std::forward(f)); - } -#endif -#endif - - /// Calls `f` if the optional is empty - template * = nullptr> - optional TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & { - if (has_value()) return *this; - - std::forward(f)(); - return nullopt; - } - - template * = nullptr> - optional TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & { - return has_value() ? *this : std::forward(f)(); - } - - template * = nullptr> - optional or_else(F &&f) && { - if (has_value()) return std::move(*this); - - std::forward(f)(); - return nullopt; - } - - template * = nullptr> - optional TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) && { - return has_value() ? 
std::move(*this) : std::forward(f)(); - } - - template * = nullptr> - optional or_else(F &&f) const & { - if (has_value()) return *this; - - std::forward(f)(); - return nullopt; - } - - template * = nullptr> - optional TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) const & { - return has_value() ? *this : std::forward(f)(); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template * = nullptr> - optional or_else(F &&f) const && { - if (has_value()) return std::move(*this); - - std::forward(f)(); - return nullopt; - } - - template * = nullptr> - optional or_else(F &&f) const && { - return has_value() ? std::move(*this) : std::forward(f)(); - } -#endif - - /// Maps the stored value with `f` if there is one, otherwise returns `u`. - template - U map_or(F &&f, U &&u) & { - return has_value() ? detail::invoke(std::forward(f), **this) - : std::forward(u); - } - - template - U map_or(F &&f, U &&u) && { - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : std::forward(u); - } - - template - U map_or(F &&f, U &&u) const & { - return has_value() ? detail::invoke(std::forward(f), **this) - : std::forward(u); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - U map_or(F &&f, U &&u) const && { - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : std::forward(u); - } -#endif - - /// Maps the stored value with `f` if there is one, otherwise calls - /// `u` and returns the result. - template - detail::invoke_result_t map_or_else(F &&f, U &&u) & { - return has_value() ? detail::invoke(std::forward(f), **this) - : std::forward(u)(); - } - - template - detail::invoke_result_t map_or_else(F &&f, U &&u) && { - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : std::forward(u)(); - } - - template - detail::invoke_result_t map_or_else(F &&f, U &&u) const & { - return has_value() ? 
detail::invoke(std::forward(f), **this) - : std::forward(u)(); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - detail::invoke_result_t map_or_else(F &&f, U &&u) const && { - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : std::forward(u)(); - } -#endif - - /// Returns `u` if `*this` has a value, otherwise an empty optional. - template - constexpr optional::type> conjunction(U &&u) const { - using result = optional>; - return has_value() ? result{u} : result{nullopt}; - } - - /// Returns `rhs` if `*this` is empty, otherwise the current value. - TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) & { - return has_value() ? *this : rhs; - } - - constexpr optional disjunction(const optional &rhs) const & { - return has_value() ? *this : rhs; - } - - TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) && { - return has_value() ? std::move(*this) : rhs; - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - constexpr optional disjunction(const optional &rhs) const && { - return has_value() ? std::move(*this) : rhs; - } -#endif - - TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) & { - return has_value() ? *this : std::move(rhs); - } - - constexpr optional disjunction(optional &&rhs) const & { - return has_value() ? *this : std::move(rhs); - } - - TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) && { - return has_value() ? std::move(*this) : std::move(rhs); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - constexpr optional disjunction(optional &&rhs) const && { - return has_value() ? std::move(*this) : std::move(rhs); - } -#endif - - /// Takes the value out of the optional, leaving it empty - optional take() { - optional ret = std::move(*this); - reset(); - return ret; - } - - using value_type = T; - - /// Constructs an optional that does not contain a value. 
- constexpr optional() noexcept = default; - - constexpr optional(nullopt_t) noexcept {} - - /// Copy constructor - /// - /// If `rhs` contains a value, the stored value is direct-initialized with - /// it. Otherwise, the constructed optional is empty. - TL_OPTIONAL_11_CONSTEXPR optional(const optional &rhs) = default; - - /// Move constructor - /// - /// If `rhs` contains a value, the stored value is direct-initialized with - /// it. Otherwise, the constructed optional is empty. - TL_OPTIONAL_11_CONSTEXPR optional(optional &&rhs) = default; - - /// Constructs the stored value in-place using the given arguments. - template - constexpr explicit optional( - detail::enable_if_t::value, - in_place_t>, - Args &&... args) - : base(in_place, std::forward(args)...) {} - - template - TL_OPTIONAL_11_CONSTEXPR explicit optional( - detail::enable_if_t &, - Args &&...>::value, - in_place_t>, - std::initializer_list il, - Args &&... args) { - this->construct(il, std::forward(args)...); - } - - /// Constructs the stored value with `u`. - template < - class U = T, - detail::enable_if_t::value> * = nullptr, - detail::enable_forward_value * = nullptr> - constexpr optional(U &&u) : base(in_place, std::forward(u)) {} - - template < - class U = T, - detail::enable_if_t::value> * = nullptr, - detail::enable_forward_value * = nullptr> - constexpr explicit optional(U &&u) : base(in_place, std::forward(u)) {} - - /// Converting copy constructor. - template * = nullptr, - detail::enable_if_t::value> * = - nullptr> - optional(const optional &rhs) { - if (rhs.has_value()) { - this->construct(*rhs); - } - } - - template * = nullptr, - detail::enable_if_t::value> * = - nullptr> - explicit optional(const optional &rhs) { - if (rhs.has_value()) { - this->construct(*rhs); - } - } - - /// Converting move constructor. 
- template < - class U, - detail::enable_from_other * = nullptr, - detail::enable_if_t::value> * = nullptr> - optional(optional &&rhs) { - if (rhs.has_value()) { - this->construct(std::move(*rhs)); - } - } - - template < - class U, - detail::enable_from_other * = nullptr, - detail::enable_if_t::value> * = nullptr> - explicit optional(optional &&rhs) { - if (rhs.has_value()) { - this->construct(std::move(*rhs)); - } - } - - /// Destroys the stored value if there is one. - ~optional() = default; - - /// Assignment to empty. - /// - /// Destroys the current value if there is one. - optional &operator=(nullopt_t) noexcept { - if (has_value()) { - this->m_value.~T(); - this->m_has_value = false; - } - - return *this; - } - - /// Copy assignment. - /// - /// Copies the value from `rhs` if there is one. Otherwise resets the stored - /// value in `*this`. - optional &operator=(const optional &rhs) = default; - - /// Move assignment. - /// - /// Moves the value from `rhs` if there is one. Otherwise resets the stored - /// value in `*this`. - optional &operator=(optional &&rhs) = default; - - /// Assigns the stored value from `u`, destroying the old value if there was - /// one. - template * = nullptr> - optional &operator=(U &&u) { - if (has_value()) { - this->m_value = std::forward(u); - } else { - this->construct(std::forward(u)); - } - - return *this; - } - - /// Converting copy assignment operator. - /// - /// Copies the value from `rhs` if there is one. Otherwise resets the stored - /// value in `*this`. - template * = nullptr> - optional &operator=(const optional &rhs) { - if (has_value()) { - if (rhs.has_value()) { - this->m_value = *rhs; - } else { - this->hard_reset(); - } - } - - if (rhs.has_value()) { - this->construct(*rhs); - } - - return *this; - } - - // TODO check exception guarantee - /// Converting move assignment operator. - /// - /// Moves the value from `rhs` if there is one. Otherwise resets the stored - /// value in `*this`. 
- template * = nullptr> - optional &operator=(optional &&rhs) { - if (has_value()) { - if (rhs.has_value()) { - this->m_value = std::move(*rhs); - } else { - this->hard_reset(); - } - } - - if (rhs.has_value()) { - this->construct(std::move(*rhs)); - } - - return *this; - } - - /// Constructs the value in-place, destroying the current one if there is - /// one. - template - T &emplace(Args &&... args) { - static_assert(std::is_constructible::value, - "T must be constructible with Args"); - - *this = nullopt; - this->construct(std::forward(args)...); - return value(); - } - - template - detail::enable_if_t< - std::is_constructible &, Args &&...>::value, - T &> - emplace(std::initializer_list il, Args &&... args) { - *this = nullopt; - this->construct(il, std::forward(args)...); - return value(); - } - - /// Swaps this optional with the other. - /// - /// If neither optionals have a value, nothing happens. - /// If both have a value, the values are swapped. - /// If one has a value, it is moved to the other and the movee is left - /// valueless. 
- void swap(optional &rhs) noexcept( - std::is_nothrow_move_constructible::value - &&detail::is_nothrow_swappable::value) { - using std::swap; - if (has_value()) { - if (rhs.has_value()) { - swap(**this, *rhs); - } else { - new (std::addressof(rhs.m_value)) T(std::move(this->m_value)); - this->m_value.T::~T(); - } - } else if (rhs.has_value()) { - new (std::addressof(this->m_value)) T(std::move(rhs.m_value)); - rhs.m_value.T::~T(); - } - swap(this->m_has_value, rhs.m_has_value); - } - - /// Returns a pointer to the stored value - constexpr const T *operator->() const { - return std::addressof(this->m_value); - } - - TL_OPTIONAL_11_CONSTEXPR T *operator->() { - return std::addressof(this->m_value); - } - - /// Returns the stored value - TL_OPTIONAL_11_CONSTEXPR T &operator*() & { return this->m_value; } - - constexpr const T &operator*() const & { return this->m_value; } - - TL_OPTIONAL_11_CONSTEXPR T &&operator*() && { - return std::move(this->m_value); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - constexpr const T &&operator*() const && { - return std::move(this->m_value); - } -#endif - - /// Returns whether or not the optional has a value - constexpr bool has_value() const noexcept { return this->m_has_value; } - - constexpr explicit operator bool() const noexcept { - return this->m_has_value; - } - - /// Returns the contained value if there is one, otherwise throws - /// bad_optional_access - TL_OPTIONAL_11_CONSTEXPR T &value() & { - if (has_value()) return this->m_value; - throw bad_optional_access(); - } - TL_OPTIONAL_11_CONSTEXPR const T &value() const & { - if (has_value()) return this->m_value; - throw bad_optional_access(); - } - TL_OPTIONAL_11_CONSTEXPR T &&value() && { - if (has_value()) return std::move(this->m_value); - throw bad_optional_access(); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - TL_OPTIONAL_11_CONSTEXPR const T &&value() const && { - if (has_value()) return std::move(this->m_value); - throw bad_optional_access(); - } -#endif - - /// Returns the 
stored value if there is one, otherwise returns `u` - template - constexpr T value_or(U &&u) const & { - static_assert(std::is_copy_constructible::value && - std::is_convertible::value, - "T must be copy constructible and convertible from U"); - return has_value() ? **this : static_cast(std::forward(u)); - } - - template - TL_OPTIONAL_11_CONSTEXPR T value_or(U &&u) && { - static_assert(std::is_move_constructible::value && - std::is_convertible::value, - "T must be move constructible and convertible from U"); - return has_value() ? **this : static_cast(std::forward(u)); - } - - /// Destroys the stored value if one exists, making the optional empty - void reset() noexcept { - if (has_value()) { - this->m_value.~T(); - this->m_has_value = false; - } - } -}; // namespace tl - -/// Compares two optional objects -template -inline constexpr bool operator==(const optional &lhs, - const optional &rhs) { - return lhs.has_value() == rhs.has_value() && - (!lhs.has_value() || *lhs == *rhs); -} -template -inline constexpr bool operator!=(const optional &lhs, - const optional &rhs) { - return lhs.has_value() != rhs.has_value() || - (lhs.has_value() && *lhs != *rhs); -} -template -inline constexpr bool operator<(const optional &lhs, - const optional &rhs) { - return rhs.has_value() && (!lhs.has_value() || *lhs < *rhs); -} -template -inline constexpr bool operator>(const optional &lhs, - const optional &rhs) { - return lhs.has_value() && (!rhs.has_value() || *lhs > *rhs); -} -template -inline constexpr bool operator<=(const optional &lhs, - const optional &rhs) { - return !lhs.has_value() || (rhs.has_value() && *lhs <= *rhs); -} -template -inline constexpr bool operator>=(const optional &lhs, - const optional &rhs) { - return !rhs.has_value() || (lhs.has_value() && *lhs >= *rhs); -} - -/// Compares an optional to a `nullopt` -template -inline constexpr bool operator==(const optional &lhs, nullopt_t) noexcept { - return !lhs.has_value(); -} -template -inline constexpr bool 
operator==(nullopt_t, const optional &rhs) noexcept { - return !rhs.has_value(); -} -template -inline constexpr bool operator!=(const optional &lhs, nullopt_t) noexcept { - return lhs.has_value(); -} -template -inline constexpr bool operator!=(nullopt_t, const optional &rhs) noexcept { - return rhs.has_value(); -} -template -inline constexpr bool operator<(const optional &, nullopt_t) noexcept { - return false; -} -template -inline constexpr bool operator<(nullopt_t, const optional &rhs) noexcept { - return rhs.has_value(); -} -template -inline constexpr bool operator<=(const optional &lhs, nullopt_t) noexcept { - return !lhs.has_value(); -} -template -inline constexpr bool operator<=(nullopt_t, const optional &) noexcept { - return true; -} -template -inline constexpr bool operator>(const optional &lhs, nullopt_t) noexcept { - return lhs.has_value(); -} -template -inline constexpr bool operator>(nullopt_t, const optional &) noexcept { - return false; -} -template -inline constexpr bool operator>=(const optional &, nullopt_t) noexcept { - return true; -} -template -inline constexpr bool operator>=(nullopt_t, const optional &rhs) noexcept { - return !rhs.has_value(); -} - -/// Compares the optional with a value. -template -inline constexpr bool operator==(const optional &lhs, const U &rhs) { - return lhs.has_value() ? *lhs == rhs : false; -} -template -inline constexpr bool operator==(const U &lhs, const optional &rhs) { - return rhs.has_value() ? lhs == *rhs : false; -} -template -inline constexpr bool operator!=(const optional &lhs, const U &rhs) { - return lhs.has_value() ? *lhs != rhs : true; -} -template -inline constexpr bool operator!=(const U &lhs, const optional &rhs) { - return rhs.has_value() ? lhs != *rhs : true; -} -template -inline constexpr bool operator<(const optional &lhs, const U &rhs) { - return lhs.has_value() ? *lhs < rhs : true; -} -template -inline constexpr bool operator<(const U &lhs, const optional &rhs) { - return rhs.has_value() ? 
lhs < *rhs : false; -} -template -inline constexpr bool operator<=(const optional &lhs, const U &rhs) { - return lhs.has_value() ? *lhs <= rhs : true; -} -template -inline constexpr bool operator<=(const U &lhs, const optional &rhs) { - return rhs.has_value() ? lhs <= *rhs : false; -} -template -inline constexpr bool operator>(const optional &lhs, const U &rhs) { - return lhs.has_value() ? *lhs > rhs : false; -} -template -inline constexpr bool operator>(const U &lhs, const optional &rhs) { - return rhs.has_value() ? lhs > *rhs : true; -} -template -inline constexpr bool operator>=(const optional &lhs, const U &rhs) { - return lhs.has_value() ? *lhs >= rhs : false; -} -template -inline constexpr bool operator>=(const U &lhs, const optional &rhs) { - return rhs.has_value() ? lhs >= *rhs : true; -} - -template ::value> * = nullptr, - detail::enable_if_t::value> * = nullptr> -void swap(optional &lhs, - optional &rhs) noexcept(noexcept(lhs.swap(rhs))) { - return lhs.swap(rhs); -} - -namespace detail { -struct i_am_secret {}; -} // namespace detail - -template ::value, - detail::decay_t, - T>> -inline constexpr optional make_optional(U &&v) { - return optional(std::forward(v)); -} - -template -inline constexpr optional make_optional(Args &&... args) { - return optional(in_place, std::forward(args)...); -} -template -inline constexpr optional make_optional(std::initializer_list il, - Args &&... args) { - return optional(in_place, il, std::forward(args)...); -} - -#if __cplusplus >= 201703L -template -optional(T)->optional; -#endif - -/// \exclude -namespace detail { -#ifdef TL_OPTIONAL_CXX14 -template (), - *std::declval())), - detail::enable_if_t::value> * = nullptr> -constexpr auto optional_map_impl(Opt &&opt, F &&f) { - return opt.has_value() - ? 
detail::invoke(std::forward(f), *std::forward(opt)) - : optional(nullopt); -} - -template (), - *std::declval())), - detail::enable_if_t::value> * = nullptr> -auto optional_map_impl(Opt &&opt, F &&f) { - if (opt.has_value()) { - detail::invoke(std::forward(f), *std::forward(opt)); - return make_optional(monostate{}); - } - - return optional(nullopt); -} -#else -template (), - *std::declval())), - detail::enable_if_t::value> * = nullptr> - -constexpr auto optional_map_impl(Opt &&opt, F &&f) -> optional { - return opt.has_value() - ? detail::invoke(std::forward(f), *std::forward(opt)) - : optional(nullopt); -} - -template (), - *std::declval())), - detail::enable_if_t::value> * = nullptr> - -auto optional_map_impl(Opt &&opt, F &&f) -> optional { - if (opt.has_value()) { - detail::invoke(std::forward(f), *std::forward(opt)); - return monostate{}; - } - - return nullopt; -} -#endif -} // namespace detail - -/// Specialization for when `T` is a reference. `optional` acts similarly -/// to a `T*`, but provides more operations and shows intent more clearly. -template -class optional { - public: -// The different versions for C++14 and 11 are needed because deduced return -// types are not SFINAE-safe. This provides better support for things like -// generic lambdas. C.f. -// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0826r0.html -#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \ - !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55) - - /// Carries out some operation which returns an optional on the stored - /// object if there is one. - template - TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) & { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? 
detail::invoke(std::forward(f), **this) - : result(nullopt); - } - - template - TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) && { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } - - template - constexpr auto and_then(F &&f) const & { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - constexpr auto and_then(F &&f) const && { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } -#endif -#else - /// Carries out some operation which returns an optional on the stored - /// object if there is one. - template - TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t and_then(F &&f) & { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } - - template - TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t and_then( - F &&f) && { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } - - template - constexpr detail::invoke_result_t and_then(F &&f) const & { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? 
detail::invoke(std::forward(f), **this) - : result(nullopt); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - constexpr detail::invoke_result_t and_then(F &&f) const && { - using result = detail::invoke_result_t; - static_assert(detail::is_optional::value, - "F must return an optional"); - - return has_value() ? detail::invoke(std::forward(f), **this) - : result(nullopt); - } -#endif -#endif - -#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \ - !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55) - /// Carries out some operation on the stored object if there is one. - template - TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) & { - return detail::optional_map_impl(*this, std::forward(f)); - } - - template - TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) && { - return detail::optional_map_impl(std::move(*this), std::forward(f)); - } - - template - constexpr auto map(F &&f) const & { - return detail::optional_map_impl(*this, std::forward(f)); - } - - template - constexpr auto map(F &&f) const && { - return detail::optional_map_impl(std::move(*this), std::forward(f)); - } -#else - /// Carries out some operation on the stored object if there is one. 
- template - TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl( - std::declval(), std::declval())) - map(F &&f) & { - return detail::optional_map_impl(*this, std::forward(f)); - } - - template - TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl( - std::declval(), std::declval())) - map(F &&f) && { - return detail::optional_map_impl(std::move(*this), std::forward(f)); - } - - template - constexpr decltype(detail::optional_map_impl( - std::declval(), std::declval())) - map(F &&f) const & { - return detail::optional_map_impl(*this, std::forward(f)); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - constexpr decltype(detail::optional_map_impl( - std::declval(), std::declval())) - map(F &&f) const && { - return detail::optional_map_impl(std::move(*this), std::forward(f)); - } -#endif -#endif - -#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \ - !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55) - /// Carries out some operation on the stored object if there is one. - template - TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) & { - return detail::optional_map_impl(*this, std::forward(f)); - } - - template - TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) && { - return detail::optional_map_impl(std::move(*this), std::forward(f)); - } - - template - constexpr auto transform(F &&f) const & { - return detail::optional_map_impl(*this, std::forward(f)); - } - - template - constexpr auto transform(F &&f) const && { - return detail::optional_map_impl(std::move(*this), std::forward(f)); - } -#else - /// Carries out some operation on the stored object if there is one. 
- template - TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl( - std::declval(), std::declval())) - transform(F &&f) & { - return detail::optional_map_impl(*this, std::forward(f)); - } - - /// \group map - /// \synopsis template auto transform(F &&f) &&; - template - TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl( - std::declval(), std::declval())) - transform(F &&f) && { - return detail::optional_map_impl(std::move(*this), std::forward(f)); - } - - template - constexpr decltype(detail::optional_map_impl( - std::declval(), std::declval())) - transform(F &&f) const & { - return detail::optional_map_impl(*this, std::forward(f)); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - constexpr decltype(detail::optional_map_impl( - std::declval(), std::declval())) - transform(F &&f) const && { - return detail::optional_map_impl(std::move(*this), std::forward(f)); - } -#endif -#endif - - /// Calls `f` if the optional is empty - template * = nullptr> - optional TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & { - if (has_value()) return *this; - - std::forward(f)(); - return nullopt; - } - - template * = nullptr> - optional TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & { - return has_value() ? *this : std::forward(f)(); - } - - template * = nullptr> - optional or_else(F &&f) && { - if (has_value()) return std::move(*this); - - std::forward(f)(); - return nullopt; - } - - template * = nullptr> - optional TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) && { - return has_value() ? std::move(*this) : std::forward(f)(); - } - - template * = nullptr> - optional or_else(F &&f) const & { - if (has_value()) return *this; - - std::forward(f)(); - return nullopt; - } - - template * = nullptr> - optional TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) const & { - return has_value() ? 
*this : std::forward(f)(); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template * = nullptr> - optional or_else(F &&f) const && { - if (has_value()) return std::move(*this); - - std::forward(f)(); - return nullopt; - } - - template * = nullptr> - optional or_else(F &&f) const && { - return has_value() ? std::move(*this) : std::forward(f)(); - } -#endif - - /// Maps the stored value with `f` if there is one, otherwise returns `u` - template - U map_or(F &&f, U &&u) & { - return has_value() ? detail::invoke(std::forward(f), **this) - : std::forward(u); - } - - template - U map_or(F &&f, U &&u) && { - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : std::forward(u); - } - - template - U map_or(F &&f, U &&u) const & { - return has_value() ? detail::invoke(std::forward(f), **this) - : std::forward(u); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - U map_or(F &&f, U &&u) const && { - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : std::forward(u); - } -#endif - - /// Maps the stored value with `f` if there is one, otherwise calls - /// `u` and returns the result. - template - detail::invoke_result_t map_or_else(F &&f, U &&u) & { - return has_value() ? detail::invoke(std::forward(f), **this) - : std::forward(u)(); - } - - template - detail::invoke_result_t map_or_else(F &&f, U &&u) && { - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : std::forward(u)(); - } - - template - detail::invoke_result_t map_or_else(F &&f, U &&u) const & { - return has_value() ? detail::invoke(std::forward(f), **this) - : std::forward(u)(); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - template - detail::invoke_result_t map_or_else(F &&f, U &&u) const && { - return has_value() - ? detail::invoke(std::forward(f), std::move(**this)) - : std::forward(u)(); - } -#endif - - /// Returns `u` if `*this` has a value, otherwise an empty optional. 
- template - constexpr optional::type> conjunction(U &&u) const { - using result = optional>; - return has_value() ? result{u} : result{nullopt}; - } - - /// Returns `rhs` if `*this` is empty, otherwise the current value. - TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) & { - return has_value() ? *this : rhs; - } - - constexpr optional disjunction(const optional &rhs) const & { - return has_value() ? *this : rhs; - } - - TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) && { - return has_value() ? std::move(*this) : rhs; - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - constexpr optional disjunction(const optional &rhs) const && { - return has_value() ? std::move(*this) : rhs; - } -#endif - - TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) & { - return has_value() ? *this : std::move(rhs); - } - - constexpr optional disjunction(optional &&rhs) const & { - return has_value() ? *this : std::move(rhs); - } - - TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) && { - return has_value() ? std::move(*this) : std::move(rhs); - } - -#ifndef TL_OPTIONAL_NO_CONSTRR - constexpr optional disjunction(optional &&rhs) const && { - return has_value() ? std::move(*this) : std::move(rhs); - } -#endif - - /// Takes the value out of the optional, leaving it empty - optional take() { - optional ret = std::move(*this); - reset(); - return ret; - } - - using value_type = T &; - - /// Constructs an optional that does not contain a value. - constexpr optional() noexcept : m_value(nullptr) {} - - constexpr optional(nullopt_t) noexcept : m_value(nullptr) {} - - /// Copy constructor - /// - /// If `rhs` contains a value, the stored value is direct-initialized with - /// it. Otherwise, the constructed optional is empty. - TL_OPTIONAL_11_CONSTEXPR optional(const optional &rhs) noexcept = default; - - /// Move constructor - /// - /// If `rhs` contains a value, the stored value is direct-initialized with - /// it. 
Otherwise, the constructed optional is empty. - TL_OPTIONAL_11_CONSTEXPR optional(optional &&rhs) = default; - - /// Constructs the stored value with `u`. - template >::value> * = nullptr> - constexpr optional(U &&u) noexcept : m_value(std::addressof(u)) { - static_assert(std::is_lvalue_reference::value, - "U must be an lvalue"); - } - - template - constexpr explicit optional(const optional &rhs) noexcept - : optional(*rhs) {} - - /// No-op - ~optional() = default; - - /// Assignment to empty. - /// - /// Destroys the current value if there is one. - optional &operator=(nullopt_t) noexcept { - m_value = nullptr; - return *this; - } - - /// Copy assignment. - /// - /// Rebinds this optional to the referee of `rhs` if there is one. Otherwise - /// resets the stored value in `*this`. - optional &operator=(const optional &rhs) = default; - - /// Rebinds this optional to `u`. - template >::value> * = nullptr> - optional &operator=(U &&u) { - static_assert(std::is_lvalue_reference::value, - "U must be an lvalue"); - m_value = std::addressof(u); - return *this; - } - - /// Converting copy assignment operator. - /// - /// Rebinds this optional to the referee of `rhs` if there is one. Otherwise - /// resets the stored value in `*this`. - template - optional &operator=(const optional &rhs) noexcept { - m_value = std::addressof(rhs.value()); - return *this; - } - - /// Rebinds this optional to `u`. 
- template >::value> * = nullptr> - optional &emplace(U &&u) noexcept { - return *this = std::forward(u); - } - - void swap(optional &rhs) noexcept { std::swap(m_value, rhs.m_value); } - - /// Returns a pointer to the stored value - constexpr const T *operator->() const noexcept { return m_value; } - - TL_OPTIONAL_11_CONSTEXPR T *operator->() noexcept { return m_value; } - - /// Returns the stored value - TL_OPTIONAL_11_CONSTEXPR T &operator*() noexcept { return *m_value; } - - constexpr const T &operator*() const noexcept { return *m_value; } - - constexpr bool has_value() const noexcept { return m_value != nullptr; } - - constexpr explicit operator bool() const noexcept { - return m_value != nullptr; - } - - /// Returns the contained value if there is one, otherwise throws - /// bad_optional_access - TL_OPTIONAL_11_CONSTEXPR T &value() { - if (has_value()) return *m_value; - throw bad_optional_access(); - } - TL_OPTIONAL_11_CONSTEXPR const T &value() const { - if (has_value()) return *m_value; - throw bad_optional_access(); - } - - /// Returns the stored value if there is one, otherwise returns `u` - template - constexpr T value_or(U &&u) const &noexcept { - static_assert(std::is_copy_constructible::value && - std::is_convertible::value, - "T must be copy constructible and convertible from U"); - return has_value() ? **this : static_cast(std::forward(u)); - } - - /// \group value_or - template - TL_OPTIONAL_11_CONSTEXPR T value_or(U &&u) && noexcept { - static_assert(std::is_move_constructible::value && - std::is_convertible::value, - "T must be move constructible and convertible from U"); - return has_value() ? 
**this : static_cast(std::forward(u)); - } - - /// Destroys the stored value if one exists, making the optional empty - void reset() noexcept { m_value = nullptr; } - - private: - T *m_value; -}; // namespace tl - - -} // namespace tl - -namespace std { -// TODO SFINAE -template -struct hash> { - ::std::size_t operator()(const tl::optional &o) const { - if (!o.has_value()) return 0; - - return std::hash>()(*o); - } -}; -} // namespace std - -#endif diff --git a/paddlespeech/audio/src/pybind/kaldi/feature_common.h b/paddlespeech/audio/src/pybind/kaldi/feature_common.h deleted file mode 100644 index 05522bb7e8a82f932a0004d7f985b88ec517f3db..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/kaldi/feature_common.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include "pybind11/pybind11.h" -#include "pybind11/numpy.h" -#include "feat/feature-window.h" - -namespace paddleaudio { -namespace kaldi { - -namespace py = pybind11; - -template -class StreamingFeatureTpl { - public: - typedef typename F::Options Options; - StreamingFeatureTpl(const Options& opts); - bool ComputeFeature(const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav, - ::kaldi::Vector<::kaldi::BaseFloat>* feats); - void Reset() { remained_wav_.Resize(0); } - - int Dim() { return computer_.Dim(); } - - private: - bool Compute(const ::kaldi::Vector<::kaldi::BaseFloat>& waves, - ::kaldi::Vector<::kaldi::BaseFloat>* feats); - Options opts_; - ::kaldi::FeatureWindowFunction window_function_; - ::kaldi::Vector<::kaldi::BaseFloat> remained_wav_; - F computer_; -}; - -} // namespace kaldi -} // namespace ppspeech - -#include "feature_common_inl.h" diff --git a/paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h b/paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h deleted file mode 100644 index c894b97755845a46837e68a98cbaa54567a5a9dd..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" - -namespace paddleaudio { -namespace kaldi { - -template -StreamingFeatureTpl::StreamingFeatureTpl(const Options& opts) - : opts_(opts), computer_(opts), window_function_(opts.frame_opts) { - // window_function_(computer_.GetFrameOptions()) { the opt set to zero -} - -template -bool StreamingFeatureTpl::ComputeFeature( - const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav, - ::kaldi::Vector<::kaldi::BaseFloat>* feats) { - // append remaned waves - ::kaldi::int32 wav_len = wav.Dim(); - if (wav_len == 0) return false; - ::kaldi::int32 left_len = remained_wav_.Dim(); - ::kaldi::Vector<::kaldi::BaseFloat> waves(left_len + wav_len); - waves.Range(0, left_len).CopyFromVec(remained_wav_); - waves.Range(left_len, wav_len).CopyFromVec(wav); - - // cache remaned waves - ::kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions(); - ::kaldi::int32 num_frames = ::kaldi::NumFrames(waves.Dim(), frame_opts); - ::kaldi::int32 frame_shift = frame_opts.WindowShift(); - ::kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames; - remained_wav_.Resize(left_samples); - remained_wav_.CopyFromVec( - waves.Range(frame_shift * num_frames, left_samples)); - - // compute speech feature - Compute(waves, feats); - return true; -} - -// Compute feat -template -bool StreamingFeatureTpl::Compute( - const ::kaldi::Vector<::kaldi::BaseFloat>& waves, - ::kaldi::Vector<::kaldi::BaseFloat>* feats) { - ::kaldi::BaseFloat vtln_warp = 1.0; - const ::kaldi::FrameExtractionOptions& frame_opts = - computer_.GetFrameOptions(); - ::kaldi::int32 num_samples = waves.Dim(); - ::kaldi::int32 frame_length = frame_opts.WindowSize(); - ::kaldi::int32 sample_rate = frame_opts.samp_freq; - if (num_samples < frame_length) { - return false; - } - - ::kaldi::int32 num_frames = ::kaldi::NumFrames(num_samples, frame_opts); - feats->Resize(num_frames * Dim()); - - ::kaldi::Vector<::kaldi::BaseFloat> window; - bool need_raw_log_energy = computer_.NeedRawLogEnergy(); - 
for (::kaldi::int32 frame = 0; frame < num_frames; frame++) { - ::kaldi::BaseFloat raw_log_energy = 0.0; - ::kaldi::ExtractWindow(0, - waves, - frame, - frame_opts, - window_function_, - &window, - need_raw_log_energy ? &raw_log_energy : NULL); - - ::kaldi::Vector<::kaldi::BaseFloat> this_feature(computer_.Dim(), - ::kaldi::kUndefined); - computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature); - ::kaldi::SubVector<::kaldi::BaseFloat> output_row( - feats->Data() + frame * Dim(), Dim()); - output_row.CopyFromVec(this_feature); - } - return true; -} - -} // namespace kaldi -} // namespace paddleaudio diff --git a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc deleted file mode 100644 index 9fd8e93f9e29b0351e490622d06aaf37d7c8ce88..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h" -#include "feat/pitch-functions.h" - -namespace paddleaudio { -namespace kaldi { - -bool InitFbank( - ::kaldi::FrameExtractionOptions frame_opts, - ::kaldi::MelBanksOptions mel_opts, - FbankOptions fbank_opts) { - ::kaldi::FbankOptions opts; - opts.frame_opts = frame_opts; - opts.mel_opts = mel_opts; - opts.use_energy = fbank_opts.use_energy; - opts.energy_floor = fbank_opts.energy_floor; - opts.raw_energy = fbank_opts.raw_energy; - opts.htk_compat = fbank_opts.htk_compat; - opts.use_log_fbank = fbank_opts.use_log_fbank; - opts.use_power = fbank_opts.use_power; - paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->InitFbank(opts); - return true; -} - -py::array_t ComputeFbankStreaming(const py::array_t& wav) { - return paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ComputeFbank( - wav); -} - -py::array_t ComputeFbank( - ::kaldi::FrameExtractionOptions frame_opts, - ::kaldi::MelBanksOptions mel_opts, - FbankOptions fbank_opts, - const py::array_t& wav) { - InitFbank(frame_opts, mel_opts, fbank_opts); - py::array_t result = ComputeFbankStreaming(wav); - paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank(); - return result; -} - -void ResetFbank() { - paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank(); -} - -py::array_t ComputeKaldiPitch( - const ::kaldi::PitchExtractionOptions& opts, - const py::array_t& wav) { - py::buffer_info info = wav.request(); - ::kaldi::SubVector<::kaldi::BaseFloat> input_wav((float*)info.ptr, info.size); - - ::kaldi::Matrix<::kaldi::BaseFloat> features; - ::kaldi::ComputeKaldiPitch(opts, input_wav, &features); - auto result = py::array_t({features.NumRows(), features.NumCols()}); - for (int row_idx = 0; row_idx < features.NumRows(); ++row_idx) { - std::memcpy(result.mutable_data(row_idx), features.Row(row_idx).Data(), - sizeof(float)*features.NumCols()); - } - return result; -} - -} // namespace kaldi -} // namespace paddleaudio 
diff --git a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h deleted file mode 100644 index bbc88825cd03788fe3aa9f086b4787c68eee8910..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include - -#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h" -#include "feat/pitch-functions.h" - -namespace py = pybind11; - -namespace paddleaudio { -namespace kaldi { - -struct FbankOptions{ - bool use_energy; // append an extra dimension with energy to the filter banks - float energy_floor; - bool raw_energy; // If true, compute energy before preemphasis and windowing - bool htk_compat; // If true, put energy last (if using energy) - bool use_log_fbank; // if true (default), produce log-filterbank, else linear - bool use_power; - FbankOptions(): use_energy(false), - energy_floor(0.0), - raw_energy(true), - htk_compat(false), - use_log_fbank(true), - use_power(true) {} -}; - -bool InitFbank( - ::kaldi::FrameExtractionOptions frame_opts, - ::kaldi::MelBanksOptions mel_opts, - FbankOptions fbank_opts); - -py::array_t ComputeFbank( - ::kaldi::FrameExtractionOptions frame_opts, - ::kaldi::MelBanksOptions mel_opts, - FbankOptions fbank_opts, - const py::array_t& wav); - 
-py::array_t ComputeFbankStreaming(const py::array_t& wav); - -void ResetFbank(); - -py::array_t ComputeKaldiPitch( - const ::kaldi::PitchExtractionOptions& opts, - const py::array_t& wav); - -} // namespace kaldi -} // namespace paddleaudio diff --git a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc deleted file mode 100644 index 186cd92a0f6d35b6837c6811468d031609674178..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h" - -namespace paddleaudio { -namespace kaldi { - -KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() { - static KaldiFeatureWrapper instance; - return &instance; -} - -bool KaldiFeatureWrapper::InitFbank(::kaldi::FbankOptions opts) { - fbank_.reset(new Fbank(opts)); - return true; -} - -py::array_t KaldiFeatureWrapper::ComputeFbank( - const py::array_t wav) { - py::buffer_info info = wav.request(); - ::kaldi::SubVector<::kaldi::BaseFloat> input_wav((float*)info.ptr, info.size); - - ::kaldi::Vector<::kaldi::BaseFloat> feats; - bool flag = fbank_->ComputeFeature(input_wav, &feats); - if (flag == false || feats.Dim() == 0) return py::array_t(); - auto result = py::array_t(feats.Dim()); - py::buffer_info xs = result.request(); - std::cout << std::endl; - float* res_ptr = (float*)xs.ptr; - for (int idx = 0; idx < feats.Dim(); ++idx) { - *res_ptr = feats(idx); - res_ptr++; - } - - return result.reshape({feats.Dim() / Dim(), Dim()}); -} - -} // namesapce kaldi -} // namespace paddleaudio diff --git a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h b/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h deleted file mode 100644 index 48b12bb80b8b38ed0506e1d7c387ea2dbe265128..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "base/kaldi-common.h" -#include "feat/feature-fbank.h" - -#include "paddlespeech/audio/src/pybind/kaldi/feature_common.h" - -namespace paddleaudio { -namespace kaldi { - -typedef StreamingFeatureTpl<::kaldi::FbankComputer> Fbank; - -class KaldiFeatureWrapper { - public: - static KaldiFeatureWrapper* GetInstance(); - bool InitFbank(::kaldi::FbankOptions opts); - py::array_t ComputeFbank(const py::array_t wav); - int Dim() { return fbank_->Dim(); } - void ResetFbank() { fbank_->Reset(); } - - private: - std::unique_ptr fbank_; -}; - -} // namespace kaldi -} // namespace paddleaudio diff --git a/paddlespeech/audio/src/pybind/pybind.cpp b/paddlespeech/audio/src/pybind/pybind.cpp deleted file mode 100644 index b265a2ab1b52f2f6e913b6e6f5bdbeb24f7215c3..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/pybind.cpp +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), All rights reserved. -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
- -#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h" -#include "paddlespeech/audio/src/pybind/sox/io.h" -#include "paddlespeech/audio/src/pybind/sox/effects.h" -#include "paddlespeech/audio/third_party/kaldi/feat/feature-fbank.h" - -#include -#include - -// `tl::optional` -namespace pybind11 { namespace detail { - template - struct type_caster> : optional_caster> {}; -}} - -PYBIND11_MODULE(_paddleaudio, m) { -#ifdef INCLUDE_SOX - m.def("get_info_file", - &paddleaudio::sox_io::get_info_file, - "Get metadata of audio file."); - // support obj later - m.def("get_info_fileobj", - &paddleaudio::sox_io::get_info_fileobj, - "Get metadata of audio in file object."); - m.def("load_audio_fileobj", - &paddleaudio::sox_io::load_audio_fileobj, - "Load audio from file object."); - m.def("save_audio_fileobj", - &paddleaudio::sox_io::save_audio_fileobj, - "Save audio to file obj."); - - // sox io - m.def("sox_io_get_info", &paddleaudio::sox_io::get_info_file); - m.def( - "sox_io_load_audio_file", - &paddleaudio::sox_io::load_audio_file); - m.def( - "sox_io_save_audio_file", - &paddleaudio::sox_io::save_audio_file); - - // sox utils - m.def("sox_utils_set_seed", &paddleaudio::sox_utils::set_seed); - m.def( - "sox_utils_set_verbosity", - &paddleaudio::sox_utils::set_verbosity); - m.def( - "sox_utils_set_use_threads", - &paddleaudio::sox_utils::set_use_threads); - m.def( - "sox_utils_set_buffer_size", - &paddleaudio::sox_utils::set_buffer_size); - m.def( - "sox_utils_list_effects", - &paddleaudio::sox_utils::list_effects); - m.def( - "sox_utils_list_read_formats", - &paddleaudio::sox_utils::list_read_formats); - m.def( - "sox_utils_list_write_formats", - &paddleaudio::sox_utils::list_write_formats); - m.def( - "sox_utils_get_buffer_size", - &paddleaudio::sox_utils::get_buffer_size); - - // effect - m.def("apply_effects_fileobj", - &paddleaudio::sox_effects::apply_effects_fileobj, - "Decode audio data from file-like obj and apply effects."); - 
m.def("sox_effects_initialize_sox_effects", - &paddleaudio::sox_effects::initialize_sox_effects); - m.def( - "sox_effects_shutdown_sox_effects", - &paddleaudio::sox_effects::shutdown_sox_effects); - m.def( - "sox_effects_apply_effects_tensor", - &paddleaudio::sox_effects::apply_effects_tensor); - m.def( - "sox_effects_apply_effects_file", - &paddleaudio::sox_effects::apply_effects_file); -#endif - -#ifdef INCLUDE_KALDI - m.def("ComputeFbank", &paddleaudio::kaldi::ComputeFbank, "compute fbank"); - py::class_(m, "PitchExtractionOptions") - .def(py::init<>()) - .def_readwrite("samp_freq", &kaldi::PitchExtractionOptions::samp_freq) - .def_readwrite("frame_shift_ms", &kaldi::PitchExtractionOptions::frame_shift_ms) - .def_readwrite("frame_length_ms", &kaldi::PitchExtractionOptions::frame_length_ms) - .def_readwrite("preemph_coeff", &kaldi::PitchExtractionOptions::preemph_coeff) - .def_readwrite("min_f0", &kaldi::PitchExtractionOptions::min_f0) - .def_readwrite("max_f0", &kaldi::PitchExtractionOptions::max_f0) - .def_readwrite("soft_min_f0", &kaldi::PitchExtractionOptions::soft_min_f0) - .def_readwrite("penalty_factor", &kaldi::PitchExtractionOptions::penalty_factor) - .def_readwrite("lowpass_cutoff", &kaldi::PitchExtractionOptions::lowpass_cutoff) - .def_readwrite("resample_freq", &kaldi::PitchExtractionOptions::resample_freq) - .def_readwrite("delta_pitch", &kaldi::PitchExtractionOptions::delta_pitch) - .def_readwrite("nccf_ballast", &kaldi::PitchExtractionOptions::nccf_ballast) - .def_readwrite("lowpass_filter_width", &kaldi::PitchExtractionOptions::lowpass_filter_width) - .def_readwrite("upsample_filter_width", &kaldi::PitchExtractionOptions::upsample_filter_width) - .def_readwrite("max_frames_latency", &kaldi::PitchExtractionOptions::max_frames_latency) - .def_readwrite("frames_per_chunk", &kaldi::PitchExtractionOptions::frames_per_chunk) - .def_readwrite("simulate_first_pass_online", &kaldi::PitchExtractionOptions::simulate_first_pass_online) - 
.def_readwrite("recompute_frame", &kaldi::PitchExtractionOptions::recompute_frame) - .def_readwrite("nccf_ballast_online", &kaldi::PitchExtractionOptions::nccf_ballast_online) - .def_readwrite("snip_edges", &kaldi::PitchExtractionOptions::snip_edges); - m.def("ComputeKaldiPitch", &paddleaudio::kaldi::ComputeKaldiPitch, "compute kaldi pitch"); - py::class_(m, "FrameExtractionOptions") - .def(py::init<>()) - .def_readwrite("samp_freq", &kaldi::FrameExtractionOptions::samp_freq) - .def_readwrite("frame_shift_ms", &kaldi::FrameExtractionOptions::frame_shift_ms) - .def_readwrite("frame_length_ms", &kaldi::FrameExtractionOptions::frame_length_ms) - .def_readwrite("dither", &kaldi::FrameExtractionOptions::dither) - .def_readwrite("preemph_coeff", &kaldi::FrameExtractionOptions::preemph_coeff) - .def_readwrite("remove_dc_offset", &kaldi::FrameExtractionOptions::remove_dc_offset) - .def_readwrite("window_type", &kaldi::FrameExtractionOptions::window_type) - .def_readwrite("round_to_power_of_two", &kaldi::FrameExtractionOptions::round_to_power_of_two) - .def_readwrite("blackman_coeff", &kaldi::FrameExtractionOptions::blackman_coeff) - .def_readwrite("snip_edges", &kaldi::FrameExtractionOptions::snip_edges) - .def_readwrite("allow_downsample", &kaldi::FrameExtractionOptions::allow_downsample) - .def_readwrite("allow_upsample", &kaldi::FrameExtractionOptions::allow_upsample) - .def_readwrite("max_feature_vectors", &kaldi::FrameExtractionOptions::max_feature_vectors); - py::class_(m, "MelBanksOptions") - .def(py::init<>()) - .def_readwrite("num_bins", &kaldi::MelBanksOptions::num_bins) - .def_readwrite("low_freq", &kaldi::MelBanksOptions::low_freq) - .def_readwrite("high_freq", &kaldi::MelBanksOptions::high_freq) - .def_readwrite("vtln_low", &kaldi::MelBanksOptions::vtln_low) - .def_readwrite("vtln_high", &kaldi::MelBanksOptions::vtln_high) - .def_readwrite("debug_mel", &kaldi::MelBanksOptions::debug_mel) - .def_readwrite("htk_mode", &kaldi::MelBanksOptions::htk_mode); - - 
py::class_(m, "FbankOptions") - .def(py::init<>()) - .def_readwrite("use_energy", &paddleaudio::kaldi::FbankOptions::use_energy) - .def_readwrite("energy_floor", &paddleaudio::kaldi::FbankOptions::energy_floor) - .def_readwrite("raw_energy", &paddleaudio::kaldi::FbankOptions::raw_energy) - .def_readwrite("htk_compat", &paddleaudio::kaldi::FbankOptions::htk_compat) - .def_readwrite("use_log_fbank", &paddleaudio::kaldi::FbankOptions::use_log_fbank) - .def_readwrite("use_power", &paddleaudio::kaldi::FbankOptions::use_power); -#endif - -} diff --git a/paddlespeech/audio/src/pybind/sox/effects.cpp b/paddlespeech/audio/src/pybind/sox/effects.cpp deleted file mode 100644 index b69c5358a4cf0c2d000970ce44563edb0eecc447..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/effects.cpp +++ /dev/null @@ -1,257 +0,0 @@ -#include -#include - -#include "paddlespeech/audio/src/pybind/sox/effects.h" -#include "paddlespeech/audio/src/pybind/sox/effects_chain.h" -#include "paddlespeech/audio/src/pybind/sox/utils.h" - -using namespace paddleaudio::sox_utils; - -namespace paddleaudio::sox_effects { - -// Streaming decoding over file-like object is tricky because libsox operates on -// FILE pointer. The folloing is what `sox` and `play` commands do -// - file input -> FILE pointer -// - URL input -> call wget in suprocess and pipe the data -> FILE pointer -// - stdin -> FILE pointer -// -// We want to, instead, fetch byte strings chunk by chunk, consume them, and -// discard. -// -// Here is the approach -// 1. Initialize sox_format_t using sox_open_mem_read, providing the initial -// chunk of byte string -// This will perform header-based format detection, if necessary, then fill -// the metadata of sox_format_t. Internally, sox_open_mem_read uses fmemopen, -// which returns FILE* which points the buffer of the provided byte string. -// 2. 
Each time sox reads a chunk from the FILE*, we update the underlying -// buffer in a way that it -// starts with unseen data, and append the new data read from the given -// fileobj. This will trick libsox as if it keeps reading from the FILE* -// continuously. -// For Step 2. see `fileobj_input_drain` function in effects_chain.cpp -auto apply_effects_fileobj( - py::object fileobj, - const std::vector>& effects, - tl::optional normalize, - tl::optional channels_first, - tl::optional format) - -> tl::optional> { - // Prepare the buffer used throughout the lifecycle of SoxEffectChain. - // - // For certain format (such as FLAC), libsox keeps reading the content at - // the initialization unless it reaches EOF even when the header is properly - // parsed. (Making buffer size 8192, which is way bigger than the header, - // resulted in libsox consuming all the buffer content at the time it opens - // the file.) Therefore buffer has to always contain valid data, except after - // EOF. We default to `sox_get_globals()->bufsiz`* for buffer size and we - // first check if there is enough data to fill the buffer. `read_fileobj` - // repeatedly calls `read` method until it receives the requested length of - // bytes or it reaches EOF. If we get bytes shorter than requested, that means - // the whole audio data are fetched. - // - // * This can be changed with `paddleaudio.utils.sox_utils.set_buffer_size`. - const auto capacity = [&]() { - // NOTE: - // Use the abstraction provided by `libpaddleaudio` to access the global - // config defined by libsox. Directly using `sox_get_globals` function will - // end up retrieving the static variable defined in `_paddleaudio`, which is - // not correct. - const auto bufsiz = get_buffer_size(); - const int64_t kDefaultCapacityInBytes = 256; - return (bufsiz > kDefaultCapacityInBytes) ? 
bufsiz - : kDefaultCapacityInBytes; - }(); - std::string buffer(capacity, '\0'); - auto* in_buf = const_cast(buffer.data()); - auto num_read = read_fileobj(&fileobj, capacity, in_buf); - // If the file is shorter than 256, then libsox cannot read the header. - auto in_buffer_size = (num_read > 256) ? num_read : 256; - - // Open file (this starts reading the header) - // When opening a file there are two functions that can touches FILE*. - // * `auto_detect_format` - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43 - // * `startread` handler of detected format. - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574 - // To see the handler of a particular format, go to - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/.c - // For example, voribs can be found - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158 - SoxFormat sf(sox_open_mem_read( - in_buf, - in_buffer_size, - /*signal=*/nullptr, - /*encoding=*/nullptr, - /*filetype=*/format.has_value() ? 
format.value().c_str() : nullptr)); - - // In case of streamed data, length can be 0 - if (static_cast(sf) == nullptr || - sf->encoding.encoding == SOX_ENCODING_UNKNOWN) { - return {}; - } - - // Prepare output buffer - std::vector out_buffer; - out_buffer.reserve(sf->signal.length); - - // Create and run SoxEffectsChain - const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision); - paddleaudio::sox_effects_chain::SoxEffectsChainPyBind chain( - /*input_encoding=*/sf->encoding, - /*output_encoding=*/get_tensor_encodinginfo(dtype)); - chain.addInputFileObj(sf, in_buf, in_buffer_size, &fileobj); - for (const auto& effect : effects) { - chain.addEffect(effect); - } - chain.addOutputBuffer(&out_buffer); - chain.run(); - - // Create tensor from buffer - bool channels_first_ = channels_first.value_or(true); - auto tensor = convert_to_tensor( - /*buffer=*/out_buffer.data(), - /*num_samples=*/out_buffer.size(), - /*num_channels=*/chain.getOutputNumChannels(), - dtype, - normalize.value_or(true), - channels_first_); - - return std::forward_as_tuple( - tensor, static_cast(chain.getOutputSampleRate())); -} - -namespace { - -enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown }; -SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized; -std::mutex SOX_RESOUCE_STATE_MUTEX; - -} // namespace - -void initialize_sox_effects() { - const std::lock_guard lock(SOX_RESOUCE_STATE_MUTEX); - - switch (SOX_RESOURCE_STATE) { - case NotInitialized: - if (sox_init() != SOX_SUCCESS) { - throw std::runtime_error("Failed to initialize sox effects."); - }; - SOX_RESOURCE_STATE = Initialized; - break; - case Initialized: - break; - case ShutDown: - throw std::runtime_error( - "SoX Effects has been shut down. Cannot initialize again."); - } -}; - -void shutdown_sox_effects() { - const std::lock_guard lock(SOX_RESOUCE_STATE_MUTEX); - - switch (SOX_RESOURCE_STATE) { - case NotInitialized: - throw std::runtime_error( - "SoX Effects is not initialized. 
Cannot shutdown."); - case Initialized: - if (sox_quit() != SOX_SUCCESS) { - throw std::runtime_error("Failed to initialize sox effects."); - }; - SOX_RESOURCE_STATE = ShutDown; - break; - case ShutDown: - break; - } -} - -auto apply_effects_tensor( - py::array waveform, - int64_t sample_rate, - const std::vector>& effects, - bool channels_first) -> std::tuple { - validate_input_tensor(waveform); - - // Create SoxEffectsChain - const auto dtype = waveform.dtype(); - paddleaudio::sox_effects_chain::SoxEffectsChain chain( - /*input_encoding=*/get_tensor_encodinginfo(dtype), - /*output_encoding=*/get_tensor_encodinginfo(dtype)); - - // Prepare output buffer - std::vector out_buffer; - out_buffer.reserve(waveform.size()); - - // Build and run effects chain - chain.addInputTensor(&waveform, sample_rate, channels_first); - for (const auto& effect : effects) { - chain.addEffect(effect); - } - chain.addOutputBuffer(&out_buffer); - chain.run(); - - // Create tensor from buffer - auto out_tensor = convert_to_tensor( - /*buffer=*/out_buffer.data(), - /*num_samples=*/out_buffer.size(), - /*num_channels=*/chain.getOutputNumChannels(), - dtype, - /*normalize=*/false, - channels_first); - - return std::tuple( - out_tensor, chain.getOutputSampleRate()); -} - -auto apply_effects_file( - const std::string& path, - const std::vector>& effects, - tl::optional normalize, - tl::optional channels_first, - const tl::optional& format) - -> tl::optional> { - // Open input file - SoxFormat sf(sox_open_read( - path.c_str(), - /*signal=*/nullptr, - /*encoding=*/nullptr, - /*filetype=*/format.has_value() ? 
format.value().c_str() : nullptr)); - - if (static_cast(sf) == nullptr || - sf->encoding.encoding == SOX_ENCODING_UNKNOWN) { - return {}; - } - - const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision); - - // Prepare output - std::vector out_buffer; - out_buffer.reserve(sf->signal.length); - - // Create and run SoxEffectsChain - paddleaudio::sox_effects_chain::SoxEffectsChain chain( - /*input_encoding=*/sf->encoding, - /*output_encoding=*/get_tensor_encodinginfo(dtype)); - - chain.addInputFile(sf); - for (const auto& effect : effects) { - chain.addEffect(effect); - } - chain.addOutputBuffer(&out_buffer); - chain.run(); - - // Create tensor from buffer - bool channels_first_ = channels_first.value_or(true); - auto tensor = convert_to_tensor( - /*buffer=*/out_buffer.data(), - /*num_samples=*/out_buffer.size(), - /*num_channels=*/chain.getOutputNumChannels(), - dtype, - normalize.value_or(true), - channels_first_); - - return std::tuple( - tensor, chain.getOutputSampleRate()); -} - -} // namespace paddleaudio::sox_effects diff --git a/paddlespeech/audio/src/pybind/sox/effects.h b/paddlespeech/audio/src/pybind/sox/effects.h deleted file mode 100644 index 6ba53d008715ed6dd937d50475bf2a978b734d89..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/effects.h +++ /dev/null @@ -1,36 +0,0 @@ -#include -#include - -#include "paddlespeech/audio/src/optional/optional.hpp" - -namespace py = pybind11; - -namespace paddleaudio::sox_effects { - -auto apply_effects_fileobj( - py::object fileobj, - const std::vector>& effects, - tl::optional normalize, - tl::optional channels_first, - tl::optional format) - -> tl::optional>; - -void initialize_sox_effects(); - -void shutdown_sox_effects(); - -auto apply_effects_tensor( - py::array waveform, - int64_t sample_rate, - const std::vector>& effects, - bool channels_first) -> std::tuple; - -auto apply_effects_file( - const std::string& path, - const std::vector>& effects, - tl::optional 
normalize, - tl::optional channels_first, - const tl::optional& format) - -> tl::optional>; - -} // namespace paddleaudio::sox_effects diff --git a/paddlespeech/audio/src/pybind/sox/effects_chain.cpp b/paddlespeech/audio/src/pybind/sox/effects_chain.cpp deleted file mode 100644 index 5e8f6ee71fac3daf10871dfdbdf6b2920e33cff7..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/effects_chain.cpp +++ /dev/null @@ -1,595 +0,0 @@ -#include -#include -#include -#include "paddlespeech/audio/src/pybind/sox/effects_chain.h" -#include "paddlespeech/audio/src/pybind/sox/utils.h" - -using namespace paddleaudio::sox_utils; - -namespace paddleaudio::sox_effects_chain { - -namespace { - -/// helper classes for passing the location of input tensor and output buffer -/// -/// drain/flow callback functions require plaing C style function signature and -/// the way to pass extra data is to attach data to sox_effect_t::priv pointer. -/// The following structs will be assigned to sox_effect_t::priv pointer which -/// gives sox_effect_t an access to input Tensor and output buffer object. -struct TensorInputPriv { - size_t index; - py::array* waveform; - int64_t sample_rate; - bool channels_first; -}; - -struct TensorOutputPriv { - std::vector* buffer; -}; -struct FileOutputPriv { - sox_format_t* sf; -}; - -/// Callback function to feed Tensor data to SoxEffectChain. 
-int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) { - // Retrieve the input Tensor and current index - auto priv = static_cast(effp->priv); - auto index = priv->index; - auto tensor = *(priv->waveform); - auto num_channels = effp->out_signal.channels; - - // Adjust the number of samples to read - const size_t num_samples = tensor.size(); - if (index + *osamp > num_samples) { - *osamp = num_samples - index; - } - - // Ensure that it's a multiple of the number of channels - *osamp -= *osamp % num_channels; - - // Slice the input Tensor - // refacor this module, chunk - auto i_frame = index / num_channels; - auto num_frames = *osamp / num_channels; - - std::vector chunk(num_frames*num_channels); - py::buffer_info ori_info = tensor.request(); - void* ptr = ori_info.ptr; - // Convert to sox_sample_t (int32_t) - switch (tensor.dtype().num()) { - //case c10::ScalarType::Float: { - case 11: { - // Need to convert to 64-bit precision so that - // values around INT32_MIN/MAX are handled correctly. 
- for (int idx = 0; idx < chunk.size(); ++idx) { - int frame_idx = (idx + index) / num_channels; - int channels_idx = (idx + index) % num_channels; - double elem = 0; - if (priv->channels_first) { - elem = *(float*)tensor.data(channels_idx, frame_idx); - } else { - elem = *(float*)tensor.data(frame_idx, channels_idx); - } - elem = elem * 2147483648.; - // *new_ptr = std::clamp(elem, INT32_MIN, INT32_MAX); - if (elem > INT32_MAX) { - chunk[idx] = INT32_MAX; - } else if (elem < INT32_MIN) { - chunk[idx] = INT32_MIN; - } else { - chunk[idx] = elem; - } - } - break; - } - //case c10::ScalarType::Int: { - case 5: { - for (int idx = 0; idx < chunk.size(); ++idx) { - int frame_idx = (idx + index) / num_channels; - int channels_idx = (idx + index) % num_channels; - int elem = 0; - if (priv->channels_first) { - elem = *(int*)tensor.data(channels_idx, frame_idx); - } else { - elem = *(int*)tensor.data(frame_idx, channels_idx); - } - chunk[idx] = elem; - } - break; - } - // case short - case 3: { - for (int idx = 0; idx < chunk.size(); ++idx) { - int frame_idx = (idx + index) / num_channels; - int channels_idx = (idx + index) % num_channels; - int16_t elem = 0; - if (priv->channels_first) { - elem = *(int16_t*)tensor.data(channels_idx, frame_idx); - } else { - elem = *(int16_t*)tensor.data(frame_idx, channels_idx); - } - chunk[idx] = elem * 65536; - } - break; - } - // case byte - case 1: { - for (int idx = 0; idx < chunk.size(); ++idx) { - int frame_idx = (idx + index) / num_channels; - int channels_idx = (idx + index) % num_channels; - int8_t elem = 0; - if (priv->channels_first) { - elem = *(int8_t*)tensor.data(channels_idx, frame_idx); - } else { - elem = *(int8_t*)tensor.data(frame_idx, channels_idx); - } - chunk[idx] = (elem - 128) * 16777216; - } - break; - } - default: - throw std::runtime_error("Unexpected dtype."); - } - // Write to buffer - memcpy(obuf, chunk.data(), *osamp * 4); - priv->index += *osamp; - return (priv->index == num_samples) ? 
SOX_EOF : SOX_SUCCESS; -} - -/// Callback function to fetch data from SoxEffectChain. -int tensor_output_flow( - sox_effect_t* effp, - sox_sample_t const* ibuf, - sox_sample_t* obuf LSX_UNUSED, - size_t* isamp, - size_t* osamp) { - *osamp = 0; - // Get output buffer - auto out_buffer = static_cast(effp->priv)->buffer; - // Append at the end - out_buffer->insert(out_buffer->end(), ibuf, ibuf + *isamp); - return SOX_SUCCESS; -} - -int file_output_flow( - sox_effect_t* effp, - sox_sample_t const* ibuf, - sox_sample_t* obuf LSX_UNUSED, - size_t* isamp, - size_t* osamp) { - *osamp = 0; - if (*isamp) { - auto sf = static_cast(effp->priv)->sf; - if (sox_write(sf, ibuf, *isamp) != *isamp) { - if (sf->sox_errno) { - std::ostringstream stream; - stream << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " " - << sf->filename; - throw std::runtime_error(stream.str()); - } - return SOX_EOF; - } - } - return SOX_SUCCESS; -} - -sox_effect_handler_t* get_tensor_input_handler() { - static sox_effect_handler_t handler{ - /*name=*/"input_tensor", - /*usage=*/NULL, - /*flags=*/SOX_EFF_MCHAN, - /*getopts=*/NULL, - /*start=*/NULL, - /*flow=*/NULL, - /*drain=*/tensor_input_drain, - /*stop=*/NULL, - /*kill=*/NULL, - /*priv_size=*/sizeof(TensorInputPriv)}; - return &handler; -} - -sox_effect_handler_t* get_tensor_output_handler() { - static sox_effect_handler_t handler{ - /*name=*/"output_tensor", - /*usage=*/NULL, - /*flags=*/SOX_EFF_MCHAN, - /*getopts=*/NULL, - /*start=*/NULL, - /*flow=*/tensor_output_flow, - /*drain=*/NULL, - /*stop=*/NULL, - /*kill=*/NULL, - /*priv_size=*/sizeof(TensorOutputPriv)}; - return &handler; -} - -sox_effect_handler_t* get_file_output_handler() { - static sox_effect_handler_t handler{ - /*name=*/"output_file", - /*usage=*/NULL, - /*flags=*/SOX_EFF_MCHAN, - /*getopts=*/NULL, - /*start=*/NULL, - /*flow=*/file_output_flow, - /*drain=*/NULL, - /*stop=*/NULL, - /*kill=*/NULL, - /*priv_size=*/sizeof(FileOutputPriv)}; - return &handler; -} - -} // namespace - 
-SoxEffect::SoxEffect(sox_effect_t* se) noexcept : se_(se) {} - -SoxEffect::~SoxEffect() { - if (se_ != nullptr) { - free(se_); - } -} - -SoxEffect::operator sox_effect_t*() const { - return se_; -} - -auto SoxEffect::operator->() noexcept -> sox_effect_t* { - return se_; -} - -SoxEffectsChain::SoxEffectsChain( - sox_encodinginfo_t input_encoding, - sox_encodinginfo_t output_encoding) - : in_enc_(input_encoding), - out_enc_(output_encoding), - in_sig_(), - interm_sig_(), - out_sig_(), - sec_(sox_create_effects_chain(&in_enc_, &out_enc_)) { - if (!sec_) { - throw std::runtime_error("Failed to create effect chain."); - } -} - -SoxEffectsChain::~SoxEffectsChain() { - if (sec_ != nullptr) { - sox_delete_effects_chain(sec_); - } -} - -void SoxEffectsChain::run() { - sox_flow_effects(sec_, NULL, NULL); -} - -void SoxEffectsChain::addInputTensor( - py::array* waveform, - int64_t sample_rate, - bool channels_first) { - in_sig_ = get_signalinfo(waveform, sample_rate, "wav", channels_first); - interm_sig_ = in_sig_; - SoxEffect e(sox_create_effect(get_tensor_input_handler())); - auto priv = static_cast(e->priv); - priv->index = 0; - priv->waveform = waveform; - priv->sample_rate = sample_rate; - priv->channels_first = channels_first; - if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) { - throw std::runtime_error( - "Internal Error: Failed to add effect: input_tensor"); - } -} - -void SoxEffectsChain::addOutputBuffer( - std::vector* output_buffer) { - SoxEffect e(sox_create_effect(get_tensor_output_handler())); - static_cast(e->priv)->buffer = output_buffer; - if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) { - throw std::runtime_error( - "Internal Error: Failed to add effect: output_tensor"); - } -} - -void SoxEffectsChain::addInputFile(sox_format_t* sf) { - in_sig_ = sf->signal; - interm_sig_ = in_sig_; - SoxEffect e(sox_create_effect(sox_find_effect("input"))); - char* opts[] = {(char*)sf}; - sox_effect_options(e, 1, opts); - if 
(sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) { - std::ostringstream stream; - stream << "Internal Error: Failed to add effect: input " << sf->filename; - throw std::runtime_error(stream.str()); - } -} - -void SoxEffectsChain::addOutputFile(sox_format_t* sf) { - out_sig_ = sf->signal; - SoxEffect e(sox_create_effect(get_file_output_handler())); - static_cast(e->priv)->sf = sf; - if (sox_add_effect(sec_, e, &interm_sig_, &out_sig_) != SOX_SUCCESS) { - std::ostringstream stream; - stream << "Internal Error: Failed to add effect: output " << sf->filename; - throw std::runtime_error(stream.str()); - } -} - -void SoxEffectsChain::addEffect(const std::vector effect) { - const auto num_args = effect.size(); - if (num_args == 0) { - throw std::runtime_error("Invalid argument: empty effect."); - } - const auto name = effect[0]; - if (UNSUPPORTED_EFFECTS.find(name) != UNSUPPORTED_EFFECTS.end()) { - std::ostringstream stream; - stream << "Unsupported effect: " << name; - throw std::runtime_error(stream.str()); - } - - auto returned_effect = sox_find_effect(name.c_str()); - if (!returned_effect) { - std::ostringstream stream; - stream << "Unsupported effect: " << name; - throw std::runtime_error(stream.str()); - } - SoxEffect e(sox_create_effect(returned_effect)); - const auto num_options = num_args - 1; - - std::vector opts; - for (size_t i = 1; i < num_args; ++i) { - opts.push_back((char*)effect[i].c_str()); - } - if (sox_effect_options(e, num_options, num_options ? 
opts.data() : nullptr) != - SOX_SUCCESS) { - std::ostringstream stream; - stream << "Invalid effect option:"; - for (const auto& v : effect) { - stream << " " << v; - } - throw std::runtime_error(stream.str()); - } - - if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) { - std::ostringstream stream; - stream << "Internal Error: Failed to add effect: \"" << name; - for (size_t i = 1; i < num_args; ++i) { - stream << " " << effect[i]; - } - stream << "\""; - throw std::runtime_error(stream.str()); - } -} - -int64_t SoxEffectsChain::getOutputNumChannels() { - return interm_sig_.channels; -} - -int64_t SoxEffectsChain::getOutputSampleRate() { - return interm_sig_.rate; -} - -namespace { - -/// helper classes for passing file-like object to SoxEffectChain -struct FileObjInputPriv { - sox_format_t* sf; - py::object* fileobj; - bool eof_reached; - char* buffer; - uint64_t buffer_size; -}; - -struct FileObjOutputPriv { - sox_format_t* sf; - py::object* fileobj; - char** buffer; - size_t* buffer_size; -}; - -/// Callback function to feed byte string -/// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/sox.h#L1268-L1278 -auto fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) - -> int { - auto priv = static_cast(effp->priv); - auto sf = priv->sf; - auto buffer = priv->buffer; - - // 1. Refresh the buffer - // - // NOTE: - // Since the underlying FILE* was opened with `fmemopen`, the only way - // libsox detect EOF is reaching the end of the buffer. (null byte won't - // help) Therefore we need to align the content at the end of buffer, - // otherwise, libsox will keep reading the content beyond intended length. 
- // - // Before: - // - // |<-------consumed------>|<---remaining--->| - // |***********************|-----------------| - // ^ ftell - // - // After: - // - // |<-offset->|<---remaining--->|<-new data->| - // |**********|-----------------|++++++++++++| - // ^ ftell - - // NOTE: - // Do not use `sf->tell_off` here. Presumably, `tell_off` and `fseek` are - // supposed to be in sync, but there are cases (Vorbis) they are not - // in sync and `tell_off` has seemingly uninitialized value, which - // leads num_remain to be negative and cause segmentation fault - // in `memmove`. - const auto tell = ftell((FILE*)sf->fp); - if (tell < 0) { - throw std::runtime_error("Internal Error: ftell failed."); - } - const auto num_consumed = static_cast(tell); - if (num_consumed > priv->buffer_size) { - throw std::runtime_error("Internal Error: buffer overrun."); - } - - const auto num_remain = priv->buffer_size - num_consumed; - - // 1.1. Fetch the data to see if there is data to fill the buffer - size_t num_refill = 0; - std::string chunk(num_consumed, '\0'); - if (num_consumed && !priv->eof_reached) { - num_refill = read_fileobj( - priv->fileobj, num_consumed, const_cast(chunk.data())); - if (num_refill < num_consumed) { - priv->eof_reached = true; - } - } - const auto offset = num_consumed - num_refill; - - // 1.2. Move the unconsumed data towards the beginning of buffer. - if (num_remain) { - auto src = static_cast(buffer + num_consumed); - auto dst = static_cast(buffer + offset); - memmove(dst, src, num_remain); - } - - // 1.3. Refill the remaining buffer. - if (num_refill) { - auto src = static_cast(const_cast(chunk.c_str())); - auto dst = buffer + offset + num_remain; - memcpy(dst, src, num_refill); - } - - // 1.4. Set the file pointer to the new offset - sf->tell_off = offset; - fseek((FILE*)sf->fp, offset, SEEK_SET); - - // 2. 
Perform decoding operation - // The following part is practically same as "input" effect - // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/input.c#L30-L48 - - // At this point, osamp represents the buffer size in bytes, - // but sox_read expects the maximum number of samples ready to read. - // Normally, this is fine, but in case when the samples are not 4-byte - // aligned, (e.g. sample is 24bits), the resulting signal is not correct. - // https://github.com/pytorch/audio/issues/2083 - if (sf->encoding.bits_per_sample > 0) - *osamp /= (sf->encoding.bits_per_sample / 8); - - // Ensure that it's a multiple of the number of channels - *osamp -= *osamp % effp->out_signal.channels; - - // Read up to *osamp samples into obuf; - // store the actual number read back to *osamp - *osamp = sox_read(sf, obuf, *osamp); - - // Decoding is finished when fileobject is exhausted and sox can no longer - // decode a sample. - return (priv->eof_reached && !*osamp) ? SOX_EOF : SOX_SUCCESS; -} - -auto fileobj_output_flow( - sox_effect_t* effp, - sox_sample_t const* ibuf, - sox_sample_t* obuf LSX_UNUSED, - size_t* isamp, - size_t* osamp) -> int { - *osamp = 0; - if (*isamp) { - auto priv = static_cast(effp->priv); - auto sf = priv->sf; - auto fp = static_cast(sf->fp); - auto fileobj = priv->fileobj; - auto buffer = priv->buffer; - - // Encode chunk - auto num_samples_written = sox_write(sf, ibuf, *isamp); - fflush(fp); - - // Copy the encoded chunk to python object. 
- fileobj->attr("write")(py::bytes(*buffer, ftell(fp))); - - // Reset FILE* - sf->tell_off = 0; - fseek(fp, 0, SEEK_SET); - - if (num_samples_written != *isamp) { - if (sf->sox_errno) { - std::ostringstream stream; - stream << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " " - << sf->filename; - throw std::runtime_error(stream.str()); - } - return SOX_EOF; - } - } - return SOX_SUCCESS; -} - -auto get_fileobj_input_handler() -> sox_effect_handler_t* { - static sox_effect_handler_t handler{ - /*name=*/"input_fileobj_object", - /*usage=*/nullptr, - /*flags=*/SOX_EFF_MCHAN, - /*getopts=*/nullptr, - /*start=*/nullptr, - /*flow=*/nullptr, - /*drain=*/fileobj_input_drain, - /*stop=*/nullptr, - /*kill=*/nullptr, - /*priv_size=*/sizeof(FileObjInputPriv)}; - return &handler; -} - -auto get_fileobj_output_handler() -> sox_effect_handler_t* { - static sox_effect_handler_t handler{ - /*name=*/"output_fileobj_object", - /*usage=*/nullptr, - /*flags=*/SOX_EFF_MCHAN, - /*getopts=*/nullptr, - /*start=*/nullptr, - /*flow=*/fileobj_output_flow, - /*drain=*/nullptr, - /*stop=*/nullptr, - /*kill=*/nullptr, - /*priv_size=*/sizeof(FileObjOutputPriv)}; - return &handler; -} - -} // namespace - -void SoxEffectsChainPyBind::addInputFileObj( - sox_format_t* sf, - char* buffer, - uint64_t buffer_size, - py::object* fileobj) { - in_sig_ = sf->signal; - interm_sig_ = in_sig_; - - SoxEffect e(sox_create_effect(get_fileobj_input_handler())); - auto priv = static_cast(e->priv); - priv->sf = sf; - priv->fileobj = fileobj; - priv->eof_reached = false; - priv->buffer = buffer; - priv->buffer_size = buffer_size; - if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) { - throw std::runtime_error( - "Internal Error: Failed to add effect: input fileobj"); - } -} - -void SoxEffectsChainPyBind::addOutputFileObj( - sox_format_t* sf, - char** buffer, - size_t* buffer_size, - py::object* fileobj) { - out_sig_ = sf->signal; - SoxEffect 
e(sox_create_effect(get_fileobj_output_handler())); - auto priv = static_cast(e->priv); - priv->sf = sf; - priv->fileobj = fileobj; - priv->buffer = buffer; - priv->buffer_size = buffer_size; - if (sox_add_effect(sec_, e, &interm_sig_, &out_sig_) != SOX_SUCCESS) { - throw std::runtime_error( - "Internal Error: Failed to add effect: output fileobj"); - } -} - -} // namespace paddleaudio::sox_effects_chain diff --git a/paddlespeech/audio/src/pybind/sox/effects_chain.h b/paddlespeech/audio/src/pybind/sox/effects_chain.h deleted file mode 100644 index 6fb994b5af6a9f519fa9e10811ba3fb6a2fac47b..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/effects_chain.h +++ /dev/null @@ -1,76 +0,0 @@ -#pragma once - -#include -#include "paddlespeech/audio/src/pybind/sox/utils.h" - -namespace paddleaudio::sox_effects_chain { - -// Helper struct to safely close sox_effect_t* pointer returned by -// sox_create_effect - -struct SoxEffect { - explicit SoxEffect(sox_effect_t* se) noexcept; - SoxEffect(const SoxEffect& other) = delete; - SoxEffect(const SoxEffect&& other) = delete; - auto operator=(const SoxEffect& other) -> SoxEffect& = delete; - auto operator=(SoxEffect&& other) -> SoxEffect& = delete; - ~SoxEffect(); - operator sox_effect_t*() const; - auto operator->() noexcept -> sox_effect_t*; - - private: - sox_effect_t* se_; -}; - -// Helper struct to safely close sox_effects_chain_t with handy methods -class SoxEffectsChain { - const sox_encodinginfo_t in_enc_; - const sox_encodinginfo_t out_enc_; - - protected: - sox_signalinfo_t in_sig_; - sox_signalinfo_t interm_sig_; - sox_signalinfo_t out_sig_; - sox_effects_chain_t* sec_; - - public: - explicit SoxEffectsChain( - sox_encodinginfo_t input_encoding, - sox_encodinginfo_t output_encoding); - SoxEffectsChain(const SoxEffectsChain& other) = delete; - SoxEffectsChain(const SoxEffectsChain&& other) = delete; - SoxEffectsChain& operator=(const SoxEffectsChain& other) = delete; - SoxEffectsChain& 
operator=(SoxEffectsChain&& other) = delete; - ~SoxEffectsChain(); - void run(); - void addInputTensor( - py::array* waveform, - int64_t sample_rate, - bool channels_first); - void addInputFile(sox_format_t* sf); - void addOutputBuffer(std::vector* output_buffer); - void addOutputFile(sox_format_t* sf); - void addEffect(const std::vector effect); - int64_t getOutputNumChannels(); - int64_t getOutputSampleRate(); -}; - -class SoxEffectsChainPyBind : public SoxEffectsChain { - using SoxEffectsChain::SoxEffectsChain; - - public: - void addInputFileObj( - sox_format_t* sf, - char* buffer, - uint64_t buffer_size, - py::object* fileobj); - - void addOutputFileObj( - sox_format_t* sf, - char** buffer, - size_t* buffer_size, - py::object* fileobj); -}; - -} // namespace paddleaudio::sox_effects_chain - diff --git a/paddlespeech/audio/src/pybind/sox/io.cpp b/paddlespeech/audio/src/pybind/sox/io.cpp deleted file mode 100644 index 60f9222abc666f17b291a1fa1ada4507ac5d484e..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/io.cpp +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), -// All rights reserved. - -#include "paddlespeech/audio/src/pybind/sox/io.h" -#include "paddlespeech/audio/src/pybind/sox/effects.h" -#include "paddlespeech/audio/src/pybind/sox/types.h" -#include "paddlespeech/audio/src/pybind/sox/effects_chain.h" -#include "paddlespeech/audio/src/pybind/sox/utils.h" -#include "paddlespeech/audio/src/optional/optional.hpp" - -using namespace paddleaudio::sox_utils; - -namespace paddleaudio { -namespace sox_io { - -auto get_info_file(const std::string &path, - const tl::optional &format) - -> std::tuple { - SoxFormat sf( - sox_open_read(path.data(), - /*signal=*/nullptr, - /*encoding=*/nullptr, - /*filetype=*/format.has_value() ? 
format.value().c_str() : nullptr)); - - - validate_input_file(sf, path); - - return std::make_tuple( - static_cast(sf->signal.rate), - static_cast(sf->signal.length / sf->signal.channels), - static_cast(sf->signal.channels), - static_cast(sf->encoding.bits_per_sample), - get_encoding(sf->encoding.encoding)); -} - -std::vector> get_effects( - const tl::optional& frame_offset, - const tl::optional& num_frames) { - const auto offset = frame_offset.value_or(0); - if (offset < 0) { - throw std::runtime_error( - "Invalid argument: frame_offset must be non-negative."); - } - const auto frames = num_frames.value_or(-1); - if (frames == 0 || frames < -1) { - throw std::runtime_error( - "Invalid argument: num_frames must be -1 or greater than 0."); - } - - std::vector> effects; - if (frames != -1) { - std::ostringstream os_offset, os_frames; - os_offset << offset << "s"; - os_frames << "+" << frames << "s"; - effects.emplace_back( - std::vector{"trim", os_offset.str(), os_frames.str()}); - } else if (offset != 0) { - std::ostringstream os_offset; - os_offset << offset << "s"; - effects.emplace_back(std::vector{"trim", os_offset.str()}); - } - return effects; -} - -auto get_info_fileobj(py::object fileobj, - const tl::optional &format) - -> std::tuple { - const auto capacity = [&]() { - const auto bufsiz = get_buffer_size(); - const int64_t kDefaultCapacityInBytes = 4096; - return (bufsiz > kDefaultCapacityInBytes) ? bufsiz - : kDefaultCapacityInBytes; - }(); - std::string buffer(capacity, '\0'); - auto *buf = const_cast(buffer.data()); - auto num_read = read_fileobj(&fileobj, capacity, buf); - // If the file is shorter than 256, then libsox cannot read the header. - auto buf_size = (num_read > 256) ? num_read : 256; - - SoxFormat sf(sox_open_mem_read( - buf, - buf_size, - /*signal=*/nullptr, - /*encoding=*/nullptr, - /*filetype=*/format.has_value() ? 
format.value().c_str() : nullptr)); - - // In case of streamed data, length can be 0 - validate_input_memfile(sf); - - return std::make_tuple( - static_cast(sf->signal.rate), - static_cast(sf->signal.length / sf->signal.channels), - static_cast(sf->signal.channels), - static_cast(sf->encoding.bits_per_sample), - get_encoding(sf->encoding.encoding)); -} - -tl::optional> load_audio_fileobj( - py::object fileobj, - const tl::optional& frame_offset, - const tl::optional& num_frames, - tl::optional normalize, - tl::optional channels_first, - const tl::optional& format) { - auto effects = get_effects(frame_offset, num_frames); - return paddleaudio::sox_effects::apply_effects_fileobj( - std::move(fileobj), effects, normalize, channels_first, std::move(format)); -} - -tl::optional> load_audio_file( - const std::string& path, - const tl::optional& frame_offset, - const tl::optional& num_frames, - tl::optional normalize, - tl::optional channels_first, - const tl::optional& format) { - auto effects = get_effects(frame_offset, num_frames); - return paddleaudio::sox_effects::apply_effects_file( - path, effects, normalize, channels_first, format); -} - -void save_audio_file(const std::string& path, - py::array tensor, - int64_t sample_rate, - bool channels_first, - tl::optional compression, - tl::optional format, - tl::optional encoding, - tl::optional bits_per_sample) { - validate_input_tensor(tensor); - - const auto filetype = [&]() { - if (format.has_value()) return format.value(); - return get_filetype(path); - }(); - - if (filetype == "amr-nb") { - const auto num_channels = tensor.shape(channels_first ? 0 : 1); - //TORCH_CHECK(num_channels == 1, - // "amr-nb format only supports single channel audio."); - assert(num_channels == 1); - } else if (filetype == "htk") { - const auto num_channels = tensor.shape(channels_first ? 
0 : 1); - // TORCH_CHECK(num_channels == 1, - // "htk format only supports single channel audio."); - assert(num_channels == 1); - } else if (filetype == "gsm") { - const auto num_channels = tensor.shape(channels_first ? 0 : 1); - assert(num_channels == 1); - assert(sample_rate == 8000); - //TORCH_CHECK(num_channels == 1, - // "gsm format only supports single channel audio."); - //TORCH_CHECK(sample_rate == 8000, - // "gsm format only supports a sampling rate of 8kHz."); - } - const auto signal_info = - get_signalinfo(&tensor, sample_rate, filetype, channels_first); - const auto encoding_info = get_encodinginfo_for_save( - filetype, tensor.dtype(), compression, encoding, bits_per_sample); - - SoxFormat sf(sox_open_write(path.c_str(), - &signal_info, - &encoding_info, - /*filetype=*/filetype.c_str(), - /*oob=*/nullptr, - /*overwrite_permitted=*/nullptr)); - - if (static_cast(sf) == nullptr) { - throw std::runtime_error( - "Error saving audio file: failed to open file " + path); - } - - paddleaudio::sox_effects_chain::SoxEffectsChain chain( - /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()), - /*output_encoding=*/sf->encoding); - chain.addInputTensor(&tensor, sample_rate, channels_first); - chain.addOutputFile(sf); - chain.run(); -} - -namespace { -// helper class to automatically release buffer, to be used by -// save_audio_fileobj -struct AutoReleaseBuffer { - char* ptr; - size_t size; - - AutoReleaseBuffer() : ptr(nullptr), size(0) {} - AutoReleaseBuffer(const AutoReleaseBuffer& other) = delete; - AutoReleaseBuffer(AutoReleaseBuffer&& other) = delete; - auto operator=(const AutoReleaseBuffer& other) -> AutoReleaseBuffer& = delete; - auto operator=(AutoReleaseBuffer&& other) -> AutoReleaseBuffer& = delete; - ~AutoReleaseBuffer() { - if (ptr) { - free(ptr); - } - } -}; - -} // namespace - -void save_audio_fileobj( - py::object fileobj, - py::array tensor, - int64_t sample_rate, - bool channels_first, - tl::optional compression, - tl::optional format, - 
tl::optional encoding, - tl::optional bits_per_sample) { - - if (!format.has_value()) { - throw std::runtime_error( - "`format` is required when saving to file object."); - } - const auto filetype = format.value(); - - if (filetype == "amr-nb") { - const auto num_channels = tensor.shape(channels_first ? 0 : 1); - if (num_channels != 1) { - throw std::runtime_error( - "amr-nb format only supports single channel audio."); - } - } else if (filetype == "htk") { - const auto num_channels = tensor.shape(channels_first ? 0 : 1); - if (num_channels != 1) { - throw std::runtime_error( - "htk format only supports single channel audio."); - } - } else if (filetype == "gsm") { - const auto num_channels = tensor.shape(channels_first ? 0 : 1); - if (num_channels != 1) { - throw std::runtime_error( - "gsm format only supports single channel audio."); - } - if (sample_rate != 8000) { - throw std::runtime_error( - "gsm format only supports a sampling rate of 8kHz."); - } - } - - const auto signal_info = - get_signalinfo(&tensor, sample_rate, filetype, channels_first); - const auto encoding_info = get_encodinginfo_for_save( - filetype, - tensor.dtype(), - compression, - std::move(encoding), - bits_per_sample); - - AutoReleaseBuffer buffer; - - SoxFormat sf(sox_open_memstream_write( - &buffer.ptr, - &buffer.size, - &signal_info, - &encoding_info, - filetype.c_str(), - /*oob=*/nullptr)); - - if (static_cast(sf) == nullptr) { - throw std::runtime_error( - "Error saving audio file: failed to open memory stream."); - } - - paddleaudio::sox_effects_chain::SoxEffectsChainPyBind chain( - /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()), - /*output_encoding=*/sf->encoding); - chain.addInputTensor(&tensor, sample_rate, channels_first); - chain.addOutputFileObj(sf, &buffer.ptr, &buffer.size, &fileobj); - chain.run(); - - // Closing the sox_format_t is necessary for flushing the last chunk to the - // buffer - sf.close(); - fileobj.attr("write")(py::bytes(buffer.ptr, buffer.size)); -} 
- -} // namespace paddleaudio -} // namespace sox_io diff --git a/paddlespeech/audio/src/pybind/sox/io.h b/paddlespeech/audio/src/pybind/sox/io.h deleted file mode 100644 index 3734bcb34d34f2b23e26a374aef646ecb715276f..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/io.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), -// All rights reserved. - -#pragma once - -#include "paddlespeech/audio/src/pybind/sox/utils.h" - -namespace py = pybind11; - -namespace paddleaudio { -namespace sox_io { - -auto get_info_file(const std::string &path, - const tl::optional &format) - -> std::tuple; - -auto get_info_fileobj(py::object fileobj, - const tl::optional &format) - -> std::tuple; - -tl::optional> load_audio_fileobj( - py::object fileobj, - const tl::optional& frame_offset, - const tl::optional& num_frames, - tl::optional normalize, - tl::optional channels_first, - const tl::optional& format); - -void save_audio_fileobj( - py::object fileobj, - py::array tensor, - int64_t sample_rate, - bool channels_first, - tl::optional compression, - tl::optional format, - tl::optional encoding, - tl::optional bits_per_sample); - -auto get_effects(const tl::optional& frame_offset, - const tl::optional& num_frames) - -> std::vector>; - - -tl::optional> load_audio_file( - const std::string& path, - const tl::optional& frame_offset, - const tl::optional& num_frames, - tl::optional normalize, - tl::optional channels_first, - const tl::optional& format); - -void save_audio_file(const std::string& path, - py::array tensor, - int64_t sample_rate, - bool channels_first, - tl::optional compression, - tl::optional format, - tl::optional encoding, - tl::optional bits_per_sample); - - -} // namespace paddleaudio -} // namespace sox_io diff --git a/paddlespeech/audio/src/pybind/sox/types.cpp b/paddlespeech/audio/src/pybind/sox/types.cpp deleted file mode 100644 index 
8e3e61373e0cc0238c7b3a722ce995daea7cfd25..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/types.cpp +++ /dev/null @@ -1,143 +0,0 @@ -//code is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/types.cpp - -#include "paddlespeech/audio/src/pybind/sox/types.h" -#include -#include - -namespace paddleaudio { -namespace sox_utils { - -Format get_format_from_string(const std::string& format) { - if (format == "wav") - return Format::WAV; - if (format == "mp3") - return Format::MP3; - if (format == "flac") - return Format::FLAC; - if (format == "ogg" || format == "vorbis") - return Format::VORBIS; - if (format == "amr-nb") - return Format::AMR_NB; - if (format == "amr-wb") - return Format::AMR_WB; - if (format == "amb") - return Format::AMB; - if (format == "sph") - return Format::SPHERE; - if (format == "htk") - return Format::HTK; - if (format == "gsm") - return Format::GSM; - std::ostringstream stream; - stream << "Internal Error: unexpected format value: " << format; - throw std::runtime_error(stream.str()); -} - -std::string to_string(Encoding v) { - switch (v) { - case Encoding::UNKNOWN: - return "UNKNOWN"; - case Encoding::PCM_SIGNED: - return "PCM_S"; - case Encoding::PCM_UNSIGNED: - return "PCM_U"; - case Encoding::PCM_FLOAT: - return "PCM_F"; - case Encoding::FLAC: - return "FLAC"; - case Encoding::ULAW: - return "ULAW"; - case Encoding::ALAW: - return "ALAW"; - case Encoding::MP3: - return "MP3"; - case Encoding::VORBIS: - return "VORBIS"; - case Encoding::AMR_WB: - return "AMR_WB"; - case Encoding::AMR_NB: - return "AMR_NB"; - case Encoding::OPUS: - return "OPUS"; - default: - throw std::runtime_error("Internal Error: unexpected encoding."); - } -} - -Encoding get_encoding_from_option(const tl::optional encoding) { - if (!encoding.has_value()) - return Encoding::NOT_PROVIDED; - std::string v = encoding.value(); - if (v == "PCM_S") - return Encoding::PCM_SIGNED; - if (v == "PCM_U") - return 
Encoding::PCM_UNSIGNED; - if (v == "PCM_F") - return Encoding::PCM_FLOAT; - if (v == "ULAW") - return Encoding::ULAW; - if (v == "ALAW") - return Encoding::ALAW; - std::ostringstream stream; - stream << "Internal Error: unexpected encoding value: " << v; - throw std::runtime_error(stream.str()); -} - -BitDepth get_bit_depth_from_option(const tl::optional bit_depth) { - if (!bit_depth.has_value()) - return BitDepth::NOT_PROVIDED; - int64_t v = bit_depth.value(); - switch (v) { - case 8: - return BitDepth::B8; - case 16: - return BitDepth::B16; - case 24: - return BitDepth::B24; - case 32: - return BitDepth::B32; - case 64: - return BitDepth::B64; - default: { - std::ostringstream s; - s << "Internal Error: unexpected bit depth value: " << v; - throw std::runtime_error(s.str()); - } - } -} - -std::string get_encoding(sox_encoding_t encoding) { - switch (encoding) { - case SOX_ENCODING_UNKNOWN: - return "UNKNOWN"; - case SOX_ENCODING_SIGN2: - return "PCM_S"; - case SOX_ENCODING_UNSIGNED: - return "PCM_U"; - case SOX_ENCODING_FLOAT: - return "PCM_F"; - case SOX_ENCODING_FLAC: - return "FLAC"; - case SOX_ENCODING_ULAW: - return "ULAW"; - case SOX_ENCODING_ALAW: - return "ALAW"; - case SOX_ENCODING_MP3: - return "MP3"; - case SOX_ENCODING_VORBIS: - return "VORBIS"; - case SOX_ENCODING_AMR_WB: - return "AMR_WB"; - case SOX_ENCODING_AMR_NB: - return "AMR_NB"; - case SOX_ENCODING_OPUS: - return "OPUS"; - case SOX_ENCODING_GSM: - return "GSM"; - default: - return "UNKNOWN"; - } -} - -} // namespace sox_utils -} // namespace paddleaudio diff --git a/paddlespeech/audio/src/pybind/sox/types.h b/paddlespeech/audio/src/pybind/sox/types.h deleted file mode 100644 index 780840161366b7be4384fc86ac0ed6064557ced8..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/types.h +++ /dev/null @@ -1,58 +0,0 @@ -//code is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/types.h -#pragma once - -#include -#include 
"paddlespeech/audio/src/optional/optional.hpp" - -namespace paddleaudio { -namespace sox_utils { - -enum class Format { - WAV, - MP3, - FLAC, - VORBIS, - AMR_NB, - AMR_WB, - AMB, - SPHERE, - GSM, - HTK, -}; - -Format get_format_from_string(const std::string& format); - -enum class Encoding { - NOT_PROVIDED, - UNKNOWN, - PCM_SIGNED, - PCM_UNSIGNED, - PCM_FLOAT, - FLAC, - ULAW, - ALAW, - MP3, - VORBIS, - AMR_WB, - AMR_NB, - OPUS, -}; - -std::string to_string(Encoding v); -Encoding get_encoding_from_option(const tl::optional encoding); - -enum class BitDepth : unsigned { - NOT_PROVIDED = 0, - B8 = 8, - B16 = 16, - B24 = 24, - B32 = 32, - B64 = 64, -}; - -BitDepth get_bit_depth_from_option(const tl::optional bit_depth); - -std::string get_encoding(sox_encoding_t encoding); - -} // namespace sox_utils -} // namespace paddleaudio \ No newline at end of file diff --git a/paddlespeech/audio/src/pybind/sox/utils.cpp b/paddlespeech/audio/src/pybind/sox/utils.cpp deleted file mode 100644 index 5c78bc11614c2646b20fdbf9717d2d79f15ec89d..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/utils.cpp +++ /dev/null @@ -1,642 +0,0 @@ -// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), -// All rights reserved. -#include - -#include "paddlespeech/audio/src/pybind/sox/utils.h" -#include "paddlespeech/audio/src/pybind/sox/types.h" - -#include - -namespace paddleaudio { -namespace sox_utils { - -auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer) - -> uint64_t { - uint64_t num_read = 0; - while (num_read < size) { - auto request = size - num_read; - auto chunk = static_cast( - static_cast(fileobj->attr("read")(request))); - auto chunk_len = chunk.length(); - if (chunk_len == 0) { - break; - } - if (chunk_len > request) { - std::ostringstream message; - message - << "Requested up to " << request << " bytes but, " - << "received " << chunk_len << " bytes. 
" - << "The given object does not confirm to read protocol of file " - "object."; - throw std::runtime_error(message.str()); - } - memcpy(buffer, chunk.data(), chunk_len); - buffer += chunk_len; - num_read += chunk_len; - } - return num_read; -} - - -void set_seed(const int64_t seed) { - sox_get_globals()->ranqd1 = static_cast(seed); -} - -void set_verbosity(const int64_t verbosity) { - sox_get_globals()->verbosity = static_cast(verbosity); -} - -void set_use_threads(const bool use_threads) { - sox_get_globals()->use_threads = static_cast(use_threads); -} - -void set_buffer_size(const int64_t buffer_size) { - sox_get_globals()->bufsiz = static_cast(buffer_size); -} - -int64_t get_buffer_size() { - return sox_get_globals()->bufsiz; -} - -std::vector> list_effects() { - std::vector> effects; - for (const sox_effect_fn_t* fns = sox_get_effect_fns(); *fns; ++fns) { - const sox_effect_handler_t* handler = (*fns)(); - if (handler && handler->name) { - if (UNSUPPORTED_EFFECTS.find(handler->name) == - UNSUPPORTED_EFFECTS.end()) { - effects.emplace_back(std::vector{ - handler->name, - handler->usage ? 
std::string(handler->usage) : std::string("")}); - } - } - } - return effects; -} - -std::vector list_write_formats() { - std::vector formats; - for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) { - const sox_format_handler_t* handler = fns->fn(); - for (const char* const* names = handler->names; *names; ++names) { - if (!strchr(*names, '/') && handler->write) - formats.emplace_back(*names); - } - } - return formats; -} - -std::vector list_read_formats() { - std::vector formats; - for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) { - const sox_format_handler_t* handler = fns->fn(); - for (const char* const* names = handler->names; *names; ++names) { - if (!strchr(*names, '/') && handler->read) - formats.emplace_back(*names); - } - } - return formats; -} - -SoxFormat::SoxFormat(sox_format_t* fd) noexcept : fd_(fd) {} -SoxFormat::~SoxFormat() { - close(); -} - -sox_format_t* SoxFormat::operator->() const noexcept { - return fd_; -} -SoxFormat::operator sox_format_t*() const noexcept { - return fd_; -} - -void SoxFormat::close() { - if (fd_ != nullptr) { - sox_close(fd_); - fd_ = nullptr; - } -} - -void validate_input_file(const SoxFormat& sf, const std::string& path) { - if (static_cast(sf) == nullptr) { - throw std::runtime_error( - "Error loading audio file: failed to open file " + path); - } - if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) { - throw std::runtime_error("Error loading audio file: unknown encoding."); - } -} - -void validate_input_memfile(const SoxFormat &sf) { - return validate_input_file(sf, ""); -} - -void validate_input_tensor(const py::array tensor) { - if (tensor.ndim() != 2) { - throw std::runtime_error("Input tensor has to be 2D."); - } - - char dtype = tensor.dtype().char_(); - bool flag = (dtype == 'f') || (dtype == 'd') || (dtype == 'l') || (dtype == 'i'); - if (flag == false) { - throw std::runtime_error( - "Input tensor has to be one of float32, int32, int16 or uint8 type."); - } -} - 
-py::dtype get_dtype( - const sox_encoding_t encoding, - const unsigned precision) { - switch (encoding) { - case SOX_ENCODING_UNSIGNED: // 8-bit PCM WAV - return py::dtype('u1'); - case SOX_ENCODING_SIGN2: // 16-bit, 24-bit, or 32-bit PCM WAV - switch (precision) { - case 16: - return py::dtype("i2"); - case 24: // Cast 24-bit to 32-bit. - case 32: - return py::dtype('i'); - default: - throw std::runtime_error( - "Only 16, 24, and 32 bits are supported for signed PCM."); - } - default: - // default to float32 for the other formats, including - // 32-bit flaoting-point WAV, - // MP3, - // FLAC, - // VORBIS etc... - return py::dtype("f"); - } -} - -py::array convert_to_tensor( - sox_sample_t* buffer, - const int32_t num_samples, - const int32_t num_channels, - const py::dtype dtype, - const bool normalize, - const bool channels_first) { - // todo refector later(SGoat) - py::array t; - uint64_t dummy = 0; - SOX_SAMPLE_LOCALS; - int32_t num_rows = num_samples / num_channels; - if (normalize || dtype.char_() == 'f') { - t = py::array(dtype, {num_rows, num_channels}); - auto ptr = (float*)t.mutable_data(0, 0); - for (int32_t i = 0; i < num_samples; ++i) { - ptr[i] = SOX_SAMPLE_TO_FLOAT_32BIT(buffer[i], dummy); - } - if (channels_first) { - py::array t2 = py::array(dtype, {num_channels, num_rows}); - for (int32_t row_idx = 0; row_idx < num_channels; ++row_idx) { - for (int32_t col_idx = 0; col_idx < num_rows; ++col_idx) - *(float*)t2.mutable_data(row_idx, col_idx) = *(float*)t.data(col_idx, row_idx); - } - return t2; - } - } else if (dtype.char_() == 'i') { - t = py::array(dtype, {num_rows, num_channels}); - auto ptr = (int*)t.mutable_data(0, 0); - for (int32_t i = 0; i < num_samples; ++i) { - ptr[i] = buffer[i]; - } - if (channels_first) { - py::array t2 = py::array(dtype, {num_channels, num_rows}); - for (int32_t row_idx = 0; row_idx < num_channels; ++row_idx) { - for (int32_t col_idx = 0; col_idx < num_rows; ++col_idx) - *(int*)t2.mutable_data(row_idx, col_idx) = 
*(int*)t.data(col_idx, row_idx); - } - return t2; - } - } else if (dtype.char_() == 'h') { // int16 - t = py::array(dtype, {num_rows, num_channels}); - auto ptr = (int16_t*)t.mutable_data(0, 0); - for (int32_t i = 0; i < num_samples; ++i) { - ptr[i] = SOX_SAMPLE_TO_SIGNED_16BIT(buffer[i], dummy); - } - if (channels_first) { - py::array t2 = py::array(dtype, {num_channels, num_rows}); - for (int32_t row_idx = 0; row_idx < num_channels; ++row_idx) { - for (int32_t col_idx = 0; col_idx < num_rows; ++col_idx) - *(int16_t*)t2.mutable_data(row_idx, col_idx) = *(int16_t*)t.data(col_idx, row_idx); - } - return t2; - } - } else if (dtype.char_() == 'b') { - //t = torch::empty({num_samples / num_channels, num_channels}, torch::kUInt8); - t = py::array(dtype, {num_rows, num_channels}); - auto ptr = (uint8_t*)t.mutable_data(0,0); - for (int32_t i = 0; i < num_samples; ++i) { - ptr[i] = SOX_SAMPLE_TO_UNSIGNED_8BIT(buffer[i], dummy); - } - if (channels_first) { - py::array t2 = py::array(dtype, {num_channels, num_rows}); - for (int32_t row_idx = 0; row_idx < num_channels; ++row_idx) { - for (int32_t col_idx = 0; col_idx < num_rows; ++col_idx) - *(uint8_t*)t2.mutable_data(row_idx, col_idx) = *(uint8_t*)t.data(col_idx, row_idx); - } - return t2; - } - } else { - throw std::runtime_error("Unsupported dtype."); - } - return t; -} - -const std::string get_filetype(const std::string path) { - std::string ext = path.substr(path.find_last_of(".") + 1); - std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); - return ext; -} - -namespace { - -std::tuple get_save_encoding_for_wav( - const std::string format, - py::dtype dtype, - const Encoding& encoding, - const BitDepth& bits_per_sample) { - switch (encoding) { - case Encoding::NOT_PROVIDED: - switch (bits_per_sample) { - case BitDepth::NOT_PROVIDED: - switch (dtype.num()) { - case 11: // float32 numpy dtype num - return std::make_tuple<>(SOX_ENCODING_FLOAT, 32); - case 5: // int numpy dtype num - return 
std::make_tuple<>(SOX_ENCODING_SIGN2, 32); - case 3: // int16 numpy - return std::make_tuple<>(SOX_ENCODING_SIGN2, 16); - case 1: // byte numpy - return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8); - default: - throw std::runtime_error("Internal Error: Unexpected dtype."); - } - case BitDepth::B8: - return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8); - default: - return std::make_tuple<>( - SOX_ENCODING_SIGN2, static_cast(bits_per_sample)); - } - case Encoding::PCM_SIGNED: - switch (bits_per_sample) { - case BitDepth::NOT_PROVIDED: - return std::make_tuple<>(SOX_ENCODING_SIGN2, 32); - case BitDepth::B8: - throw std::runtime_error( - format + " does not support 8-bit signed PCM encoding."); - default: - return std::make_tuple<>( - SOX_ENCODING_SIGN2, static_cast(bits_per_sample)); - } - case Encoding::PCM_UNSIGNED: - switch (bits_per_sample) { - case BitDepth::NOT_PROVIDED: - case BitDepth::B8: - return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8); - default: - throw std::runtime_error( - format + " only supports 8-bit for unsigned PCM encoding."); - } - case Encoding::PCM_FLOAT: - switch (bits_per_sample) { - case BitDepth::NOT_PROVIDED: - case BitDepth::B32: - return std::make_tuple<>(SOX_ENCODING_FLOAT, 32); - case BitDepth::B64: - return std::make_tuple<>(SOX_ENCODING_FLOAT, 64); - default: - throw std::runtime_error( - format + - " only supports 32-bit or 64-bit for floating-point PCM encoding."); - } - case Encoding::ULAW: - switch (bits_per_sample) { - case BitDepth::NOT_PROVIDED: - case BitDepth::B8: - return std::make_tuple<>(SOX_ENCODING_ULAW, 8); - default: - throw std::runtime_error( - format + " only supports 8-bit for mu-law encoding."); - } - case Encoding::ALAW: - switch (bits_per_sample) { - case BitDepth::NOT_PROVIDED: - case BitDepth::B8: - return std::make_tuple<>(SOX_ENCODING_ALAW, 8); - default: - throw std::runtime_error( - format + " only supports 8-bit for a-law encoding."); - } - default: - throw std::runtime_error( - format + " does not 
support encoding: " + to_string(encoding)); - } -} - -std::tuple get_save_encoding( - const std::string& format, - const py::dtype dtype, - const tl::optional encoding, - const tl::optional bits_per_sample) { - const Format fmt = get_format_from_string(format); - const Encoding enc = get_encoding_from_option(encoding); - const BitDepth bps = get_bit_depth_from_option(bits_per_sample); - - switch (fmt) { - case Format::WAV: - case Format::AMB: - return get_save_encoding_for_wav(format, dtype, enc, bps); - case Format::MP3: - if (enc != Encoding::NOT_PROVIDED) - throw std::runtime_error("mp3 does not support `encoding` option."); - if (bps != BitDepth::NOT_PROVIDED) - throw std::runtime_error( - "mp3 does not support `bits_per_sample` option."); - return std::make_tuple<>(SOX_ENCODING_MP3, 16); - case Format::HTK: - if (enc != Encoding::NOT_PROVIDED) - throw std::runtime_error("htk does not support `encoding` option."); - if (bps != BitDepth::NOT_PROVIDED) - throw std::runtime_error( - "htk does not support `bits_per_sample` option."); - return std::make_tuple<>(SOX_ENCODING_SIGN2, 16); - case Format::VORBIS: - if (enc != Encoding::NOT_PROVIDED) - throw std::runtime_error("vorbis does not support `encoding` option."); - if (bps != BitDepth::NOT_PROVIDED) - throw std::runtime_error( - "vorbis does not support `bits_per_sample` option."); - return std::make_tuple<>(SOX_ENCODING_VORBIS, 16); - case Format::AMR_NB: - if (enc != Encoding::NOT_PROVIDED) - throw std::runtime_error("amr-nb does not support `encoding` option."); - if (bps != BitDepth::NOT_PROVIDED) - throw std::runtime_error( - "amr-nb does not support `bits_per_sample` option."); - return std::make_tuple<>(SOX_ENCODING_AMR_NB, 16); - case Format::FLAC: - if (enc != Encoding::NOT_PROVIDED) - throw std::runtime_error("flac does not support `encoding` option."); - switch (bps) { - case BitDepth::B32: - case BitDepth::B64: - throw std::runtime_error( - "flac does not support `bits_per_sample` larger than 24."); 
- default: - return std::make_tuple<>( - SOX_ENCODING_FLAC, static_cast(bps)); - } - case Format::SPHERE: - switch (enc) { - case Encoding::NOT_PROVIDED: - case Encoding::PCM_SIGNED: - switch (bps) { - case BitDepth::NOT_PROVIDED: - return std::make_tuple<>(SOX_ENCODING_SIGN2, 32); - default: - return std::make_tuple<>( - SOX_ENCODING_SIGN2, static_cast(bps)); - } - case Encoding::PCM_UNSIGNED: - throw std::runtime_error( - "sph does not support unsigned integer PCM."); - case Encoding::PCM_FLOAT: - throw std::runtime_error("sph does not support floating point PCM."); - case Encoding::ULAW: - switch (bps) { - case BitDepth::NOT_PROVIDED: - case BitDepth::B8: - return std::make_tuple<>(SOX_ENCODING_ULAW, 8); - default: - throw std::runtime_error( - "sph only supports 8-bit for mu-law encoding."); - } - case Encoding::ALAW: - switch (bps) { - case BitDepth::NOT_PROVIDED: - case BitDepth::B8: - return std::make_tuple<>(SOX_ENCODING_ALAW, 8); - default: - return std::make_tuple<>( - SOX_ENCODING_ALAW, static_cast(bps)); - } - default: - throw std::runtime_error( - "sph does not support encoding: " + encoding.value()); - } - case Format::GSM: - if (enc != Encoding::NOT_PROVIDED) - throw std::runtime_error("gsm does not support `encoding` option."); - if (bps != BitDepth::NOT_PROVIDED) - throw std::runtime_error( - "gsm does not support `bits_per_sample` option."); - return std::make_tuple<>(SOX_ENCODING_GSM, 16); - - default: - throw std::runtime_error("Unsupported format: " + format); - } -} - -unsigned get_precision(const std::string filetype, py::dtype dtype) { - if (filetype == "mp3") - return SOX_UNSPEC; - if (filetype == "flac") - return 24; - if (filetype == "ogg" || filetype == "vorbis") - return SOX_UNSPEC; - if (filetype == "wav" || filetype == "amb") { - switch (dtype.num()) { - case 1: // byte in numpy dype num - return 8; - case 3: // short, in numpy dtype num - return 16; - case 5: // int, numpy dtype - return 32; - case 11: // float, numpy dtype - return 
32; - default: - throw std::runtime_error("Unsupported dtype."); - } - } - if (filetype == "sph") - return 32; - if (filetype == "amr-nb") { - return 16; - } - if (filetype == "gsm") { - return 16; - } - if (filetype == "htk") { - return 16; - } - throw std::runtime_error("Unsupported file type: " + filetype); -} - -} // namespace - -sox_signalinfo_t get_signalinfo( - const py::array* waveform, - const int64_t sample_rate, - const std::string filetype, - const bool channels_first) { - return sox_signalinfo_t{ - /*rate=*/static_cast(sample_rate), - /*channels=*/ - static_cast(waveform->shape(channels_first ? 0 : 1)), - /*precision=*/get_precision(filetype, waveform->dtype()), - /*length=*/static_cast(waveform->size())}; -} - -sox_encodinginfo_t get_tensor_encodinginfo(py::dtype dtype) { - sox_encoding_t encoding = [&]() { - switch (dtype.num()) { - case 1: // byte - return SOX_ENCODING_UNSIGNED; - case 3: // short - return SOX_ENCODING_SIGN2; - case 5: // int32 - return SOX_ENCODING_SIGN2; - case 11: // float - return SOX_ENCODING_FLOAT; - default: - throw std::runtime_error("Unsupported dtype."); - } - }(); - unsigned bits_per_sample = [&]() { - switch (dtype.num()) { - case 1: // byte - return 8; - case 3: //short - return 16; - case 5: // int32 - return 32; - case 11: // float - return 32; - default: - throw std::runtime_error("Unsupported dtype."); - } - }(); - return sox_encodinginfo_t{ - /*encoding=*/encoding, - /*bits_per_sample=*/bits_per_sample, - /*compression=*/HUGE_VAL, - /*reverse_bytes=*/sox_option_default, - /*reverse_nibbles=*/sox_option_default, - /*reverse_bits=*/sox_option_default, - /*opposite_endian=*/sox_false}; -} - -sox_encodinginfo_t get_encodinginfo_for_save( - const std::string& format, - const py::dtype dtype, - const tl::optional compression, - const tl::optional encoding, - const tl::optional bits_per_sample) { - auto enc = get_save_encoding(format, dtype, encoding, bits_per_sample); - return sox_encodinginfo_t{ - 
/*encoding=*/std::get<0>(enc), - /*bits_per_sample=*/std::get<1>(enc), - /*compression=*/compression.value_or(HUGE_VAL), - /*reverse_bytes=*/sox_option_default, - /*reverse_nibbles=*/sox_option_default, - /*reverse_bits=*/sox_option_default, - /*opposite_endian=*/sox_false}; -} - - -/* -SoxFormat::SoxFormat(sox_format_t *fd) noexcept : fd_(fd) {} -SoxFormat::~SoxFormat() { close(); } - -sox_format_t *SoxFormat::operator->() const noexcept { return fd_; } -SoxFormat::operator sox_format_t *() const noexcept { return fd_; } - -void SoxFormat::close() { - if (fd_ != nullptr) { - sox_close(fd_); - fd_ = nullptr; - } -} - -auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer) - -> uint64_t { - uint64_t num_read = 0; - while (num_read < size) { - auto request = size - num_read; - auto chunk = static_cast( - static_cast(fileobj->attr("read")(request))); - auto chunk_len = chunk.length(); - if (chunk_len == 0) { - break; - } - if (chunk_len > request) { - std::ostringstream message; - message - << "Requested up to " << request << " bytes but, " - << "received " << chunk_len << " bytes. 
" - << "The given object does not confirm to read protocol of file " - "object."; - throw std::runtime_error(message.str()); - } - memcpy(buffer, chunk.data(), chunk_len); - buffer += chunk_len; - num_read += chunk_len; - } - return num_read; -} - -int64_t get_buffer_size() { return sox_get_globals()->bufsiz; } - -void validate_input_file(const SoxFormat &sf, const std::string &path) { - if (static_cast(sf) == nullptr) { - throw std::runtime_error( - "Error loading audio file: failed to open file " + path); - } - if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) { - throw std::runtime_error("Error loading audio file: unknown encoding."); - } -} - -void validate_input_memfile(const SoxFormat &sf) { - return validate_input_file(sf, ""); -} - -std::string get_encoding(sox_encoding_t encoding) { - switch (encoding) { - case SOX_ENCODING_UNKNOWN: - return "UNKNOWN"; - case SOX_ENCODING_SIGN2: - return "PCM_S"; - case SOX_ENCODING_UNSIGNED: - return "PCM_U"; - case SOX_ENCODING_FLOAT: - return "PCM_F"; - case SOX_ENCODING_FLAC: - return "FLAC"; - case SOX_ENCODING_ULAW: - return "ULAW"; - case SOX_ENCODING_ALAW: - return "ALAW"; - case SOX_ENCODING_MP3: - return "MP3"; - case SOX_ENCODING_VORBIS: - return "VORBIS"; - case SOX_ENCODING_AMR_WB: - return "AMR_WB"; - case SOX_ENCODING_AMR_NB: - return "AMR_NB"; - case SOX_ENCODING_OPUS: - return "OPUS"; - case SOX_ENCODING_GSM: - return "GSM"; - default: - return "UNKNOWN"; - } -} -*/ -} // namespace paddleaudio -} // namespace sox_utils diff --git a/paddlespeech/audio/src/pybind/sox/utils.h b/paddlespeech/audio/src/pybind/sox/utils.h deleted file mode 100644 index 65223bc0c5c78085abf1ddc9ca4e10f5b1801718..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/pybind/sox/utils.h +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright (c) 2017 Facebook Inc. (Soumith Chintala), -// All rights reserved. 
- -#pragma once - -#include -#include -#include -#include "paddlespeech/audio/src/optional/optional.hpp" - -namespace py = pybind11; - -namespace paddleaudio { -namespace sox_utils { - -auto read_fileobj(py::object *fileobj, uint64_t size, char *buffer) -> uint64_t; - -void set_seed(const int64_t seed); - -void set_verbosity(const int64_t verbosity); - -void set_use_threads(const bool use_threads); - -void set_buffer_size(const int64_t buffer_size); - -int64_t get_buffer_size(); - -std::vector> list_effects(); - -std::vector list_read_formats(); - -std::vector list_write_formats(); - -//////////////////////////////////////////////////////////////////////////////// -// Utilities for sox_io / sox_effects implementations -//////////////////////////////////////////////////////////////////////////////// - -const std::unordered_set UNSUPPORTED_EFFECTS = - {"input", "output", "spectrogram", "noiseprof", "noisered", "splice"}; - -/// helper class to automatically close sox_format_t* -struct SoxFormat { - explicit SoxFormat(sox_format_t* fd) noexcept; - SoxFormat(const SoxFormat& other) = delete; - SoxFormat(SoxFormat&& other) = delete; - SoxFormat& operator=(const SoxFormat& other) = delete; - SoxFormat& operator=(SoxFormat&& other) = delete; - ~SoxFormat(); - sox_format_t* operator->() const noexcept; - operator sox_format_t*() const noexcept; - - void close(); - - private: - sox_format_t* fd_; -}; - -/// -/// Verify that input Tensor is 2D, CPU and either uin8, int16, int32 or float32 -void validate_input_tensor(const py::array); - -void validate_input_file(const SoxFormat& sf, const std::string& path); - -void validate_input_memfile(const SoxFormat &sf); -/// -/// Get target dtype for the given encoding and precision. 
-py::dtype get_dtype( - const sox_encoding_t encoding, - const unsigned precision); - -/// -/// Convert sox_sample_t buffer to uint8/int16/int32/float32 Tensor -/// NOTE: This function might modify the values in the input buffer to -/// reduce the number of memory copy. -/// @param buffer Pointer to buffer that contains audio data. -/// @param num_samples The number of samples to read. -/// @param num_channels The number of channels. Used to reshape the resulting -/// Tensor. -/// @param dtype Target dtype. Determines the output dtype and value range in -/// conjunction with normalization. -/// @param noramlize Perform normalization. Only effective when dtype is not -/// kFloat32. When effective, the output tensor is kFloat32 type and value range -/// is [-1.0, 1.0] -/// @param channels_first When True, output Tensor has shape of [num_channels, -/// num_frames]. -py::array convert_to_tensor( - sox_sample_t* buffer, - const int32_t num_samples, - const int32_t num_channels, - const py::dtype dtype, - const bool normalize, - const bool channels_first); - -/// Extract extension from file path -const std::string get_filetype(const std::string path); - -/// Get sox_signalinfo_t for passing a py::array object. 
-sox_signalinfo_t get_signalinfo( - const py::array* waveform, - const int64_t sample_rate, - const std::string filetype, - const bool channels_first); - -/// Get sox_encodinginfo_t for Tensor I/O -sox_encodinginfo_t get_tensor_encodinginfo(const py::dtype dtype); - -/// Get sox_encodinginfo_t for saving to file/file object -sox_encodinginfo_t get_encodinginfo_for_save( - const std::string& format, - const py::dtype dtype, - const tl::optional compression, - const tl::optional encoding, - const tl::optional bits_per_sample); - -} // namespace paddleaudio -} // namespace sox_utils diff --git a/paddlespeech/audio/src/utils.cpp b/paddlespeech/audio/src/utils.cpp deleted file mode 100644 index d9d3575f201018aa88dc3ec146b19b3f4eeaa993..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/src/utils.cpp +++ /dev/null @@ -1,33 +0,0 @@ -namespace paddleaudio { - -namespace { - -bool is_sox_available() { -#ifdef INCLUDE_SOX - return true; -#else - return false; -#endif -} - -bool is_kaldi_available() { -#ifdef INCLUDE_KALDI - return true; -#else - return false; -#endif -} - -// It tells whether paddleaudio was compiled with ffmpeg -// not the runtime availability. -bool is_ffmpeg_available() { -#ifdef USE_FFMPEG - return true; -#else - return false; -#endif -} - -} // namespace - -} // namespace paddleaudio \ No newline at end of file diff --git a/paddlespeech/audio/streamdata/autodecode.py b/paddlespeech/audio/streamdata/autodecode.py index d7f7937bde3fc8f37af615612832eff223ca944d..ce5181d836ccbd0eaf11505c9cb15c088c54f638 100644 --- a/paddlespeech/audio/streamdata/autodecode.py +++ b/paddlespeech/audio/streamdata/autodecode.py @@ -295,7 +295,7 @@ def torch_video(key, data): def paddle_audio(key, data): - """Decode audio using the paddlespeech.audio library. + """Decode audio using the paddleaudio library. 
:param key: file name extension :param data: data to be decoded @@ -304,13 +304,13 @@ def paddle_audio(key, data): if extension not in ["flac", "mp3", "sox", "wav", "m4a", "ogg", "wma"]: return None - import paddlespeech.audio + import paddleaudio with tempfile.TemporaryDirectory() as dirname: fname = os.path.join(dirname, f"file.{extension}") with open(fname, "wb") as stream: stream.write(data) - return paddlespeech.audio.load(fname) + return paddleaudio.backends.soundfile_load(fname) ################################################################ diff --git a/paddlespeech/audio/streamdata/filters.py b/paddlespeech/audio/streamdata/filters.py index 68d6830bb83c8c6944923963ef1ec69a79f8780e..c4f590fc850d38c29dc649bc142fea34db6dac4a 100644 --- a/paddlespeech/audio/streamdata/filters.py +++ b/paddlespeech/audio/streamdata/filters.py @@ -25,8 +25,10 @@ import paddle from . import autodecode from . import utils -from .. import backends -from ..compliance import kaldi + +from paddleaudio import backends +from paddleaudio.compliance import kaldi + from ..transform.cmvn import GlobalCMVN from ..transform.spec_augment import freq_mask from ..transform.spec_augment import time_mask diff --git a/paddlespeech/audio/streamdata/tariterators.py b/paddlespeech/audio/streamdata/tariterators.py index 79b81c0ce67623241c42818508b7a106195996cc..3adf4892a7b4293f76235e6e553338f633346a2e 100644 --- a/paddlespeech/audio/streamdata/tariterators.py +++ b/paddlespeech/audio/streamdata/tariterators.py @@ -20,7 +20,7 @@ trace = False meta_prefix = "__" meta_suffix = "__" -import paddlespeech +import paddleaudio import paddle import numpy as np @@ -111,7 +111,7 @@ def tar_file_iterator(fileobj, assert pos > 0 prefix, postfix = name[:pos], name[pos + 1:] if postfix == 'wav': - waveform, sample_rate = paddlespeech.audio.load( + waveform, sample_rate = paddleaudio.backends.soundfile_load( stream.extractfile(tarinfo), normal=False) result = dict( fname=prefix, wav=waveform, 
sample_rate=sample_rate) @@ -163,7 +163,7 @@ def tar_file_and_group_iterator(fileobj, if postfix == 'txt': example['txt'] = file_obj.read().decode('utf8').strip() elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = paddlespeech.audio.load( + waveform, sample_rate = paddleaudio.backends.soundfile_load( file_obj, normal=False) waveform = paddle.to_tensor( np.expand_dims(np.array(waveform), 0), diff --git a/paddlespeech/audio/third_party/.gitignore b/paddlespeech/audio/third_party/.gitignore deleted file mode 100644 index 2d788f6b4b9d67cdde429f552d0514dbec8c9d87..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -archives/ -install/ diff --git a/paddlespeech/audio/third_party/CMakeLists.txt b/paddlespeech/audio/third_party/CMakeLists.txt deleted file mode 100644 index 43288f39b3517b3b4f8e1f60064720900c0123fa..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") - -################################################################################ -# sox -################################################################################ -if (BUILD_SOX) - add_subdirectory(sox) -endif() - -################################################################################ -# kaldi -################################################################################ -if (BUILD_KALDI) - add_subdirectory(kaldi) -endif() \ No newline at end of file diff --git a/paddlespeech/audio/third_party/kaldi/CMakeLists.txt b/paddlespeech/audio/third_party/kaldi/CMakeLists.txt deleted file mode 100644 index d25a22124876a83bc6114d66ca9c05d6480fe160..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/kaldi/CMakeLists.txt +++ /dev/null @@ -1,117 +0,0 @@ -# checkout the thirdparty/kaldi/base/kaldi-types.h -# compile kaldi without openfst 
-add_definitions("-DCOMPILE_WITHOUT_OPENFST") - -# function (define_library name source include_dirs link_libraries compile_defs) -# add_library(${name} INTERFACE ${source}) -# target_include_directories(${name} INTERFACE ${include_dirs}) -# target_link_libraries(${name} INTERFACE ${link_libraries}) -# target_compile_definitions(${name} INTERFACE ${compile_defs}) -# set_target_properties(${name} PROPERTIES PREFIX "") -# if (MSVC) -# set_target_properties(${name} PROPERTIES SUFFIX ".pyd") -# endif(MSVC) -# install( -# TARGETS ${name} -# LIBRARY DESTINATION lib -# RUNTIME DESTINATION lib # For Windows -# ) -# endfunction() - -# kaldi-base -add_library(kaldi-base STATIC - base/io-funcs.cc - base/kaldi-error.cc - base/kaldi-math.cc - base/kaldi-utils.cc - base/timer.cc -) -target_include_directories(kaldi-base PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) - - -# kaldi-matrix -add_library(kaldi-matrix STATIC - matrix/compressed-matrix.cc - matrix/matrix-functions.cc - matrix/kaldi-matrix.cc - matrix/kaldi-vector.cc - matrix/optimization.cc - matrix/packed-matrix.cc - matrix/qr.cc - matrix/sparse-matrix.cc - matrix/sp-matrix.cc - matrix/srfft.cc - matrix/tp-matrix.cc -) -target_include_directories(kaldi-matrix PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(kaldi-matrix PUBLIC gfortran kaldi-base libopenblas) - - -# kaldi-util -add_library(kaldi-util STATIC - util/kaldi-holder.cc - util/kaldi-io.cc - util/kaldi-semaphore.cc - util/kaldi-table.cc - util/kaldi-thread.cc - util/parse-options.cc - util/simple-io-funcs.cc - util/simple-options.cc - util/text-utils.cc -) -target_include_directories(kaldi-util PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(kaldi-util PUBLIC kaldi-base kaldi-matrix) - - -# kaldi-feat-common -add_library(kaldi-feat-common STATIC - feat/cmvn.cc - feat/feature-functions.cc - feat/feature-window.cc - feat/mel-computations.cc - feat/pitch-functions.cc - feat/resample.cc - feat/signal.cc - feat/wave-reader.cc -) 
-target_include_directories(kaldi-feat-common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util) - - -# kaldi-mfcc -add_library(kaldi-mfcc STATIC - feat/feature-mfcc.cc -) -target_include_directories(kaldi-mfcc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(kaldi-mfcc PUBLIC kaldi-feat-common) - - -# kaldi-fbank -add_library(kaldi-fbank STATIC - feat/feature-fbank.cc -) -target_include_directories(kaldi-fbank PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(kaldi-fbank PUBLIC kaldi-feat-common) - - -set(KALDI_LIBRARIES - ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-base.a - ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-matrix.a - ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-util.a - ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-feat-common.a - ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-mfcc.a - ${CMAKE_CURRENT_BINARY_DIR}/libkaldi-fbank.a -) - -add_library(libkaldi INTERFACE) -add_dependencies(libkaldi kaldi-base kaldi-matrix kaldi-util kaldi-feat-common kaldi-mfcc kaldi-fbank) -target_include_directories(libkaldi INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(libkaldi INTERFACE - # --whole-archive for undefined symbol when link static lib into shared lib - -Wl,--start-group -Wl,--whole-archive - ${KALDI_LIBRARIES} - libopenblas - gfortran - -Wl,--no-whole-archive -Wl,--end-group -) -target_compile_definitions(libkaldi INTERFACE "-DCOMPILE_WITHOUT_OPENFST") diff --git a/paddlespeech/audio/third_party/kaldi/base b/paddlespeech/audio/third_party/kaldi/base deleted file mode 120000 index cf286c165bb237d500322b9457be19950246faf2..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/kaldi/base +++ /dev/null @@ -1 +0,0 @@ -../../../../speechx/speechx/kaldi/base \ No newline at end of file diff --git a/paddlespeech/audio/third_party/kaldi/feat b/paddlespeech/audio/third_party/kaldi/feat deleted file mode 120000 index 
796991243a5969ec1d5fd5f7ec300087cfec503d..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/kaldi/feat +++ /dev/null @@ -1 +0,0 @@ -../../../../speechx/speechx/kaldi/feat \ No newline at end of file diff --git a/paddlespeech/audio/third_party/kaldi/matrix b/paddlespeech/audio/third_party/kaldi/matrix deleted file mode 120000 index 184fa3233c882d57759966470b38ca34a92e18f2..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/kaldi/matrix +++ /dev/null @@ -1 +0,0 @@ -../../../../speechx/speechx/kaldi/matrix \ No newline at end of file diff --git a/paddlespeech/audio/third_party/kaldi/util b/paddlespeech/audio/third_party/kaldi/util deleted file mode 120000 index f3017b6022df93925e304fb9eb5b0ac49af14f77..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/kaldi/util +++ /dev/null @@ -1 +0,0 @@ -../../../../speechx/speechx/kaldi/util \ No newline at end of file diff --git a/paddlespeech/audio/third_party/patches/config.guess b/paddlespeech/audio/third_party/patches/config.guess deleted file mode 100644 index 7f76b6228f73d674f58cfcc3523f99e253ee5515..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/patches/config.guess +++ /dev/null @@ -1,1754 +0,0 @@ -#! /bin/sh -# Attempt to guess a canonical system name. -# Copyright 1992-2022 Free Software Foundation, Inc. - -# shellcheck disable=SC2006,SC2268 # see below for rationale - -timestamp='2022-01-09' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see . -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that -# program. This Exception is an additional permission under section 7 -# of the GNU General Public License, version 3 ("GPLv3"). -# -# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. -# -# You can get the latest version of this script from: -# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess -# -# Please send patches to . - - -# The "shellcheck disable" line above the timestamp inhibits complaints -# about features and limitations of the classic Bourne shell that were -# superseded or lifted in POSIX. However, this script identifies a wide -# variety of pre-POSIX systems that do not have POSIX shells at all, and -# even some reasonably current systems (Solaris 10 as case-in-point) still -# have a pre-POSIX /bin/sh. - - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] - -Output the configuration name of the system \`$me' is run on. - -Options: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.guess ($timestamp) - -Originally written by Per Bothner. -Copyright 1992-2022 Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." 
- -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - * ) - break ;; - esac -done - -if test $# != 0; then - echo "$me: too many arguments$help" >&2 - exit 1 -fi - -# Just in case it came from the environment. -GUESS= - -# CC_FOR_BUILD -- compiler used by this script. Note that the use of a -# compiler to aid in system detection is discouraged as it requires -# temporary files to be created and, as you can see below, it is a -# headache to deal with in a portable fashion. - -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. - -# Portable tmp directory creation inspired by the Autoconf team. - -tmp= -# shellcheck disable=SC2172 -trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15 - -set_cc_for_build() { - # prevent multiple calls if $tmp is already set - test "$tmp" && return 0 - : "${TMPDIR=/tmp}" - # shellcheck disable=SC2039,SC3028 - { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } - dummy=$tmp/dummy - case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in - ,,) echo "int x;" > "$dummy.c" - for driver in cc gcc c89 c99 ; do - if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then - CC_FOR_BUILD=$driver - break - fi - done - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; 
- ,*,*) CC_FOR_BUILD=$HOST_CC ;; - esac -} - -# This is needed to find uname on a Pyramid OSx when run in the BSD universe. -# (ghazi@noc.rutgers.edu 1994-08-24) -if test -f /.attbin/uname ; then - PATH=$PATH:/.attbin ; export PATH -fi - -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown - -case $UNAME_SYSTEM in -Linux|GNU|GNU/*) - LIBC=unknown - - set_cc_for_build - cat <<-EOF > "$dummy.c" - #include - #if defined(__UCLIBC__) - LIBC=uclibc - #elif defined(__dietlibc__) - LIBC=dietlibc - #elif defined(__GLIBC__) - LIBC=gnu - #else - #include - /* First heuristic to detect musl libc. */ - #ifdef __DEFINED_va_list - LIBC=musl - #endif - #endif - EOF - cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` - eval "$cc_set_libc" - - # Second heuristic to detect musl libc. - if [ "$LIBC" = unknown ] && - command -v ldd >/dev/null && - ldd --version 2>&1 | grep -q ^musl; then - LIBC=musl - fi - - # If the system lacks a compiler, then just pick glibc. - # We could probably try harder. - if [ "$LIBC" = unknown ]; then - LIBC=gnu - fi - ;; -esac - -# Note: order is significant - the case branches are not exclusive. - -case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in - *:NetBSD:*:*) - # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, - # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently - # switched to ELF, *-*-netbsd* would select the old - # object file format. This provides both forward - # compatibility and a consistent mechanism for selecting the - # object file format. - # - # Note: NetBSD doesn't particularly care about the vendor - # portion of the name. We always set it to "unknown". 
- UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ - /sbin/sysctl -n hw.machine_arch 2>/dev/null || \ - /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \ - echo unknown)` - case $UNAME_MACHINE_ARCH in - aarch64eb) machine=aarch64_be-unknown ;; - armeb) machine=armeb-unknown ;; - arm*) machine=arm-unknown ;; - sh3el) machine=shl-unknown ;; - sh3eb) machine=sh-unknown ;; - sh5el) machine=sh5le-unknown ;; - earmv*) - arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` - endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` - machine=${arch}${endian}-unknown - ;; - *) machine=$UNAME_MACHINE_ARCH-unknown ;; - esac - # The Operating System including object format, if it has switched - # to ELF recently (or will in the future) and ABI. - case $UNAME_MACHINE_ARCH in - earm*) - os=netbsdelf - ;; - arm*|i386|m68k|ns32k|sh3*|sparc|vax) - set_cc_for_build - if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ELF__ - then - # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). - # Return netbsd for either. FIX? - os=netbsd - else - os=netbsdelf - fi - ;; - *) - os=netbsd - ;; - esac - # Determine ABI tags. - case $UNAME_MACHINE_ARCH in - earm*) - expr='s/^earmv[0-9]/-eabi/;s/eb$//' - abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` - ;; - esac - # The OS release - # Debian GNU/NetBSD machines have a different userland, and - # thus, need a distinct triplet. However, they do not need - # kernel version information, so it can be replaced with a - # suitable tag, in the style of linux-gnu. - case $UNAME_VERSION in - Debian*) - release='-gnu' - ;; - *) - release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` - ;; - esac - # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: - # contains redundant information, the shorter form: - # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
- GUESS=$machine-${os}${release}${abi-} - ;; - *:Bitrig:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` - GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE - ;; - *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` - GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE - ;; - *:SecBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'` - GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE - ;; - *:LibertyBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` - GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE - ;; - *:MidnightBSD:*:*) - GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE - ;; - *:ekkoBSD:*:*) - GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE - ;; - *:SolidBSD:*:*) - GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE - ;; - *:OS108:*:*) - GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE - ;; - macppc:MirBSD:*:*) - GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE - ;; - *:MirBSD:*:*) - GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE - ;; - *:Sortix:*:*) - GUESS=$UNAME_MACHINE-unknown-sortix - ;; - *:Twizzler:*:*) - GUESS=$UNAME_MACHINE-unknown-twizzler - ;; - *:Redox:*:*) - GUESS=$UNAME_MACHINE-unknown-redox - ;; - mips:OSF1:*.*) - GUESS=mips-dec-osf1 - ;; - alpha:OSF1:*:*) - # Reset EXIT trap before exiting to avoid spurious non-zero exit code. - trap '' 0 - case $UNAME_RELEASE in - *4.0) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - ;; - *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` - ;; - esac - # According to Compaq, /usr/sbin/psrinfo has been available on - # OSF/1 and Tru64 systems produced since 1995. I hope that - # covers most systems running today. This code pipes the CPU - # types through head -n 1, so we only detect the type of CPU 0. 
- ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` - case $ALPHA_CPU_TYPE in - "EV4 (21064)") - UNAME_MACHINE=alpha ;; - "EV4.5 (21064)") - UNAME_MACHINE=alpha ;; - "LCA4 (21066/21068)") - UNAME_MACHINE=alpha ;; - "EV5 (21164)") - UNAME_MACHINE=alphaev5 ;; - "EV5.6 (21164A)") - UNAME_MACHINE=alphaev56 ;; - "EV5.6 (21164PC)") - UNAME_MACHINE=alphapca56 ;; - "EV5.7 (21164PC)") - UNAME_MACHINE=alphapca57 ;; - "EV6 (21264)") - UNAME_MACHINE=alphaev6 ;; - "EV6.7 (21264A)") - UNAME_MACHINE=alphaev67 ;; - "EV6.8CB (21264C)") - UNAME_MACHINE=alphaev68 ;; - "EV6.8AL (21264B)") - UNAME_MACHINE=alphaev68 ;; - "EV6.8CX (21264D)") - UNAME_MACHINE=alphaev68 ;; - "EV6.9A (21264/EV69A)") - UNAME_MACHINE=alphaev69 ;; - "EV7 (21364)") - UNAME_MACHINE=alphaev7 ;; - "EV7.9 (21364A)") - UNAME_MACHINE=alphaev79 ;; - esac - # A Pn.n version is a patched version. - # A Vn.n version is a released version. - # A Tn.n version is a released field test version. - # A Xn.n version is an unreleased experimental baselevel. - # 1.2 uses "1.2" for uname -r. - OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` - GUESS=$UNAME_MACHINE-dec-osf$OSF_REL - ;; - Amiga*:UNIX_System_V:4.0:*) - GUESS=m68k-unknown-sysv4 - ;; - *:[Aa]miga[Oo][Ss]:*:*) - GUESS=$UNAME_MACHINE-unknown-amigaos - ;; - *:[Mm]orph[Oo][Ss]:*:*) - GUESS=$UNAME_MACHINE-unknown-morphos - ;; - *:OS/390:*:*) - GUESS=i370-ibm-openedition - ;; - *:z/VM:*:*) - GUESS=s390-ibm-zvmoe - ;; - *:OS400:*:*) - GUESS=powerpc-ibm-os400 - ;; - arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - GUESS=arm-acorn-riscix$UNAME_RELEASE - ;; - arm*:riscos:*:*|arm*:RISCOS:*:*) - GUESS=arm-unknown-riscos - ;; - SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) - GUESS=hppa1.1-hitachi-hiuxmpp - ;; - Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) - # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
- case `(/bin/universe) 2>/dev/null` in - att) GUESS=pyramid-pyramid-sysv3 ;; - *) GUESS=pyramid-pyramid-bsd ;; - esac - ;; - NILE*:*:*:dcosx) - GUESS=pyramid-pyramid-svr4 - ;; - DRS?6000:unix:4.0:6*) - GUESS=sparc-icl-nx6 - ;; - DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case `/usr/bin/uname -p` in - sparc) GUESS=sparc-icl-nx7 ;; - esac - ;; - s390x:SunOS:*:*) - SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` - GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL - ;; - sun4H:SunOS:5.*:*) - SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` - GUESS=sparc-hal-solaris2$SUN_REL - ;; - sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` - GUESS=sparc-sun-solaris2$SUN_REL - ;; - i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) - GUESS=i386-pc-auroraux$UNAME_RELEASE - ;; - i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) - set_cc_for_build - SUN_ARCH=i386 - # If there is a compiler, see if it is configured for 64-bit objects. - # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. - # This test works for both compilers. - if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - SUN_ARCH=x86_64 - fi - fi - SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` - GUESS=$SUN_ARCH-pc-solaris2$SUN_REL - ;; - sun4*:SunOS:6*:*) - # According to config.sub, this is the proper way to canonicalize - # SunOS6. Hard to guess exactly what SunOS6 will be like, but - # it's likely to be more like Solaris than SunOS4. - SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` - GUESS=sparc-sun-solaris3$SUN_REL - ;; - sun4*:SunOS:*:*) - case `/usr/bin/arch -k` in - Series*|S4*) - UNAME_RELEASE=`uname -v` - ;; - esac - # Japanese Language versions have a version number like `4.1.3-JL'. 
- SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'` - GUESS=sparc-sun-sunos$SUN_REL - ;; - sun3*:SunOS:*:*) - GUESS=m68k-sun-sunos$UNAME_RELEASE - ;; - sun*:*:4.2BSD:*) - UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 - case `/bin/arch` in - sun3) - GUESS=m68k-sun-sunos$UNAME_RELEASE - ;; - sun4) - GUESS=sparc-sun-sunos$UNAME_RELEASE - ;; - esac - ;; - aushp:SunOS:*:*) - GUESS=sparc-auspex-sunos$UNAME_RELEASE - ;; - # The situation for MiNT is a little confusing. The machine name - # can be virtually everything (everything which is not - # "atarist" or "atariste" at least should have a processor - # > m68000). The system name ranges from "MiNT" over "FreeMiNT" - # to the lowercase version "mint" (or "freemint"). Finally - # the system name "TOS" denotes a system which is actually not - # MiNT. But MiNT is downward compatible to TOS, so this should - # be no problem. - atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - GUESS=m68k-atari-mint$UNAME_RELEASE - ;; - atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - GUESS=m68k-atari-mint$UNAME_RELEASE - ;; - *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - GUESS=m68k-atari-mint$UNAME_RELEASE - ;; - milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - GUESS=m68k-milan-mint$UNAME_RELEASE - ;; - hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - GUESS=m68k-hades-mint$UNAME_RELEASE - ;; - *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - GUESS=m68k-unknown-mint$UNAME_RELEASE - ;; - m68k:machten:*:*) - GUESS=m68k-apple-machten$UNAME_RELEASE - ;; - powerpc:machten:*:*) - GUESS=powerpc-apple-machten$UNAME_RELEASE - ;; - RISC*:Mach:*:*) - GUESS=mips-dec-mach_bsd4.3 - ;; - RISC*:ULTRIX:*:*) - GUESS=mips-dec-ultrix$UNAME_RELEASE - ;; - VAX*:ULTRIX*:*:*) - GUESS=vax-dec-ultrix$UNAME_RELEASE - ;; - 2020:CLIX:*:* | 2430:CLIX:*:*) - GUESS=clipper-intergraph-clix$UNAME_RELEASE - ;; - mips:*:*:UMIPS | 
mips:*:*:RISCos) - set_cc_for_build - sed 's/^ //' << EOF > "$dummy.c" -#ifdef __cplusplus -#include /* for printf() prototype */ - int main (int argc, char *argv[]) { -#else - int main (argc, argv) int argc; char *argv[]; { -#endif - #if defined (host_mips) && defined (MIPSEB) - #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); - #endif - #endif - exit (-1); - } -EOF - $CC_FOR_BUILD -o "$dummy" "$dummy.c" && - dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && - SYSTEM_NAME=`"$dummy" "$dummyarg"` && - { echo "$SYSTEM_NAME"; exit; } - GUESS=mips-mips-riscos$UNAME_RELEASE - ;; - Motorola:PowerMAX_OS:*:*) - GUESS=powerpc-motorola-powermax - ;; - Motorola:*:4.3:PL8-*) - GUESS=powerpc-harris-powermax - ;; - Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) - GUESS=powerpc-harris-powermax - ;; - Night_Hawk:Power_UNIX:*:*) - GUESS=powerpc-harris-powerunix - ;; - m88k:CX/UX:7*:*) - GUESS=m88k-harris-cxux7 - ;; - m88k:*:4*:R4*) - GUESS=m88k-motorola-sysv4 - ;; - m88k:*:3*:R3*) - GUESS=m88k-motorola-sysv3 - ;; - AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 - then - if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ - test "$TARGET_BINARY_INTERFACE"x = x - then - GUESS=m88k-dg-dgux$UNAME_RELEASE - else - GUESS=m88k-dg-dguxbcs$UNAME_RELEASE - fi - else - GUESS=i586-dg-dgux$UNAME_RELEASE - fi - ;; - M88*:DolphinOS:*:*) # DolphinOS (SVR3) - GUESS=m88k-dolphin-sysv3 - ;; - M88*:*:R3*:*) - # Delta 88k system running SVR3 - GUESS=m88k-motorola-sysv3 - ;; - XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - GUESS=m88k-tektronix-sysv3 - ;; - Tek43[0-9][0-9]:UTek:*:*) # 
Tektronix 4300 system running UTek (BSD) - GUESS=m68k-tektronix-bsd - ;; - *:IRIX*:*:*) - IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'` - GUESS=mips-sgi-irix$IRIX_REL - ;; - ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. - GUESS=romp-ibm-aix # uname -m gives an 8 hex-code CPU id - ;; # Note that: echo "'`uname -s`'" gives 'AIX ' - i*86:AIX:*:*) - GUESS=i386-ibm-aix - ;; - ia64:AIX:*:*) - if test -x /usr/bin/oslevel ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=$UNAME_VERSION.$UNAME_RELEASE - fi - GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV - ;; - *:AIX:2:3) - if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - set_cc_for_build - sed 's/^ //' << EOF > "$dummy.c" - #include - - main() - { - if (!__power_pc()) - exit(1); - puts("powerpc-ibm-aix3.2.5"); - exit(0); - } -EOF - if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` - then - GUESS=$SYSTEM_NAME - else - GUESS=rs6000-ibm-aix3.2.5 - fi - elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - GUESS=rs6000-ibm-aix3.2.4 - else - GUESS=rs6000-ibm-aix3.2 - fi - ;; - *:AIX:*:[4567]) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then - IBM_ARCH=rs6000 - else - IBM_ARCH=powerpc - fi - if test -x /usr/bin/lslpp ; then - IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \ - awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` - else - IBM_REV=$UNAME_VERSION.$UNAME_RELEASE - fi - GUESS=$IBM_ARCH-ibm-aix$IBM_REV - ;; - *:AIX:*:*) - GUESS=rs6000-ibm-aix - ;; - ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) - GUESS=romp-ibm-bsd4.4 - ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - GUESS=romp-ibm-bsd$UNAME_RELEASE # 4.3 with uname added to - ;; # report: romp-ibm BSD 4.3 - *:BOSX:*:*) - GUESS=rs6000-bull-bosx - ;; - DPX/2?00:B.O.S.:*:*) - GUESS=m68k-bull-sysv3 - ;; - 9000/[34]??:4.3bsd:1.*:*) - GUESS=m68k-hp-bsd - ;; - hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - 
GUESS=m68k-hp-bsd4.4 - ;; - 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` - case $UNAME_MACHINE in - 9000/31?) HP_ARCH=m68000 ;; - 9000/[34]??) HP_ARCH=m68k ;; - 9000/[678][0-9][0-9]) - if test -x /usr/bin/getconf; then - sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case $sc_cpu_version in - 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 - 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case $sc_kernel_bits in - 32) HP_ARCH=hppa2.0n ;; - 64) HP_ARCH=hppa2.0w ;; - '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 - esac ;; - esac - fi - if test "$HP_ARCH" = ""; then - set_cc_for_build - sed 's/^ //' << EOF > "$dummy.c" - - #define _HPUX_SOURCE - #include - #include - - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); - - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } -EOF - (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` - test -z "$HP_ARCH" && HP_ARCH=hppa - fi ;; - esac - if test "$HP_ARCH" = hppa2.0w - then - set_cc_for_build - - # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating - # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler - # generating 64-bit code. 
GNU and HP use different nomenclature: - # - # $ CC_FOR_BUILD=cc ./config.guess - # => hppa2.0w-hp-hpux11.23 - # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess - # => hppa64-hp-hpux11.23 - - if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | - grep -q __LP64__ - then - HP_ARCH=hppa2.0w - else - HP_ARCH=hppa64 - fi - fi - GUESS=$HP_ARCH-hp-hpux$HPUX_REV - ;; - ia64:HP-UX:*:*) - HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` - GUESS=ia64-hp-hpux$HPUX_REV - ;; - 3050*:HI-UX:*:*) - set_cc_for_build - sed 's/^ //' << EOF > "$dummy.c" - #include - int - main () - { - long cpu = sysconf (_SC_CPU_VERSION); - /* The order matters, because CPU_IS_HP_MC68K erroneously returns - true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct - results, however. */ - if (CPU_IS_PA_RISC (cpu)) - { - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; - case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; - default: puts ("hppa-hitachi-hiuxwe2"); break; - } - } - else if (CPU_IS_HP_MC68K (cpu)) - puts ("m68k-hitachi-hiuxwe2"); - else puts ("unknown-hitachi-hiuxwe2"); - exit (0); - } -EOF - $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && - { echo "$SYSTEM_NAME"; exit; } - GUESS=unknown-hitachi-hiuxwe2 - ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) - GUESS=hppa1.1-hp-bsd - ;; - 9000/8??:4.3bsd:*:*) - GUESS=hppa1.0-hp-bsd - ;; - *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) - GUESS=hppa1.0-hp-mpeix - ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) - GUESS=hppa1.1-hp-osf - ;; - hp8??:OSF1:*:*) - GUESS=hppa1.0-hp-osf - ;; - i*86:OSF1:*:*) - if test -x /usr/sbin/sysversion ; then - GUESS=$UNAME_MACHINE-unknown-osf1mk - else - GUESS=$UNAME_MACHINE-unknown-osf1 - fi - ;; - parisc*:Lites*:*:*) - GUESS=hppa1.1-hp-lites - ;; - C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - GUESS=c1-convex-bsd - ;; - C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) - if getsysinfo -f scalar_acc 
- then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - GUESS=c34-convex-bsd - ;; - C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - GUESS=c38-convex-bsd - ;; - C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - GUESS=c4-convex-bsd - ;; - CRAY*Y-MP:*:*:*) - CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` - GUESS=ymp-cray-unicos$CRAY_REL - ;; - CRAY*[A-Z]90:*:*:*) - echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ - | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ - -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*TS:*:*:*) - CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` - GUESS=t90-cray-unicos$CRAY_REL - ;; - CRAY*T3E:*:*:*) - CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` - GUESS=alphaev5-cray-unicosmk$CRAY_REL - ;; - CRAY*SV1:*:*:*) - CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` - GUESS=sv1-cray-unicos$CRAY_REL - ;; - *:UNICOS/mp:*:*) - CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` - GUESS=craynv-cray-unicosmp$CRAY_REL - ;; - F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` - FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` - GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} - ;; - 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` - FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` - GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} - ;; - i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE - ;; - sparc*:BSD/OS:*:*) - GUESS=sparc-unknown-bsdi$UNAME_RELEASE - ;; - *:BSD/OS:*:*) - 
GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE - ;; - arm:FreeBSD:*:*) - UNAME_PROCESSOR=`uname -p` - set_cc_for_build - if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_PCS_VFP - then - FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` - GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabi - else - FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` - GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabihf - fi - ;; - *:FreeBSD:*:*) - UNAME_PROCESSOR=`/usr/bin/uname -p` - case $UNAME_PROCESSOR in - amd64) - UNAME_PROCESSOR=x86_64 ;; - i386) - UNAME_PROCESSOR=i586 ;; - esac - FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` - GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL - ;; - i*:CYGWIN*:*) - GUESS=$UNAME_MACHINE-pc-cygwin - ;; - *:MINGW64*:*) - GUESS=$UNAME_MACHINE-pc-mingw64 - ;; - *:MINGW*:*) - GUESS=$UNAME_MACHINE-pc-mingw32 - ;; - *:MSYS*:*) - GUESS=$UNAME_MACHINE-pc-msys - ;; - i*:PW*:*) - GUESS=$UNAME_MACHINE-pc-pw32 - ;; - *:SerenityOS:*:*) - GUESS=$UNAME_MACHINE-pc-serenity - ;; - *:Interix*:*) - case $UNAME_MACHINE in - x86) - GUESS=i586-pc-interix$UNAME_RELEASE - ;; - authenticamd | genuineintel | EM64T) - GUESS=x86_64-unknown-interix$UNAME_RELEASE - ;; - IA64) - GUESS=ia64-unknown-interix$UNAME_RELEASE - ;; - esac ;; - i*:UWIN*:*) - GUESS=$UNAME_MACHINE-pc-uwin - ;; - amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - GUESS=x86_64-pc-cygwin - ;; - prep*:SunOS:5.*:*) - SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` - GUESS=powerpcle-unknown-solaris2$SUN_REL - ;; - *:GNU:*:*) - # the GNU system - GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'` - GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'` - GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL - ;; - *:GNU/*:*:*) - # other systems with GNU libc and userland - GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"` - GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` - 
GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC - ;; - *:Minix:*:*) - GUESS=$UNAME_MACHINE-unknown-minix - ;; - aarch64:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - aarch64_be:Linux:*:*) - UNAME_MACHINE=aarch64_be - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC=gnulibc1 ; fi - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - arm*:Linux:*:*) - set_cc_for_build - if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_EABI__ - then - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - else - if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_PCS_VFP - then - GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi - else - GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf - fi - fi - ;; - avr32*:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - cris:Linux:*:*) - GUESS=$UNAME_MACHINE-axis-linux-$LIBC - ;; - crisv32:Linux:*:*) - GUESS=$UNAME_MACHINE-axis-linux-$LIBC - ;; - e2k:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - frv:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - hexagon:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - i*86:Linux:*:*) - GUESS=$UNAME_MACHINE-pc-linux-$LIBC - ;; - ia64:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - k1om:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; 
- m32r*:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - m68*:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - mips:Linux:*:* | mips64:Linux:*:*) - set_cc_for_build - IS_GLIBC=0 - test x"${LIBC}" = xgnu && IS_GLIBC=1 - sed 's/^ //' << EOF > "$dummy.c" - #undef CPU - #undef mips - #undef mipsel - #undef mips64 - #undef mips64el - #if ${IS_GLIBC} && defined(_ABI64) - LIBCABI=gnuabi64 - #else - #if ${IS_GLIBC} && defined(_ABIN32) - LIBCABI=gnuabin32 - #else - LIBCABI=${LIBC} - #endif - #endif - - #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 - CPU=mipsisa64r6 - #else - #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 - CPU=mipsisa32r6 - #else - #if defined(__mips64) - CPU=mips64 - #else - CPU=mips - #endif - #endif - #endif - - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - MIPS_ENDIAN=el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - MIPS_ENDIAN= - #else - MIPS_ENDIAN= - #endif - #endif -EOF - cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'` - eval "$cc_set_vars" - test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } - ;; - mips64el:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - openrisc*:Linux:*:*) - GUESS=or1k-unknown-linux-$LIBC - ;; - or32:Linux:*:* | or1k*:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - padre:Linux:*:*) - GUESS=sparc-unknown-linux-$LIBC - ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - GUESS=hppa64-unknown-linux-$LIBC - ;; - parisc:Linux:*:* | hppa:Linux:*:*) - # Look for CPU level - case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;; - PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;; - *) GUESS=hppa-unknown-linux-$LIBC ;; - esac - ;; - ppc64:Linux:*:*) - GUESS=powerpc64-unknown-linux-$LIBC - ;; - 
ppc:Linux:*:*) - GUESS=powerpc-unknown-linux-$LIBC - ;; - ppc64le:Linux:*:*) - GUESS=powerpc64le-unknown-linux-$LIBC - ;; - ppcle:Linux:*:*) - GUESS=powerpcle-unknown-linux-$LIBC - ;; - riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - s390:Linux:*:* | s390x:Linux:*:*) - GUESS=$UNAME_MACHINE-ibm-linux-$LIBC - ;; - sh64*:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - sh*:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - sparc:Linux:*:* | sparc64:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - tile*:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - vax:Linux:*:*) - GUESS=$UNAME_MACHINE-dec-linux-$LIBC - ;; - x86_64:Linux:*:*) - set_cc_for_build - LIBCABI=$LIBC - if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_X32 >/dev/null - then - LIBCABI=${LIBC}x32 - fi - fi - GUESS=$UNAME_MACHINE-pc-linux-$LIBCABI - ;; - xtensa*:Linux:*:*) - GUESS=$UNAME_MACHINE-unknown-linux-$LIBC - ;; - i*86:DYNIX/ptx:4*:*) - # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. - # earlier versions are messed up and put the nodename in both - # sysname and nodename. - GUESS=i386-sequent-sysv4 - ;; - i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, - # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. - GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION - ;; - i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility - # is probably installed. 
- GUESS=$UNAME_MACHINE-pc-os2-emx - ;; - i*86:XTS-300:*:STOP) - GUESS=$UNAME_MACHINE-unknown-stop - ;; - i*86:atheos:*:*) - GUESS=$UNAME_MACHINE-unknown-atheos - ;; - i*86:syllable:*:*) - GUESS=$UNAME_MACHINE-pc-syllable - ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - GUESS=i386-unknown-lynxos$UNAME_RELEASE - ;; - i*86:*DOS:*:*) - GUESS=$UNAME_MACHINE-pc-msdosdjgpp - ;; - i*86:*:4.*:*) - UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` - if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL - else - GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL - fi - ;; - i*86:*:5:[678]*) - # UnixWare 7.x, OpenUNIX and OpenServer 6. - case `/bin/uname -X | grep "^Machine"` in - *486*) UNAME_MACHINE=i486 ;; - *Pentium) UNAME_MACHINE=i586 ;; - *Pent*|*Celeron) UNAME_MACHINE=i686 ;; - esac - GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} - ;; - i*86:*:3.2:*) - if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` - (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ - && UNAME_MACHINE=i686 - (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ - && UNAME_MACHINE=i686 - GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL - else - GUESS=$UNAME_MACHINE-pc-sysv32 - fi - ;; - pc:*:*:*) - # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i586. - # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configure will decide that - # this is a cross-build. 
- GUESS=i586-pc-msdosdjgpp - ;; - Intel:Mach:3*:*) - GUESS=i386-pc-mach3 - ;; - paragon:*:*:*) - GUESS=i860-intel-osf1 - ;; - i860:*:4.*:*) # i860-SVR4 - if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - GUESS=i860-stardent-sysv$UNAME_RELEASE # Stardent Vistra i860-SVR4 - else # Add other i860-SVR4 vendors below as they are discovered. - GUESS=i860-unknown-sysv$UNAME_RELEASE # Unknown i860-SVR4 - fi - ;; - mini*:CTIX:SYS*5:*) - # "miniframe" - GUESS=m68010-convergent-sysv - ;; - mc68k:UNIX:SYSTEM5:3.51m) - GUESS=m68k-convergent-sysv - ;; - M680?0:D-NIX:5.3:*) - GUESS=m68k-diab-dnix - ;; - M68*:*:R3V[5678]*:*) - test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; - 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) - OS_REL='' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; - 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4; exit; } ;; - NCR*:*:4.2:* | MPRAS*:*:4.2:*) - OS_REL='.3' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } - /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ - && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; - m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - GUESS=m68k-unknown-lynxos$UNAME_RELEASE - ;; - mc68030:UNIX_System_V:4.*:*) - GUESS=m68k-atari-sysv4 - ;; - TSUNAMI:LynxOS:2.*:*) - 
GUESS=sparc-unknown-lynxos$UNAME_RELEASE - ;; - rs6000:LynxOS:2.*:*) - GUESS=rs6000-unknown-lynxos$UNAME_RELEASE - ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - GUESS=powerpc-unknown-lynxos$UNAME_RELEASE - ;; - SM[BE]S:UNIX_SV:*:*) - GUESS=mips-dde-sysv$UNAME_RELEASE - ;; - RM*:ReliantUNIX-*:*:*) - GUESS=mips-sni-sysv4 - ;; - RM*:SINIX-*:*:*) - GUESS=mips-sni-sysv4 - ;; - *:SINIX-*:*:*) - if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - GUESS=$UNAME_MACHINE-sni-sysv4 - else - GUESS=ns32k-sni-sysv - fi - ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - GUESS=i586-unisys-sysv4 - ;; - *:UNIX_System_V:4*:FTX*) - # From Gerald Hewes . - # How about differentiating between stratus architectures? -djm - GUESS=hppa1.1-stratus-sysv4 - ;; - *:*:*:FTX*) - # From seanf@swdc.stratus.com. - GUESS=i860-stratus-sysv4 - ;; - i*86:VOS:*:*) - # From Paul.Green@stratus.com. - GUESS=$UNAME_MACHINE-stratus-vos - ;; - *:VOS:*:*) - # From Paul.Green@stratus.com. - GUESS=hppa1.1-stratus-vos - ;; - mc68*:A/UX:*:*) - GUESS=m68k-apple-aux$UNAME_RELEASE - ;; - news*:NEWS-OS:6*:*) - GUESS=mips-sony-newsos6 - ;; - R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if test -d /usr/nec; then - GUESS=mips-nec-sysv$UNAME_RELEASE - else - GUESS=mips-unknown-sysv$UNAME_RELEASE - fi - ;; - BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - GUESS=powerpc-be-beos - ;; - BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - GUESS=powerpc-apple-beos - ;; - BePC:BeOS:*:*) # BeOS running on Intel PC compatible. - GUESS=i586-pc-beos - ;; - BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
- GUESS=i586-pc-haiku - ;; - x86_64:Haiku:*:*) - GUESS=x86_64-unknown-haiku - ;; - SX-4:SUPER-UX:*:*) - GUESS=sx4-nec-superux$UNAME_RELEASE - ;; - SX-5:SUPER-UX:*:*) - GUESS=sx5-nec-superux$UNAME_RELEASE - ;; - SX-6:SUPER-UX:*:*) - GUESS=sx6-nec-superux$UNAME_RELEASE - ;; - SX-7:SUPER-UX:*:*) - GUESS=sx7-nec-superux$UNAME_RELEASE - ;; - SX-8:SUPER-UX:*:*) - GUESS=sx8-nec-superux$UNAME_RELEASE - ;; - SX-8R:SUPER-UX:*:*) - GUESS=sx8r-nec-superux$UNAME_RELEASE - ;; - SX-ACE:SUPER-UX:*:*) - GUESS=sxace-nec-superux$UNAME_RELEASE - ;; - Power*:Rhapsody:*:*) - GUESS=powerpc-apple-rhapsody$UNAME_RELEASE - ;; - *:Rhapsody:*:*) - GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE - ;; - arm64:Darwin:*:*) - GUESS=aarch64-apple-darwin$UNAME_RELEASE - ;; - *:Darwin:*:*) - UNAME_PROCESSOR=`uname -p` - case $UNAME_PROCESSOR in - unknown) UNAME_PROCESSOR=powerpc ;; - esac - if command -v xcode-select > /dev/null 2> /dev/null && \ - ! xcode-select --print-path > /dev/null 2> /dev/null ; then - # Avoid executing cc if there is no toolchain installed as - # cc will be a stub that puts up a graphical alert - # prompting the user to install developer tools. 
- CC_FOR_BUILD=no_compiler_found - else - set_cc_for_build - fi - if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - case $UNAME_PROCESSOR in - i386) UNAME_PROCESSOR=x86_64 ;; - powerpc) UNAME_PROCESSOR=powerpc64 ;; - esac - fi - # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc - if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_PPC >/dev/null - then - UNAME_PROCESSOR=powerpc - fi - elif test "$UNAME_PROCESSOR" = i386 ; then - # uname -m returns i386 or x86_64 - UNAME_PROCESSOR=$UNAME_MACHINE - fi - GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE - ;; - *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = x86; then - UNAME_PROCESSOR=i386 - UNAME_MACHINE=pc - fi - GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE - ;; - *:QNX:*:4*) - GUESS=i386-pc-qnx - ;; - NEO-*:NONSTOP_KERNEL:*:*) - GUESS=neo-tandem-nsk$UNAME_RELEASE - ;; - NSE-*:NONSTOP_KERNEL:*:*) - GUESS=nse-tandem-nsk$UNAME_RELEASE - ;; - NSR-*:NONSTOP_KERNEL:*:*) - GUESS=nsr-tandem-nsk$UNAME_RELEASE - ;; - NSV-*:NONSTOP_KERNEL:*:*) - GUESS=nsv-tandem-nsk$UNAME_RELEASE - ;; - NSX-*:NONSTOP_KERNEL:*:*) - GUESS=nsx-tandem-nsk$UNAME_RELEASE - ;; - *:NonStop-UX:*:*) - GUESS=mips-compaq-nonstopux - ;; - BS2000:POSIX*:*:*) - GUESS=bs2000-siemens-sysv - ;; - DS/*:UNIX_System_V:*:*) - GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE - ;; - *:Plan9:*:*) - # "uname -m" is not consistent, so use $cputype instead. 386 - # is converted to i386 for consistency with other x86 - # operating systems. 
- if test "${cputype-}" = 386; then - UNAME_MACHINE=i386 - elif test "x${cputype-}" != x; then - UNAME_MACHINE=$cputype - fi - GUESS=$UNAME_MACHINE-unknown-plan9 - ;; - *:TOPS-10:*:*) - GUESS=pdp10-unknown-tops10 - ;; - *:TENEX:*:*) - GUESS=pdp10-unknown-tenex - ;; - KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) - GUESS=pdp10-dec-tops20 - ;; - XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) - GUESS=pdp10-xkl-tops20 - ;; - *:TOPS-20:*:*) - GUESS=pdp10-unknown-tops20 - ;; - *:ITS:*:*) - GUESS=pdp10-unknown-its - ;; - SEI:*:*:SEIUX) - GUESS=mips-sei-seiux$UNAME_RELEASE - ;; - *:DragonFly:*:*) - DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` - GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL - ;; - *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` - case $UNAME_MACHINE in - A*) GUESS=alpha-dec-vms ;; - I*) GUESS=ia64-dec-vms ;; - V*) GUESS=vax-dec-vms ;; - esac ;; - *:XENIX:*:SysV) - GUESS=i386-pc-xenix - ;; - i*86:skyos:*:*) - SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'` - GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL - ;; - i*86:rdos:*:*) - GUESS=$UNAME_MACHINE-pc-rdos - ;; - i*86:Fiwix:*:*) - GUESS=$UNAME_MACHINE-pc-fiwix - ;; - *:AROS:*:*) - GUESS=$UNAME_MACHINE-unknown-aros - ;; - x86_64:VMkernel:*:*) - GUESS=$UNAME_MACHINE-unknown-esx - ;; - amd64:Isilon\ OneFS:*:*) - GUESS=x86_64-unknown-onefs - ;; - *:Unleashed:*:*) - GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE - ;; -esac - -# Do we have a guess based on uname results? -if test "x$GUESS" != x; then - echo "$GUESS" - exit -fi - -# No uname command or uname output not recognized. 
-set_cc_for_build -cat > "$dummy.c" < -#include -#endif -#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) -#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) -#include -#if defined(_SIZE_T_) || defined(SIGLOST) -#include -#endif -#endif -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... */ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); -#endif - -#if defined (vax) -#if !defined (ultrix) -#include -#if defined (BSD) -#if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -#else -#if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -#else - printf ("vax-dec-bsd\n"); exit (0); -#endif -#endif -#else - printf ("vax-dec-bsd\n"); exit (0); -#endif -#else -#if defined(_SIZE_T_) || defined(SIGLOST) - struct utsname un; - uname (&un); - printf ("vax-dec-ultrix%s\n", un.release); exit (0); -#else - printf ("vax-dec-ultrix\n"); exit (0); -#endif -#endif -#endif -#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) -#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) -#if defined(_SIZE_T_) || defined(SIGLOST) - struct utsname *un; - uname (&un); - printf ("mips-dec-ultrix%s\n", un.release); exit (0); -#else - printf ("mips-dec-ultrix\n"); exit (0); -#endif -#endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`"$dummy"` && - { echo "$SYSTEM_NAME"; exit; } - -# Apollos put the system type in the environment. -test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } - -echo "$0: unable to guess system type" >&2 - -case $UNAME_MACHINE:$UNAME_SYSTEM in - mips:Linux | mips64:Linux) - # If we got here on MIPS GNU/Linux, output extra information. 
- cat >&2 <&2 <&2 </dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null` - -hostinfo = `(hostinfo) 2>/dev/null` -/bin/universe = `(/bin/universe) 2>/dev/null` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` -/bin/arch = `(/bin/arch) 2>/dev/null` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` - -UNAME_MACHINE = "$UNAME_MACHINE" -UNAME_RELEASE = "$UNAME_RELEASE" -UNAME_SYSTEM = "$UNAME_SYSTEM" -UNAME_VERSION = "$UNAME_VERSION" -EOF -fi - -exit 1 - -# Local variables: -# eval: (add-hook 'before-save-hook 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/paddlespeech/audio/third_party/patches/config.sub b/paddlespeech/audio/third_party/patches/config.sub deleted file mode 100644 index dba16e84c77c7d25871d80c24deff717faf4c094..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/patches/config.sub +++ /dev/null @@ -1,1890 +0,0 @@ -#! /bin/sh -# Configuration validation subroutine script. -# Copyright 1992-2022 Free Software Foundation, Inc. - -# shellcheck disable=SC2006,SC2268 # see below for rationale - -timestamp='2022-01-03' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see . -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that -# program. This Exception is an additional permission under section 7 -# of the GNU General Public License, version 3 ("GPLv3"). - - -# Please send patches to . -# -# Configuration subroutine to validate and canonicalize a configuration type. -# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. -# Otherwise, we print the canonical config type on stdout and succeed. - -# You can get the latest version of this script from: -# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. - -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. - -# The "shellcheck disable" line above the timestamp inhibits complaints -# about features and limitations of the classic Bourne shell that were -# superseded or lifted in POSIX. 
However, this script identifies a wide -# variety of pre-POSIX systems that do not have POSIX shells at all, and -# even some reasonably current systems (Solaris 10 as case-in-point) still -# have a pre-POSIX /bin/sh. - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS - -Canonicalize a configuration name. - -Options: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.sub ($timestamp) - -Copyright 1992-2022 Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - - *local*) - # First pass through any local machine types. 
- echo "$1" - exit ;; - - * ) - break ;; - esac -done - -case $# in - 0) echo "$me: missing argument$help" >&2 - exit 1;; - 1) ;; - *) echo "$me: too many arguments$help" >&2 - exit 1;; -esac - -# Split fields of configuration type -# shellcheck disable=SC2162 -saved_IFS=$IFS -IFS="-" read field1 field2 field3 field4 <&2 - exit 1 - ;; - *-*-*-*) - basic_machine=$field1-$field2 - basic_os=$field3-$field4 - ;; - *-*-*) - # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two - # parts - maybe_os=$field2-$field3 - case $maybe_os in - nto-qnx* | linux-* | uclinux-uclibc* \ - | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ - | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ - | storm-chaos* | os2-emx* | rtmk-nova*) - basic_machine=$field1 - basic_os=$maybe_os - ;; - android-linux) - basic_machine=$field1-unknown - basic_os=linux-android - ;; - *) - basic_machine=$field1-$field2 - basic_os=$field3 - ;; - esac - ;; - *-*) - # A lone config we happen to match not fitting any pattern - case $field1-$field2 in - decstation-3100) - basic_machine=mips-dec - basic_os= - ;; - *-*) - # Second component is usually, but not always the OS - case $field2 in - # Prevent following clause from handling this valid os - sun*os*) - basic_machine=$field1 - basic_os=$field2 - ;; - zephyr*) - basic_machine=$field1-unknown - basic_os=$field2 - ;; - # Manufacturers - dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ - | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ - | unicom* | ibm* | next | hp | isi* | apollo | altos* \ - | convergent* | ncr* | news | 32* | 3600* | 3100* \ - | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ - | ultra | tti* | harris | dolphin | highlevel | gould \ - | cbm | ns | masscomp | apple | axis | knuth | cray \ - | microblaze* | sim | cisco \ - | oki | wec | wrs | winbond) - basic_machine=$field1-$field2 - basic_os= - ;; - *) - basic_machine=$field1 - basic_os=$field2 - ;; - esac - ;; - esac - ;; - 
*) - # Convert single-component short-hands not valid as part of - # multi-component configurations. - case $field1 in - 386bsd) - basic_machine=i386-pc - basic_os=bsd - ;; - a29khif) - basic_machine=a29k-amd - basic_os=udi - ;; - adobe68k) - basic_machine=m68010-adobe - basic_os=scout - ;; - alliant) - basic_machine=fx80-alliant - basic_os= - ;; - altos | altos3068) - basic_machine=m68k-altos - basic_os= - ;; - am29k) - basic_machine=a29k-none - basic_os=bsd - ;; - amdahl) - basic_machine=580-amdahl - basic_os=sysv - ;; - amiga) - basic_machine=m68k-unknown - basic_os= - ;; - amigaos | amigados) - basic_machine=m68k-unknown - basic_os=amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - basic_os=sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - basic_os=sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - basic_os=bsd - ;; - aros) - basic_machine=i386-pc - basic_os=aros - ;; - aux) - basic_machine=m68k-apple - basic_os=aux - ;; - balance) - basic_machine=ns32k-sequent - basic_os=dynix - ;; - blackfin) - basic_machine=bfin-unknown - basic_os=linux - ;; - cegcc) - basic_machine=arm-unknown - basic_os=cegcc - ;; - convex-c1) - basic_machine=c1-convex - basic_os=bsd - ;; - convex-c2) - basic_machine=c2-convex - basic_os=bsd - ;; - convex-c32) - basic_machine=c32-convex - basic_os=bsd - ;; - convex-c34) - basic_machine=c34-convex - basic_os=bsd - ;; - convex-c38) - basic_machine=c38-convex - basic_os=bsd - ;; - cray) - basic_machine=j90-cray - basic_os=unicos - ;; - crds | unos) - basic_machine=m68k-crds - basic_os= - ;; - da30) - basic_machine=m68k-da30 - basic_os= - ;; - decstation | pmax | pmin | dec3100 | decstatn) - basic_machine=mips-dec - basic_os= - ;; - delta88) - basic_machine=m88k-motorola - basic_os=sysv3 - ;; - dicos) - basic_machine=i686-pc - basic_os=dicos - ;; - djgpp) - basic_machine=i586-pc - basic_os=msdosdjgpp - ;; - ebmon29k) - basic_machine=a29k-amd - basic_os=ebmon - ;; - es1800 | OSE68k | ose68k | ose | OSE) - 
basic_machine=m68k-ericsson - basic_os=ose - ;; - gmicro) - basic_machine=tron-gmicro - basic_os=sysv - ;; - go32) - basic_machine=i386-pc - basic_os=go32 - ;; - h8300hms) - basic_machine=h8300-hitachi - basic_os=hms - ;; - h8300xray) - basic_machine=h8300-hitachi - basic_os=xray - ;; - h8500hms) - basic_machine=h8500-hitachi - basic_os=hms - ;; - harris) - basic_machine=m88k-harris - basic_os=sysv3 - ;; - hp300 | hp300hpux) - basic_machine=m68k-hp - basic_os=hpux - ;; - hp300bsd) - basic_machine=m68k-hp - basic_os=bsd - ;; - hppaosf) - basic_machine=hppa1.1-hp - basic_os=osf - ;; - hppro) - basic_machine=hppa1.1-hp - basic_os=proelf - ;; - i386mach) - basic_machine=i386-mach - basic_os=mach - ;; - isi68 | isi) - basic_machine=m68k-isi - basic_os=sysv - ;; - m68knommu) - basic_machine=m68k-unknown - basic_os=linux - ;; - magnum | m3230) - basic_machine=mips-mips - basic_os=sysv - ;; - merlin) - basic_machine=ns32k-utek - basic_os=sysv - ;; - mingw64) - basic_machine=x86_64-pc - basic_os=mingw64 - ;; - mingw32) - basic_machine=i686-pc - basic_os=mingw32 - ;; - mingw32ce) - basic_machine=arm-unknown - basic_os=mingw32ce - ;; - monitor) - basic_machine=m68k-rom68k - basic_os=coff - ;; - morphos) - basic_machine=powerpc-unknown - basic_os=morphos - ;; - moxiebox) - basic_machine=moxie-unknown - basic_os=moxiebox - ;; - msdos) - basic_machine=i386-pc - basic_os=msdos - ;; - msys) - basic_machine=i686-pc - basic_os=msys - ;; - mvs) - basic_machine=i370-ibm - basic_os=mvs - ;; - nacl) - basic_machine=le32-unknown - basic_os=nacl - ;; - ncr3000) - basic_machine=i486-ncr - basic_os=sysv4 - ;; - netbsd386) - basic_machine=i386-pc - basic_os=netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - basic_os=linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - basic_os=newsos - ;; - news1000) - basic_machine=m68030-sony - basic_os=newsos - ;; - necv70) - basic_machine=v70-nec - basic_os=sysv - ;; - nh3000) - basic_machine=m68k-harris - basic_os=cxux - ;; 
- nh[45]000) - basic_machine=m88k-harris - basic_os=cxux - ;; - nindy960) - basic_machine=i960-intel - basic_os=nindy - ;; - mon960) - basic_machine=i960-intel - basic_os=mon960 - ;; - nonstopux) - basic_machine=mips-compaq - basic_os=nonstopux - ;; - os400) - basic_machine=powerpc-ibm - basic_os=os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - basic_os=ose - ;; - os68k) - basic_machine=m68k-none - basic_os=os68k - ;; - paragon) - basic_machine=i860-intel - basic_os=osf - ;; - parisc) - basic_machine=hppa-unknown - basic_os=linux - ;; - psp) - basic_machine=mipsallegrexel-sony - basic_os=psp - ;; - pw32) - basic_machine=i586-unknown - basic_os=pw32 - ;; - rdos | rdos64) - basic_machine=x86_64-pc - basic_os=rdos - ;; - rdos32) - basic_machine=i386-pc - basic_os=rdos - ;; - rom68k) - basic_machine=m68k-rom68k - basic_os=coff - ;; - sa29200) - basic_machine=a29k-amd - basic_os=udi - ;; - sei) - basic_machine=mips-sei - basic_os=seiux - ;; - sequent) - basic_machine=i386-sequent - basic_os= - ;; - sps7) - basic_machine=m68k-bull - basic_os=sysv2 - ;; - st2000) - basic_machine=m68k-tandem - basic_os= - ;; - stratus) - basic_machine=i860-stratus - basic_os=sysv4 - ;; - sun2) - basic_machine=m68000-sun - basic_os= - ;; - sun2os3) - basic_machine=m68000-sun - basic_os=sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - basic_os=sunos4 - ;; - sun3) - basic_machine=m68k-sun - basic_os= - ;; - sun3os3) - basic_machine=m68k-sun - basic_os=sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - basic_os=sunos4 - ;; - sun4) - basic_machine=sparc-sun - basic_os= - ;; - sun4os3) - basic_machine=sparc-sun - basic_os=sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - basic_os=sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - basic_os=solaris2 - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - basic_os= - ;; - sv1) - basic_machine=sv1-cray - basic_os=unicos - ;; - symmetry) - basic_machine=i386-sequent - basic_os=dynix - ;; - t3e) - 
basic_machine=alphaev5-cray - basic_os=unicos - ;; - t90) - basic_machine=t90-cray - basic_os=unicos - ;; - toad1) - basic_machine=pdp10-xkl - basic_os=tops20 - ;; - tpf) - basic_machine=s390x-ibm - basic_os=tpf - ;; - udi29k) - basic_machine=a29k-amd - basic_os=udi - ;; - ultra3) - basic_machine=a29k-nyu - basic_os=sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - basic_os=none - ;; - vaxv) - basic_machine=vax-dec - basic_os=sysv - ;; - vms) - basic_machine=vax-dec - basic_os=vms - ;; - vsta) - basic_machine=i386-pc - basic_os=vsta - ;; - vxworks960) - basic_machine=i960-wrs - basic_os=vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - basic_os=vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - basic_os=vxworks - ;; - xbox) - basic_machine=i686-pc - basic_os=mingw32 - ;; - ymp) - basic_machine=ymp-cray - basic_os=unicos - ;; - *) - basic_machine=$1 - basic_os= - ;; - esac - ;; -esac - -# Decode 1-component or ad-hoc basic machines -case $basic_machine in - # Here we handle the default manufacturer of certain CPU types. It is in - # some cases the only manufacturer, in others, it is the most popular. - w89k) - cpu=hppa1.1 - vendor=winbond - ;; - op50n) - cpu=hppa1.1 - vendor=oki - ;; - op60c) - cpu=hppa1.1 - vendor=oki - ;; - ibm*) - cpu=i370 - vendor=ibm - ;; - orion105) - cpu=clipper - vendor=highlevel - ;; - mac | mpw | mac-mpw) - cpu=m68k - vendor=apple - ;; - pmac | pmac-mpw) - cpu=powerpc - vendor=apple - ;; - - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. 
- 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - cpu=m68000 - vendor=att - ;; - 3b*) - cpu=we32k - vendor=att - ;; - bluegene*) - cpu=powerpc - vendor=ibm - basic_os=cnk - ;; - decsystem10* | dec10*) - cpu=pdp10 - vendor=dec - basic_os=tops10 - ;; - decsystem20* | dec20*) - cpu=pdp10 - vendor=dec - basic_os=tops20 - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - cpu=m68k - vendor=motorola - ;; - dpx2*) - cpu=m68k - vendor=bull - basic_os=sysv3 - ;; - encore | umax | mmax) - cpu=ns32k - vendor=encore - ;; - elxsi) - cpu=elxsi - vendor=elxsi - basic_os=${basic_os:-bsd} - ;; - fx2800) - cpu=i860 - vendor=alliant - ;; - genix) - cpu=ns32k - vendor=ns - ;; - h3050r* | hiux*) - cpu=hppa1.1 - vendor=hitachi - basic_os=hiuxwe2 - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - cpu=hppa1.0 - vendor=hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - cpu=m68000 - vendor=hp - ;; - hp9k3[2-9][0-9]) - cpu=m68k - vendor=hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - cpu=hppa1.0 - vendor=hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - cpu=hppa1.1 - vendor=hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - cpu=hppa1.1 - vendor=hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - cpu=hppa1.1 - vendor=hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - cpu=hppa1.1 - vendor=hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - cpu=hppa1.0 - vendor=hp - ;; - i*86v32) - cpu=`echo "$1" | sed -e 's/86.*/86/'` - vendor=pc - basic_os=sysv32 - ;; - i*86v4*) - cpu=`echo "$1" | sed -e 's/86.*/86/'` - vendor=pc - basic_os=sysv4 - ;; - i*86v) - cpu=`echo "$1" | sed -e 's/86.*/86/'` - vendor=pc - basic_os=sysv - ;; - i*86sol2) - cpu=`echo "$1" | sed -e 's/86.*/86/'` - vendor=pc - basic_os=solaris2 - ;; - j90 | j90-cray) - cpu=j90 - vendor=cray - basic_os=${basic_os:-unicos} - ;; - iris | iris4d) - cpu=mips - vendor=sgi - case $basic_os in - irix*) - ;; - *) - basic_os=irix4 - ;; 
- esac - ;; - miniframe) - cpu=m68000 - vendor=convergent - ;; - *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) - cpu=m68k - vendor=atari - basic_os=mint - ;; - news-3600 | risc-news) - cpu=mips - vendor=sony - basic_os=newsos - ;; - next | m*-next) - cpu=m68k - vendor=next - case $basic_os in - openstep*) - ;; - nextstep*) - ;; - ns2*) - basic_os=nextstep2 - ;; - *) - basic_os=nextstep3 - ;; - esac - ;; - np1) - cpu=np1 - vendor=gould - ;; - op50n-* | op60c-*) - cpu=hppa1.1 - vendor=oki - basic_os=proelf - ;; - pa-hitachi) - cpu=hppa1.1 - vendor=hitachi - basic_os=hiuxwe2 - ;; - pbd) - cpu=sparc - vendor=tti - ;; - pbb) - cpu=m68k - vendor=tti - ;; - pc532) - cpu=ns32k - vendor=pc532 - ;; - pn) - cpu=pn - vendor=gould - ;; - power) - cpu=power - vendor=ibm - ;; - ps2) - cpu=i386 - vendor=ibm - ;; - rm[46]00) - cpu=mips - vendor=siemens - ;; - rtpc | rtpc-*) - cpu=romp - vendor=ibm - ;; - sde) - cpu=mipsisa32 - vendor=sde - basic_os=${basic_os:-elf} - ;; - simso-wrs) - cpu=sparclite - vendor=wrs - basic_os=vxworks - ;; - tower | tower-32) - cpu=m68k - vendor=ncr - ;; - vpp*|vx|vx-*) - cpu=f301 - vendor=fujitsu - ;; - w65) - cpu=w65 - vendor=wdc - ;; - w89k-*) - cpu=hppa1.1 - vendor=winbond - basic_os=proelf - ;; - none) - cpu=none - vendor=none - ;; - leon|leon[3-9]) - cpu=sparc - vendor=$basic_machine - ;; - leon-*|leon[3-9]-*) - cpu=sparc - vendor=`echo "$basic_machine" | sed 's/-.*//'` - ;; - - *-*) - # shellcheck disable=SC2162 - saved_IFS=$IFS - IFS="-" read cpu vendor <&2 - exit 1 - ;; - esac - ;; -esac - -# Here we canonicalize certain aliases for manufacturers. -case $vendor in - digital*) - vendor=dec - ;; - commodore*) - vendor=cbm - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if test x$basic_os != x -then - -# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just -# set os. 
-case $basic_os in - gnu/linux*) - kernel=linux - os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'` - ;; - os2-emx) - kernel=os2 - os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'` - ;; - nto-qnx*) - kernel=nto - os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'` - ;; - *-*) - # shellcheck disable=SC2162 - saved_IFS=$IFS - IFS="-" read kernel os <&2 - exit 1 - ;; -esac - -# As a final step for OS-related things, validate the OS-kernel combination -# (given a valid OS), if there is a kernel. -case $kernel-$os in - linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \ - | linux-musl* | linux-relibc* | linux-uclibc* ) - ;; - uclinux-uclibc* ) - ;; - -dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* ) - # These are just libc implementations, not actual OSes, and thus - # require a kernel. - echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 - exit 1 - ;; - kfreebsd*-gnu* | kopensolaris*-gnu*) - ;; - vxworks-simlinux | vxworks-simwindows | vxworks-spe) - ;; - nto-qnx*) - ;; - os2-emx) - ;; - *-eabi* | *-gnueabi*) - ;; - -*) - # Blank kernel with real OS is always fine. - ;; - *-*) - echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 - exit 1 - ;; -esac - -# Here we handle the case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. 
-case $vendor in - unknown) - case $cpu-$os in - *-riscix*) - vendor=acorn - ;; - *-sunos*) - vendor=sun - ;; - *-cnk* | *-aix*) - vendor=ibm - ;; - *-beos*) - vendor=be - ;; - *-hpux*) - vendor=hp - ;; - *-mpeix*) - vendor=hp - ;; - *-hiux*) - vendor=hitachi - ;; - *-unos*) - vendor=crds - ;; - *-dgux*) - vendor=dg - ;; - *-luna*) - vendor=omron - ;; - *-genix*) - vendor=ns - ;; - *-clix*) - vendor=intergraph - ;; - *-mvs* | *-opened*) - vendor=ibm - ;; - *-os400*) - vendor=ibm - ;; - s390-* | s390x-*) - vendor=ibm - ;; - *-ptx*) - vendor=sequent - ;; - *-tpf*) - vendor=ibm - ;; - *-vxsim* | *-vxworks* | *-windiss*) - vendor=wrs - ;; - *-aux*) - vendor=apple - ;; - *-hms*) - vendor=hitachi - ;; - *-mpw* | *-macos*) - vendor=apple - ;; - *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) - vendor=atari - ;; - *-vos*) - vendor=stratus - ;; - esac - ;; -esac - -echo "$cpu-$vendor-${kernel:+$kernel-}$os" -exit - -# Local variables: -# eval: (add-hook 'before-save-hook 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/paddlespeech/audio/third_party/patches/libmad.patch b/paddlespeech/audio/third_party/patches/libmad.patch deleted file mode 100644 index a805787831f48ecde0eebc9468440ee179f55c75..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/patches/libmad.patch +++ /dev/null @@ -1,86 +0,0 @@ -See the followings for the origin of this patch -http://www.linuxfromscratch.org/blfs/view/svn/multimedia/libmad.html -http://www.linuxfromscratch.org/patches/blfs/svn/libmad-0.15.1b-fixes-1.patch ---- src/libmad/configure 2004-02-05 09:34:07.000000000 +0000 -+++ src/libmad/configure.new 2020-06-30 21:10:28.528018931 +0000 -@@ -19083,71 +19083,7 @@ - - if test "$GCC" = yes - then -- if test -z "$arch" -- then -- case "$host" in -- i386-*) ;; -- i?86-*) arch="-march=i486" ;; -- arm*-empeg-*) arch="-march=armv4 -mtune=strongarm1100" ;; -- armv4*-*) arch="-march=armv4 
-mtune=strongarm" ;; -- powerpc-*) ;; -- mips*-agenda-*) arch="-mcpu=vr4100" ;; -- mips*-luxsonor-*) arch="-mips1 -mcpu=r3000 -Wa,-m4010" ;; -- esac -- fi -- -- case "$optimize" in -- -O|"-O "*) -- optimize="-O" -- optimize="$optimize -fforce-mem" -- optimize="$optimize -fforce-addr" -- : #x optimize="$optimize -finline-functions" -- : #- optimize="$optimize -fstrength-reduce" -- optimize="$optimize -fthread-jumps" -- optimize="$optimize -fcse-follow-jumps" -- optimize="$optimize -fcse-skip-blocks" -- : #x optimize="$optimize -frerun-cse-after-loop" -- : #x optimize="$optimize -frerun-loop-opt" -- : #x optimize="$optimize -fgcse" -- optimize="$optimize -fexpensive-optimizations" -- optimize="$optimize -fregmove" -- : #* optimize="$optimize -fdelayed-branch" -- : #x optimize="$optimize -fschedule-insns" -- optimize="$optimize -fschedule-insns2" -- : #? optimize="$optimize -ffunction-sections" -- : #? optimize="$optimize -fcaller-saves" -- : #> optimize="$optimize -funroll-loops" -- : #> optimize="$optimize -funroll-all-loops" -- : #x optimize="$optimize -fmove-all-movables" -- : #x optimize="$optimize -freduce-all-givs" -- : #? 
optimize="$optimize -fstrict-aliasing" -- : #* optimize="$optimize -fstructure-noalias" -- -- case "$host" in -- arm*-*) -- optimize="$optimize -fstrength-reduce" -- ;; -- mips*-*) -- optimize="$optimize -fstrength-reduce" -- optimize="$optimize -finline-functions" -- ;; -- i?86-*) -- optimize="$optimize -fstrength-reduce" -- ;; -- powerpc-apple-*) -- # this triggers an internal compiler error with gcc2 -- : #optimize="$optimize -fstrength-reduce" -- -- # this is really only beneficial with gcc3 -- : #optimize="$optimize -finline-functions" -- ;; -- *) -- # this sometimes provokes bugs in gcc 2.95.2 -- : #optimize="$optimize -fstrength-reduce" -- ;; -- esac -- ;; -- esac -+ optimize="-O2" - fi - - case "$host" in -@@ -21497,6 +21433,7 @@ - then - case "$host" in - i?86-*) FPM="INTEL" ;; -+ x86_64*) FPM="64BIT" ;; - arm*-*) FPM="ARM" ;; - mips*-*) FPM="MIPS" ;; - sparc*-*) FPM="SPARC" ;; diff --git a/paddlespeech/audio/third_party/patches/sox.patch b/paddlespeech/audio/third_party/patches/sox.patch deleted file mode 100644 index fe8df945c078045f58dc661a5a02d8c5f38599ca..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/patches/sox.patch +++ /dev/null @@ -1,16 +0,0 @@ -See https://github.com/pytorch/audio/pull/1297 -diff -ru sox/src/formats.c sox/src/formats.c ---- sox/src/formats.c 2014-10-26 19:55:50.000000000 -0700 -+++ sox/src/formats.c 2021-02-22 16:01:02.833144070 -0800 -@@ -333,6 +333,10 @@ - assert(ft); - if (!ft->fp) - return sox_false; -- fstat(fileno((FILE*)ft->fp), &st); -+ int fd = fileno((FILE*)ft->fp); -+ if (fd < 0) -+ return sox_false; -+ if (fstat(fd, &st) < 0) -+ return sox_false; - return ((st.st_mode & S_IFMT) == S_IFREG); - } diff --git a/paddlespeech/audio/third_party/sox/CMakeLists.txt b/paddlespeech/audio/third_party/sox/CMakeLists.txt deleted file mode 100644 index 8a5bc55c731f1cbcfa5304166645324b0700fed6..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/third_party/sox/CMakeLists.txt +++ 
/dev/null @@ -1,254 +0,0 @@ -find_package(PkgConfig REQUIRED) - -include(ExternalProject) - -set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../install) -set(ARCHIVE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../archives) -set(patch_dir ${CMAKE_CURRENT_SOURCE_DIR}/../patches) -set(COMMON_ARGS --quiet --disable-shared --enable-static --prefix=${INSTALL_DIR} --with-pic --disable-dependency-tracking --disable-debug --disable-examples --disable-doc) - -# To pass custom environment variables to ExternalProject_Add command, -# we need to do `${CMAKE_COMMAND} -E env ${envs} `. -# https://stackoverflow.com/a/62437353 -# We constrcut the custom environment variables here -set(envs - "PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig" - "LDFLAGS=-L${INSTALL_DIR}/lib $ENV{LDFLAGS}" - "CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}" -) - -if (BUILD_MAD) - ExternalProject_Add(mad - PREFIX ${CMAKE_CURRENT_BINARY_DIR} - DOWNLOAD_DIR ${ARCHIVE_DIR} - URL https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz - URL_HASH SHA256=bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690 - PATCH_COMMAND patch < ${patch_dir}/libmad.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/mad/ - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/mad/configure ${COMMON_ARGS} - DOWNLOAD_NO_PROGRESS ON - LOG_DOWNLOAD ON - LOG_UPDATE ON - LOG_CONFIGURE ON - LOG_BUILD ON - LOG_INSTALL ON - LOG_MERGED_STDOUTERR ON - LOG_OUTPUT_ON_FAILURE ON - ) -endif (BUILD_MAD) - -ExternalProject_Add(amr - PREFIX ${CMAKE_CURRENT_BINARY_DIR} - DOWNLOAD_DIR ${ARCHIVE_DIR} - URL https://sourceforge.net/projects/opencore-amr/files/opencore-amr/opencore-amr-0.1.5.tar.gz - URL_HASH SHA256=2c006cb9d5f651bfb5e60156dbff6af3c9d35c7bbcc9015308c0aff1e14cd341 - PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/amr/ - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} 
${CMAKE_CURRENT_BINARY_DIR}/src/amr/configure ${COMMON_ARGS} - DOWNLOAD_NO_PROGRESS ON - LOG_DOWNLOAD ON - LOG_UPDATE ON - LOG_CONFIGURE ON - LOG_BUILD ON - LOG_INSTALL ON - LOG_MERGED_STDOUTERR ON - LOG_OUTPUT_ON_FAILURE ON -) - -ExternalProject_Add(lame - PREFIX ${CMAKE_CURRENT_BINARY_DIR} - DOWNLOAD_DIR ${ARCHIVE_DIR} - URL https://downloads.sourceforge.net/project/lame/lame/3.99/lame-3.99.5.tar.gz - URL_HASH SHA256=24346b4158e4af3bd9f2e194bb23eb473c75fb7377011523353196b19b9a23ff - PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/lame/ - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/lame/configure ${COMMON_ARGS} --enable-nasm - DOWNLOAD_NO_PROGRESS ON - LOG_DOWNLOAD ON - LOG_UPDATE ON - LOG_CONFIGURE ON - LOG_BUILD ON - LOG_INSTALL ON - LOG_MERGED_STDOUTERR ON - LOG_OUTPUT_ON_FAILURE ON -) - -ExternalProject_Add(ogg - PREFIX ${CMAKE_CURRENT_BINARY_DIR} - DOWNLOAD_DIR ${ARCHIVE_DIR} - URL https://ftp.osuosl.org/pub/xiph/releases/ogg/libogg-1.3.3.tar.gz - URL_HASH SHA256=c2e8a485110b97550f453226ec644ebac6cb29d1caef2902c007edab4308d985 - PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/ogg/ - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/ogg/configure ${COMMON_ARGS} - DOWNLOAD_NO_PROGRESS ON - LOG_DOWNLOAD ON - LOG_UPDATE ON - LOG_CONFIGURE ON - LOG_BUILD ON - LOG_INSTALL ON - LOG_MERGED_STDOUTERR ON - LOG_OUTPUT_ON_FAILURE ON -) - -ExternalProject_Add(flac - PREFIX ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ogg - DOWNLOAD_DIR ${ARCHIVE_DIR} - URL https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz - URL_HASH SHA256=91cfc3ed61dc40f47f050a109b08610667d73477af6ef36dcad31c31a4a8d53f - PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/flac/ - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/flac/configure 
${COMMON_ARGS} --with-ogg --disable-cpplibs - DOWNLOAD_NO_PROGRESS ON - LOG_DOWNLOAD ON - LOG_UPDATE ON - LOG_CONFIGURE ON - LOG_BUILD ON - LOG_INSTALL ON - LOG_MERGED_STDOUTERR ON - LOG_OUTPUT_ON_FAILURE ON -) - -ExternalProject_Add(vorbis - PREFIX ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ogg - DOWNLOAD_DIR ${ARCHIVE_DIR} - URL https://ftp.osuosl.org/pub/xiph/releases/vorbis/libvorbis-1.3.6.tar.gz - URL_HASH SHA256=6ed40e0241089a42c48604dc00e362beee00036af2d8b3f46338031c9e0351cb - PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/vorbis/ - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/vorbis/configure ${COMMON_ARGS} --with-ogg - DOWNLOAD_NO_PROGRESS ON - LOG_DOWNLOAD ON - LOG_UPDATE ON - LOG_CONFIGURE ON - LOG_BUILD ON - LOG_INSTALL ON - LOG_MERGED_STDOUTERR ON - LOG_OUTPUT_ON_FAILURE ON -) - -ExternalProject_Add(opus - PREFIX ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ogg - DOWNLOAD_DIR ${ARCHIVE_DIR} - URL https://ftp.osuosl.org/pub/xiph/releases/opus/opus-1.3.1.tar.gz - URL_HASH SHA256=65b58e1e25b2a114157014736a3d9dfeaad8d41be1c8179866f144a2fb44ff9d - PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/opus/ - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/opus/configure ${COMMON_ARGS} --with-ogg - DOWNLOAD_NO_PROGRESS ON - LOG_DOWNLOAD ON - LOG_UPDATE ON - LOG_CONFIGURE ON - LOG_BUILD ON - LOG_INSTALL ON - LOG_MERGED_STDOUTERR ON - LOG_OUTPUT_ON_FAILURE ON -) - -ExternalProject_Add(opusfile - PREFIX ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS opus - DOWNLOAD_DIR ${ARCHIVE_DIR} - URL https://ftp.osuosl.org/pub/xiph/releases/opus/opusfile-0.12.tar.gz - URL_HASH SHA256=118d8601c12dd6a44f52423e68ca9083cc9f2bfe72da7a8c1acb22a80ae3550b - PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/opusfile/ - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} 
${CMAKE_CURRENT_BINARY_DIR}/src/opusfile/configure ${COMMON_ARGS} --disable-http - DOWNLOAD_NO_PROGRESS ON - LOG_DOWNLOAD ON - LOG_UPDATE ON - LOG_CONFIGURE ON - LOG_BUILD ON - LOG_INSTALL ON - LOG_MERGED_STDOUTERR ON - LOG_OUTPUT_ON_FAILURE ON -) - -# OpenMP is by default compiled against GNU OpenMP, which conflicts with the version of OpenMP that PyTorch uses. -# See https://github.com/pytorch/audio/pull/1026 -# TODO: Add flags like https://github.com/suphoff/pytorch_parallel_extension_cpp/blob/master/setup.py -set(SOX_OPTIONS - --disable-openmp - --with-amrnb - --with-amrwb - --with-flac - --with-lame - --with-oggvorbis - --with-opus - --without-alsa - --without-ao - --without-coreaudio - --without-oss - --without-id3tag - --without-ladspa - --without-magic - --without-png - --without-pulseaudio - --without-sndfile - --without-sndio - --without-sunaudio - --without-waveaudio - --without-wavpack - --without-twolame - ) - -set(SOX_LIBRARIES - ${INSTALL_DIR}/lib/libsox.a - ${INSTALL_DIR}/lib/libopencore-amrnb.a - ${INSTALL_DIR}/lib/libopencore-amrwb.a - ${INSTALL_DIR}/lib/libmp3lame.a - ${INSTALL_DIR}/lib/libFLAC.a - ${INSTALL_DIR}/lib/libopusfile.a - ${INSTALL_DIR}/lib/libopus.a - ${INSTALL_DIR}/lib/libvorbisenc.a - ${INSTALL_DIR}/lib/libvorbisfile.a - ${INSTALL_DIR}/lib/libvorbis.a - ${INSTALL_DIR}/lib/libogg.a - ) - -set(sox_depends - ogg flac vorbis opusfile lame amr - ) - -if (BUILD_MAD) - list( - APPEND - SOX_OPTIONS - --with-mad - ) - list( - APPEND - SOX_LIBRARIES - ${INSTALL_DIR}/lib/libmad.a - ) - list( - APPEND - sox_depends - mad - ) -else () - list( - APPEND - SOX_OPTIONS - --without-mad - ) -endif (BUILD_MAD) - -ExternalProject_Add(sox - PREFIX ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ${sox_depends} - DOWNLOAD_DIR ${ARCHIVE_DIR} - URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2 - URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c - PATCH_COMMAND patch -p1 < ${patch_dir}/sox.patch && cp 
${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/sox/ - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/sox/configure ${COMMON_ARGS} ${SOX_OPTIONS} - BUILD_BYPRODUCTS ${SOX_LIBRARIES} - DOWNLOAD_NO_PROGRESS ON - LOG_DOWNLOAD ON - LOG_UPDATE ON - LOG_CONFIGURE ON - LOG_BUILD ON - LOG_INSTALL ON - LOG_MERGED_STDOUTERR ON - LOG_OUTPUT_ON_FAILURE ON -) - -add_library(libsox INTERFACE) -add_dependencies(libsox sox) -target_include_directories(libsox INTERFACE ${INSTALL_DIR}/include) -target_link_libraries(libsox INTERFACE ${SOX_LIBRARIES}) \ No newline at end of file diff --git a/paddlespeech/audio/transform/spectrogram.py b/paddlespeech/audio/transform/spectrogram.py index 864f3f9940b7a34d81c1836157c2740c9b85c1ca..86c0b953e38126f6567663128e1d96f8e70f176e 100644 --- a/paddlespeech/audio/transform/spectrogram.py +++ b/paddlespeech/audio/transform/spectrogram.py @@ -17,7 +17,7 @@ import numpy as np import paddle from python_speech_features import logfbank -from ..compliance import kaldi +from paddleaudio.compliance import kaldi def stft(x, diff --git a/paddlespeech/audio/utils/sox_utils.py b/paddlespeech/audio/utils/sox_utils.py deleted file mode 100644 index 37696a5d91f29a3b83d1e661b2a87f6d66b98670..0000000000000000000000000000000000000000 --- a/paddlespeech/audio/utils/sox_utils.py +++ /dev/null @@ -1,101 +0,0 @@ -from typing import Dict, List - -from paddlespeech.audio._internal import module_utils as _mod_utils -from paddlespeech.audio import _paddleaudio - -@_mod_utils.requires_sox() -def set_seed(seed: int): - """Set libsox's PRNG - - Args: - seed (int): seed value. valid range is int32. - - See Also: - http://sox.sourceforge.net/sox.html - """ - _paddleaudio.sox_utils_set_seed(seed) - - -@_mod_utils.requires_sox() -def set_verbosity(verbosity: int): - """Set libsox's verbosity - - Args: - verbosity (int): Set verbosity level of libsox. 
- - * ``1`` failure messages - * ``2`` warnings - * ``3`` details of processing - * ``4``-``6`` increasing levels of debug messages - - See Also: - http://sox.sourceforge.net/sox.html - """ - _paddleaudio.sox_utils_set_verbosity(verbosity) - - -@_mod_utils.requires_sox() -def set_buffer_size(buffer_size: int): - """Set buffer size for sox effect chain - - Args: - buffer_size (int): Set the size in bytes of the buffers used for processing audio. - - See Also: - http://sox.sourceforge.net/sox.html - """ - _paddleaudio.sox_utils_set_buffer_size(buffer_size) - - -@_mod_utils.requires_sox() -def set_use_threads(use_threads: bool): - """Set multithread option for sox effect chain - - Args: - use_threads (bool): When ``True``, enables ``libsox``'s parallel effects channels processing. - To use mutlithread, the underlying ``libsox`` has to be compiled with OpenMP support. - - See Also: - http://sox.sourceforge.net/sox.html - """ - _paddleaudio.sox_utils_set_use_threads(use_threads) - - -@_mod_utils.requires_sox() -def list_effects() -> Dict[str, str]: - """List the available sox effect names - - Returns: - Dict[str, str]: Mapping from ``effect name`` to ``usage`` - """ - return dict(_paddleaudio.sox_utils_list_effects()) - - -@_mod_utils.requires_sox() -def list_read_formats() -> List[str]: - """List the supported audio formats for read - - Returns: - List[str]: List of supported audio formats - """ - return _paddleaudio.sox_utils_list_read_formats() - - -@_mod_utils.requires_sox() -def list_write_formats() -> List[str]: - """List the supported audio formats for write - - Returns: - List[str]: List of supported audio formats - """ - return _paddleaudio.sox_utils_list_write_formats() - - -@_mod_utils.requires_sox() -def get_buffer_size() -> int: - """Get buffer size for sox effect chain - - Returns: - int: size in bytes of buffers used for processing audio. - """ - return _paddleaudio.sox_utils_get_buffer_size()