"""Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way.
ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to 80.0.
ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
def hz_to_mel(freq: Union[Tensor, float],
htk: bool=False) -> Union[Tensor, float]:
freq (Union[Tensor, float]): The input tensor with arbitrary shape.
htk (bool, optional): Use htk scaling. Defaults to False.
Union[Tensor, float]: Frequency in mels.
if htk:
if isinstance(freq, Tensor):
if isinstance(freq, Tensor):
def mel_to_hz(mel: Union[float, Tensor],
htk: bool=False) -> Union[float, Tensor]:
mel (Union[float, Tensor]): The mel frequency represented as a tensor with arbitrary shape.
htk (bool, optional): Use htk scaling. Defaults to False.
Union[float, Tensor]: Frequencies in Hz.
if isinstance(mel, Tensor):
f_min: float=0.0,
f_max: float=11025.0,
htk: bool=False,
dtype: str='float32') -> Tensor:
"""Compute mel frequencies.
n_mels (int, optional): Number of mel bins. Defaults to 64.
f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0.
f_max (float, optional): Maximum frequency in Hz. Defaults to 11025.0.
htk (bool, optional): Use htk scaling. Defaults to False.
dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'.
Tensor: Tensor of n_mels frequencies in Hz with shape `(n_mels,)`.
def fft_frequencies(sr: int, n_fft: int, dtype: str='float32') -> Tensor:
"""Compute fourier frequencies.
sr (int): Sample rate.
n_fft (int): Number of fft bins.
dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'.
Tensor: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`.
dtype: str='float32') -> Tensor:
"""Compute fbank matrix.
sr (int): Sample rate.
n_fft (int): Number of fft bins.
n_mels (int, optional): Number of mel bins. Defaults to 64.
f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0.
f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None.
htk (bool, optional): Use htk scaling. Defaults to False.
norm (Union[str, float], optional): Type of normalization. Defaults to 'slaney'.
dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.
Tensor: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`.
return weights
def power_to_db(spect: Tensor,
top_db: Optional[float]=None) -> Tensor:
"""Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way.
spect (Tensor): STFT power spectrogram.
ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
amin (float, optional): Minimum threshold. Defaults to 1e-10.
top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None.
Tensor: Power spectrogram in db scale.
ones = paddle.ones_like(spect)
log_spec = 10.0 * paddle.log10(paddle.maximum(ones * amin, spect))
dtype: str='float32') -> Tensor:
"""Create a discrete cosine transform(DCT) matrix.
n_mels (int): Number of mel filterbanks.
norm (str, optional): Normalization type. Defaults to 'ortho'.
dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.
Tensor: The DCT matrix with shape `(n_mels, n_mfcc)`.
__all__ = [
def _cat(x: List[Tensor], data_type: str) -> Tensor:
l = [paddle.to_tensor(_, data_type) for _ in x]
"""Extend window by 1 sample if needed for DFT-even symmetry"""
"""Extend window by 1 sample if needed for DFT-even symmetry. """
if not sym:
return M + 1, True
......@@ -56,7 +43,7 @@ def _extend(M: int, sym: bool) -> bool:
"""Handle small or incorrect window lengths"""
"""Handle small or incorrect window lengths. """
if int(M) != M or M < 0:
raise ValueError('Window length M must be a non-negative integer')
......@@ -64,14 +51,14 @@ def _len_guards(M: int) -> bool:
"""Truncate window by 1 sample if needed for DFT-even symmetry"""
"""Truncate window by 1 sample if needed for DFT-even symmetry. """
if needed:
return w[:-1]
return w
def _general_gaussian(M: int, p, sig, sym: bool=True,
dtype: str='float64') -> Tensor:
def _general_cosine(M: int, a: float, sym: bool=True,
dtype: str='float64') -> Tensor:
def _general_hamming(M: int, alpha: float, sym: bool=True,
                     dtype: str='float64') -> Tensor:
    """Compute a generalized Hamming window.

    The window is produced by `_general_cosine` from the coefficient pair
    `[alpha, 1 - alpha]`.

    Args:
        M (int): Window length, forwarded unchanged to `_general_cosine`.
        alpha (float): First coefficient; the second is `1 - alpha`.
        sym (bool, optional): Forwarded unchanged to `_general_cosine`. Defaults to True.
        dtype (str, optional): Forwarded as the `dtype` keyword. Defaults to 'float64'.

    Returns:
        Tensor: Whatever `_general_cosine` returns for these coefficients.
    """
    cosine_coeffs = [alpha, 1. - alpha]
    return _general_cosine(M, cosine_coeffs, sym, dtype=dtype)
def taylor(M: int,
......@@ -118,14 +105,6 @@ def taylor(M: int,
def _hamming(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a Hamming window.

    This is the generalized Hamming window with `alpha` fixed to 0.54.

    Args:
        M (int): Window length, forwarded unchanged to `_general_hamming`.
        sym (bool, optional): Forwarded unchanged to `_general_hamming`. Defaults to True.
        dtype (str, optional): Forwarded as the `dtype` keyword. Defaults to 'float64'.

    Returns:
        Tensor: Whatever `_general_hamming` returns for `alpha=0.54`.
    """
    # The helper is the private `_general_hamming`; the un-prefixed name
    # `general_hamming` does not exist in this module.
    return _general_hamming(M, 0.54, sym, dtype=dtype)
def _hann(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a Hann window.

    This is the generalized Hamming window with `alpha` fixed to 0.5.

    Args:
        M (int): Window length, forwarded unchanged to `_general_hamming`.
        sym (bool, optional): Forwarded unchanged to `_general_hamming`. Defaults to True.
        dtype (str, optional): Forwarded as the `dtype` keyword. Defaults to 'float64'.

    Returns:
        Tensor: Whatever `_general_hamming` returns for `alpha=0.5`.
    """
    # The helper is the private `_general_hamming`; the un-prefixed name
    # `general_hamming` does not exist in this module.
    return _general_hamming(M, 0.5, sym, dtype=dtype)
def _tukey(M: int, alpha=0.5, sym: bool=True, dtype: str='float64') -> Tensor:
def _kaiser(M: int, beta: float, sym: bool=True,
dtype: str='float64') -> Tensor:
def _gaussian(M: int, std: float, sym: bool=True,
dtype: str='float64') -> Tensor:
def _exponential(M: int,
"""Compute an exponential (or Poisson) window. """
def _triang(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
def _bohman(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
def _blackman(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a Blackman window.

    The window is produced by `_general_cosine` from the fixed coefficients
    `[0.42, 0.50, 0.08]`.

    Args:
        M (int): Window length, forwarded unchanged to `_general_cosine`.
        sym (bool, optional): Forwarded unchanged to `_general_cosine`. Defaults to True.
        dtype (str, optional): Forwarded as the `dtype` keyword. Defaults to 'float64'.

    Returns:
        Tensor: Whatever `_general_cosine` returns for these coefficients.
    """
    blackman_coeffs = [0.42, 0.50, 0.08]
    return _general_cosine(M, blackman_coeffs, sym, dtype=dtype)
def _cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
win_length: int,
fftbins: bool=True,
dtype: str='float64') -> Tensor:
window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'.
win_length (int): Number of samples.
fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True.
dtype (str, optional): The data type of the return window. Defaults to 'float64'.
Tensor: The window represented as a tensor.
winfunc = eval('_' + winstr)
"""Dynamic Time Warping.
return input_ and os.path.isfile(input_) and (input_.endswith('.job') or
