# Reconstructed from patch 0adfd7e3db2fdcb773c746789ffc58163d7822c8
# ("more frontend utils", Hui Zhang, 2021-06-15) against
# third_party/paddle_audio/frontend/common.py.
#
# Only the definitions the patch shows are reproduced here. The tail of
# dft_matrix_fast appears in the patch purely as hunk context and is omitted.
import math
from typing import List
from typing import Union

import numpy as np


def bin2hz(bin: Union[List[int], np.ndarray], N: int, sr: int) -> np.ndarray:
    """Convert FFT bin indices to frequencies in Hz.

    http://practicalcryptography.com/miscellaneous/machine-learning/intuitive-guide-discrete-fourier-transform/

    Args:
        bin (List[int] or np.ndarray): bin index (or indices).
        N (int): the number of samples, or FFT points.
        sr (int): sampling rate.

    Returns:
        np.ndarray: frequency in Hz for every entry of ``bin``
            (``bin * sr / N``), as float64.
    """
    # Coerce first so that plain Python lists are scaled element-wise
    # instead of raising TypeError on ``list * float``.
    indices = np.asarray(bin, dtype=np.float64)
    hz = indices * float(sr) / N
    return hz


def hz2mel(hz):
    """Convert a value in Hertz to Mels.

    Computes ``1127 * ln(1 + hz / 700)``; the inverse of ``mel2hz``.

    :param hz: a value in Hz. NumPy arrays are converted element-wise.
    :returns: the corresponding value in Mels.
    """
    return 1127 * np.log(1 + hz / 700.0)


def mel2hz(mel):
    """Convert a value in Mels to Hertz.

    :param mel: a value in Mels. This can also be a numpy array, conversion
        proceeds element-wise.
    :returns: a value in Hertz. If an array was passed in, an identical sized
        array is returned.
    """
    return 700 * (np.exp(mel / 1127.0) - 1)


def rms_to_db(rms: float) -> float:
    """Convert a root-mean-square amplitude to dB.

    Args:
        rms (float): root mean square.

    Returns:
        float: ``20 * log10(rms)``, with ``rms`` floored at 1e-16.
    """
    # The floor keeps log10 defined for silent (zero) input.
    floored = max(1e-16, rms)
    return 20.0 * math.log10(floored)


def rms_to_dbfs(rms: float) -> float:
    """Convert a root-mean-square amplitude to dBFS.

    https://fireattack.wordpress.com/2017/02/06/replaygain-loudness-normalization-and-applications/

    A sine wave with peak amplitude 1 has an RMS of 0.7071, which equals
    -3.0103 dB, hence:

        dB = dBFS + 3.0103
        dBFS = dB - 3.0103
        e.g. 0 dB = -3.0103 dBFS

    Args:
        rms (float): root mean square.

    Returns:
        float: dBFS.
    """
    return rms_to_db(rms) - 3.0103


def max_dbfs(sample_data: np.ndarray) -> float:
    """Peak dBFS, based on the sample with the largest magnitude.

    Using this value for normalization prevents overdrive.

    Args:
        sample_data (np.ndarray): float array, [-1, 1].

    Returns:
        float: dBFS.
    """
    # The largest magnitude is max(|min|, |max|) of the samples.
    peak = np.max(np.abs(sample_data))
    return rms_to_dbfs(peak)


def mean_dbfs(sample_data) -> float:
    """Mean dBFS, based on the RMS energy of all samples.

    Args:
        sample_data (np.ndarray): float array, [-1, 1].

    Returns:
        float: dBFS.
    """
    # Squares are accumulated in float64 regardless of the input dtype.
    mean_square = np.mean(np.square(sample_data, dtype=np.float64))
    return rms_to_dbfs(math.sqrt(mean_square))


def gain_db_to_ratio(gain_db: float) -> float:
    """Convert a gain in dB to an amplitude ratio.

    Args:
        gain_db (float): gain in dB.

    Returns:
        float: amplitude scale factor, ``10 ** (gain_db / 20)``.
    """
    return math.pow(10.0, gain_db / 20.0)