# Common audio helpers: window functions, DFT matrices, and Hz/mel/dB conversions.
import math
from typing import List, Optional, Tuple, Union

import numpy as np
import paddle


# https://github.com/kaldi-asr/kaldi/blob/cbed4ff688/src/feat/feature-window.cc#L109
def povey_window(frame_len: int) -> np.ndarray:
    """Build a Povey window: a Hann window raised to the power 0.85.

    Args:
        frame_len (int): window length in samples.

    Returns:
        np.ndarray: window coeffs, shape ``(frame_len,)``.
    """
    if frame_len <= 1:
        # The phase step 2*pi/(frame_len - 1) is undefined for a single
        # sample; return unity instead of dividing by zero.
        return np.ones(frame_len)
    phase = 2 * np.pi / (frame_len - 1) * np.arange(frame_len)
    return (0.5 - 0.5 * np.cos(phase)) ** 0.85

def hann_window(frame_len: int) -> np.ndarray:
    """Build a symmetric Hann window: ``0.5 - 0.5 * cos(2*pi*i / (frame_len - 1))``.

    Args:
        frame_len (int): window length in samples.

    Returns:
        np.ndarray: window coeffs, shape ``(frame_len,)``.
    """
    if frame_len <= 1:
        # The phase step 2*pi/(frame_len - 1) is undefined for a single
        # sample; return unity instead of dividing by zero.
        return np.ones(frame_len)
    phase = 2 * np.pi / (frame_len - 1) * np.arange(frame_len)
    return 0.5 - 0.5 * np.cos(phase)

def sine_window(frame_len: int) -> np.ndarray:
    """Build a sine window: ``sin(pi * i / (frame_len - 1))``.

    Args:
        frame_len (int): window length in samples.

    Returns:
        np.ndarray: window coeffs, shape ``(frame_len,)``.
    """
    if frame_len <= 1:
        # The phase step 2*pi/(frame_len - 1) is undefined for a single
        # sample; return unity instead of dividing by zero.
        return np.ones(frame_len)
    step = 2 * np.pi / (frame_len - 1)
    # Half the Hann phase: the sine spans half a period over the window.
    return np.sin(0.5 * step * np.arange(frame_len))

def hamm_window(frame_len: int) -> np.ndarray:
    """Build a symmetric Hamming window: ``0.54 - 0.46 * cos(2*pi*i / (frame_len - 1))``.

    Args:
        frame_len (int): window length in samples.

    Returns:
        np.ndarray: window coeffs, shape ``(frame_len,)``.
    """
    if frame_len <= 1:
        # The phase step 2*pi/(frame_len - 1) is undefined for a single
        # sample; return unity instead of dividing by zero.
        return np.ones(frame_len)
    phase = 2 * np.pi / (frame_len - 1) * np.arange(frame_len)
    return 0.54 - 0.46 * np.cos(phase)

def get_window(wintype: Optional[str], winlen: int) -> np.ndarray:
    """Return the coefficients of the requested window function.

    Args:
        wintype (Optional[str]): window type. A falsy value (``None`` or
            ``""``) or ``"rectangular"`` selects an all-ones window; the
            other supported names are ``"hann"``, ``"hamm"``, ``"povey"``
            and ``"sine"``.
        winlen (int): window length in samples.

    Raises:
        ValueError: if ``wintype`` is not one of the supported names.

    Returns:
        np.ndarray: window coeffs.
    """
    if not wintype or wintype == 'rectangular':
        window = np.ones(winlen)
    elif wintype == "hann":
        window = hann_window(winlen)
    elif wintype == "hamm":
        window = hamm_window(winlen)
    elif wintype == "povey":
        window = povey_window(winlen)
    elif wintype == "sine":
        # sine_window is defined alongside the other windows, so expose it here too.
        window = sine_window(winlen)
    else:
        msg = f"{wintype} Not supported yet!"
        raise ValueError(msg)
    return window
    
   
def dft_matrix(n_fft: int, winlen: Optional[int] = None, n_bin: Optional[int] = None) -> Tuple[np.ndarray, np.ndarray, int]:
    """Build the real and imaginary parts of a truncated DFT matrix.

    Entry ``[k, n]`` is ``cos(2*pi*k*n/n_fft)`` for the real part and
    ``-sin(2*pi*k*n/n_fft)`` for the imaginary part.
    See https://en.wikipedia.org/wiki/Discrete_Fourier_transform

    Args:
        n_fft (int): number of FFT points.
        winlen (Optional[int]): window length in samples. Defaults to ``n_bin``.
        n_bin (Optional[int]): number of frequency bins (rows) to keep.
            Defaults to ``1 + n_fft // 2``.

    Returns:
        Tuple[np.ndarray, np.ndarray, int]: ``(w_real, w_imag, kernel_size)``
        where both matrices have shape ``(n_bin, kernel_size)`` and
        ``kernel_size = min(n_fft, winlen)``.
    """
    if n_bin is None:
        n_bin = 1 + n_fft // 2
    if winlen is None:
        winlen = n_bin
    # https://github.com/numpy/numpy/blob/v1.20.0/numpy/fft/_pocketfft.py#L49
    kernel_size = min(n_fft, winlen)

    # Only the first kernel_size sample positions are kept, so build just those columns.
    n = np.arange(kernel_size, dtype=float)
    k = np.arange(n_bin).reshape(-1, 1)
    phase = 2 * np.pi * k * n / n_fft  # [n_bin, kernel_size] by broadcasting
    w_real = np.cos(phase)
    w_imag = -np.sin(phase)
    return w_real, w_imag, kernel_size
    

def dft_matrix_fast(n_fft: int, winlen: Optional[int] = None, n_bin: Optional[int] = None) -> Tuple[np.ndarray, np.ndarray, int]:
    """Build the real and imaginary parts of a truncated DFT matrix via ``np.fft.fft``.

    The full matrix is obtained by transforming the ``n_fft`` x ``n_fft``
    identity, then cropped to the first ``n_bin`` rows and ``kernel_size``
    columns.
    See https://en.wikipedia.org/wiki/DFT_matrix and
    https://ccrma.stanford.edu/~jos/st/Matrix_Formulation_DFT.html

    Args:
        n_fft (int): number of FFT points.
        winlen (Optional[int]): window length in samples. Defaults to ``n_bin``.
        n_bin (Optional[int]): number of frequency bins (rows) to keep.
            Defaults to ``1 + n_fft // 2``.

    Returns:
        Tuple[np.ndarray, np.ndarray, int]: ``(w_real, w_imag, kernel_size)``
        with ``kernel_size = min(n_fft, winlen)``.
    """
    if n_bin is None:
        n_bin = 1 + n_fft // 2
    if winlen is None:
        winlen = n_bin
    # https://github.com/numpy/numpy/blob/v1.20.0/numpy/fft/_pocketfft.py#L49
    kernel_size = min(n_fft, winlen)

    # n_bin is a local here: this is a free function, so there is no `self`.
    weight = np.fft.fft(np.eye(n_fft))[:n_bin, :kernel_size]
    w_real = weight.real
    w_imag = weight.imag
    return w_real, w_imag, kernel_size
    

def bin2hz(bin: Union[List[int], np.ndarray], N: int, sr: int) -> List[float]:
    """FFT bins to Hz.

    http://practicalcryptography.com/miscellaneous/machine-learning/intuitive-guide-discrete-fourier-transform/

    Args:
        bin (List[int] or np.ndarray): bin index.
        N (int): the number of samples, or FFT points.
        sr (int): sampling rate.

    Returns:
        List[float]: Hz's, computed as ``bin * sr / N``.
    """
    # Convert first so a plain Python list is scaled element-wise
    # (list * float is a TypeError).
    hz = np.asarray(bin) * float(sr) / N
    return hz.tolist()
        
        
def hz2mel(hz):
    """Hertz to Mels.

    Computes ``1127 * ln(1 + hz / 700)``.

    Args:
        hz: a value in Hz. A numpy array is also accepted; the conversion
            is then applied element-wise.

    Returns:
        The value in Mels. For an array input, an array of the same size.
    """
    scaled = 1 + hz / 700.0
    mel = 1127 * np.log(scaled)
    return mel

def mel2hz(mel):
    """Convert a value in Mels to Hertz

    Computes ``700 * (exp(mel / 1127) - 1)``.

    :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
    :returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
    """
    return 700 * (np.exp(mel / 1127.0) - 1)



def rms_to_db(rms: float):
    """Convert a root-mean-square amplitude to decibels.

    The input is floored at 1e-16 before taking the logarithm, so zero
    or negative values map to a finite result.

    Args:
        rms ([float]): root mean square

    Returns:
        float: dB, i.e. ``20 * log10(rms)``
    """
    floor = 1e-16
    clipped = max(floor, rms)
    return 20.0 * math.log10(clipped)


def rms_to_dbfs(rms: float):
    """Convert a root-mean-square amplitude to dBFS.

    https://fireattack.wordpress.com/2017/02/06/replaygain-loudness-normalization-and-applications/
    A sine wave with amplitude 1 has an RMS of 0.7071, which is -3.0103 dB,
    so the two scales differ by that constant offset:

    dB = dBFS + 3.0103
    dBFS = dB - 3.0103
    e.g. 0 dB = -3.0103 dBFS

    Args:
        rms ([float]): root mean square

    Returns:
        float: dBFS
    """
    decibels = rms_to_db(rms)
    return decibels - 3.0103


def max_dbfs(sample_data: np.ndarray):
    """Peak dBFS, taken from the sample with the largest absolute value.

    Args:
        sample_data ([np.ndarray]): float array, [-1, 1].

    Returns:
        float: dBFS
    """
    # Using the peak sample prevents overdrive when this is used for normalization.
    lowest = abs(np.min(sample_data))
    highest = abs(np.max(sample_data))
    peak = max(lowest, highest)
    return rms_to_dbfs(peak)


def mean_dbfs(sample_data):
    """Mean dBFS, based on the RMS energy of the samples.

    Args:
        sample_data ([np.ndarray]): float array, [-1, 1].

    Returns:
        float: dBFS
    """
    # Square in float64 regardless of the input dtype.
    squared = np.square(sample_data, dtype=np.float64)
    rms = math.sqrt(np.mean(squared))
    return rms_to_dbfs(rms)


def gain_db_to_ratio(gain_db: float):
    """Convert a gain in dB to a linear amplitude ratio.

    Args:
        gain_db (float): gain in dB

    Returns:
        float: amplitude scale, ``10 ** (gain_db / 20)``
    """
    exponent = gain_db / 20.0
    return math.pow(10.0, exponent)