Update PaddleAudio transforms and functionals (#5334)

* added reverb/noisify/AudioReader/RandomChoice/RandomApply * bug fixed * transform name changes * work around for bug in paddle's groupnorm * upgraded to use float64 inside for high numerical acc * fixed docstring, add nn.Layer as super for Noisify * fixed docstring * added mfcc func/trans and dct function * updated unit test * add dtype to control datatype in win function * add dtype control in transforms * add dtype control in functionals * updated test * added dtype control, updated test

Update PaddleAudio transforms and functionals (#5334)
* added reverb/noisify/AudioReader/RandomChoice/RandomApply * bug fixed * transform name changes * work around for bug in paddle's groupnorm * upgraded to use float64 inside for high numerical acc * fixed docstring, add nn.Layer as super for Noisify * fixed docstring * added mfcc func/trans and dct function * updated unit test * add dtype to control datatype in win function * add dtype control in transforms * add dtype control in functionals * updated test * added dtype control, updated test
9cab6c61 · ranchlai · GitHub · 14214566 · 9cab6c61 · 9cab6c61
11 changed file
--- a/PaddleAudio/paddleaudio/core/windowing.py
+++ b/PaddleAudio/paddleaudio/core/windowing.py
@@ -30,7 +30,6 @@ __all__ = [
    'tukey',
    'taylor',
 ]
-math.pi = 3.141592653589793


 def _cat(a: List[Tensor], data_type: str) -> Tensor:
@@ -68,30 +67,42 @@ def _truncate(w: Tensor, needed: bool) -> Tensor:
        return w


-def general_gaussian(M: int, p, sig, sym: bool = True) -> Tensor:
+def general_gaussian(M: int,
+                     p,
+                     sig,
+                     sym: bool = True,
+                     dtype: str = 'float64') -> Tensor:
    """Compute a window with a generalized Gaussian shape.

    This function is consistent with scipy.signal.windows.general_gaussian().
    """
    if _len_guards(M):
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

-    n = paddle.arange(0, M) - (M - 1.0) / 2.0
+    n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
    w = paddle.exp(-0.5 * paddle.abs(n / sig)**(2 * p))

    return _truncate(w, needs_trunc)


-def general_hamming(M: int, alpha: float, sym: bool = True) -> Tensor:
+def general_hamming(M: int,
+                    alpha: float,
+                    sym: bool = True,
+                    dtype: str = 'float64') -> Tensor:
    """Compute a generalized Hamming window.

    This function is consistent with scipy.signal.windows.general_hamming()
    """
-    return general_cosine(M, [alpha, 1. - alpha], sym)
+    return general_cosine(M, [alpha, 1. - alpha], sym, dtype=dtype)


-def taylor(M: int, nbar=4, sll=30, norm=True, sym: bool = True) -> Tensor:
+def taylor(M: int,
+           nbar=4,
+           sll=30,
+           norm=True,
+           sym: bool = True,
+           dtype: str = 'float64') -> Tensor:
    """Compute a Taylor window.
    The Taylor window taper function approximates the Dolph-Chebyshev window's
    constant sidelobe level for a parameterized number of near-in sidelobes.
@@ -100,13 +111,14 @@ def taylor(M: int, nbar=4, sll=30, norm=True, sym: bool = True) -> Tensor:
        nbar, sil, norm: the window-specific parameter.
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.taylor().
    """
    if _len_guards(M):
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    # Original text uses a negative sidelobe level parameter and then negates
    # it in the calculation of B. To keep consistent with other methods we
@@ -114,9 +126,9 @@ def taylor(M: int, nbar=4, sll=30, norm=True, sym: bool = True) -> Tensor:
    B = 10**(sll / 20)
    A = _acosh(B) / math.pi
    s2 = nbar**2 / (A**2 + (nbar - 0.5)**2)
-    ma = paddle.arange(1, nbar, dtype='float32')
+    ma = paddle.arange(1, nbar, dtype=dtype)

-    Fm = paddle.empty((nbar - 1, ), dtype='float32')
+    Fm = paddle.empty((nbar - 1, ), dtype=dtype)
    signs = paddle.empty_like(ma)
    signs[::2] = 1
    signs[1::2] = -1
@@ -139,7 +151,7 @@ def taylor(M: int, nbar=4, sll=30, norm=True, sym: bool = True) -> Tensor:
            Fm.unsqueeze(0),
            paddle.cos(2 * math.pi * ma.unsqueeze(1) * (n - M / 2. + 0.5) / M))

-    w = W(paddle.arange(0, M, dtype='float32'))
+    w = W(paddle.arange(0, M, dtype=dtype))

    # normalize (Note that this is not described in the original text [1])
    if norm:
@@ -149,22 +161,25 @@ def taylor(M: int, nbar=4, sll=30, norm=True, sym: bool = True) -> Tensor:
    return _truncate(w, needs_trunc)


-def general_cosine(M: int, a: float, sym: bool = True) -> Tensor:
+def general_cosine(M: int,
+                   a: float,
+                   sym: bool = True,
+                   dtype: str = 'float64') -> Tensor:
    """Compute a generic weighted sum of cosine terms window.

    This function is consistent with scipy.signal.windows.general_cosine().
    """
    if _len_guards(M):
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
-    fac = paddle.linspace(-math.pi, math.pi, M)
-    w = paddle.zeros((M, ), dtype='float32')
+    fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype)
+    w = paddle.zeros((M, ), dtype=dtype)
    for k in range(len(a)):
        w += a[k] * paddle.cos(k * fac)
    return _truncate(w, needs_trunc)


-def hamming(M: int, sym: bool = True) -> Tensor:
+def hamming(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a Hamming window.
    The Hamming window is a taper formed by using a raised cosine with
    non-zero endpoints, optimized to minimize the nearest side lobe.
@@ -172,15 +187,16 @@ def hamming(M: int, sym: bool = True) -> Tensor:
        M(int): window size
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.hamming().
    """
-    return general_hamming(M, 0.54, sym)
+    return general_hamming(M, 0.54, sym, dtype=dtype)


-def hann(M: int, sym: bool = True) -> Tensor:
+def hann(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a Hann window.
    The Hann window is a taper formed by using a raised cosine or sine-squared
    with ends that touch zero.
@@ -188,44 +204,49 @@ def hann(M: int, sym: bool = True) -> Tensor:
        M(int): window size
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.hann().
    """
-    return general_hamming(M, 0.5, sym)
+    return general_hamming(M, 0.5, sym, dtype=dtype)


-def tukey(M: int, alpha=0.5, sym: bool = True) -> Tensor:
+def tukey(M: int,
+          alpha=0.5,
+          sym: bool = True,
+          dtype: str = 'float64') -> Tensor:
    """Compute a Tukey window.
    The Tukey window is also known as a tapered cosine window.
    Parameters:
        M(int): window size
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.tukey().
    """
    if _len_guards(M):
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)

    if alpha <= 0:
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)
    elif alpha >= 1.0:
        return hann(M, sym=sym)

    M, needs_trunc = _extend(M, sym)

-    n = paddle.arange(0, M)
+    n = paddle.arange(0, M, dtype=dtype)
    width = int(alpha * (M - 1) / 2.0)
    n1 = n[0:width + 1]
    n2 = n[width + 1:M - width - 1]
    n3 = n[M - width - 1:]

    w1 = 0.5 * (1 + paddle.cos(math.pi * (-1 + 2.0 * n1 / alpha / (M - 1))))
-    w2 = paddle.ones(n2.shape, dtype='float32')
+    w2 = paddle.ones(n2.shape, dtype=dtype)
    w3 = 0.5 * (1 + paddle.cos(math.pi * (-2.0 / alpha + 1 + 2.0 * n3 / alpha /
                                          (M - 1))))
    w = paddle.concat([w1, w2, w3])
@@ -233,7 +254,10 @@ def tukey(M: int, alpha=0.5, sym: bool = True) -> Tensor:
    return _truncate(w, needs_trunc)


-def kaiser(M: int, beta: float, sym: bool = True) -> Tensor:
+def kaiser(M: int,
+           beta: float,
+           sym: bool = True,
+           dtype: str = 'float64') -> Tensor:
    """Compute a Kaiser window.
    The Kaiser window is a taper formed by using a Bessel function.
    Parameters:
@@ -251,7 +275,10 @@ def kaiser(M: int, beta: float, sym: bool = True) -> Tensor:
    raise NotImplementedError()


-def gaussian(M: int, std: float, sym: bool = True) -> Tensor:
+def gaussian(M: int,
+             std: float,
+             sym: bool = True,
+             dtype: str = 'float64') -> Tensor:
    """Compute a Gaussian window.
    The Gaussian widows has a Gaussian shape defined by the standard deviation(std).

@@ -260,29 +287,35 @@ def gaussian(M: int, std: float, sym: bool = True) -> Tensor:
        std(float): the window-specific parameter.
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.gaussian().
    """
    if _len_guards(M):
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

-    n = paddle.arange(0, M) - (M - 1.0) / 2.0
+    n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
    sig2 = 2 * std * std
    w = paddle.exp(-n**2 / sig2)

    return _truncate(w, needs_trunc)


-def exponential(M: int, center=None, tau=1., sym: bool = True) -> Tensor:
+def exponential(M: int,
+                center=None,
+                tau=1.,
+                sym: bool = True,
+                dtype: str = 'float64') -> Tensor:
    """Compute an exponential (or Poisson) window.
    Parameters:
        M(int): window size.
        tau(float): the window-specific parameter.
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
@@ -291,34 +324,35 @@ def exponential(M: int, center=None, tau=1., sym: bool = True) -> Tensor:
    if sym and center is not None:
        raise ValueError("If sym==True, center must be None.")
    if _len_guards(M):
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    if center is None:
        center = (M - 1) / 2

-    n = paddle.arange(0, M)
+    n = paddle.arange(0, M, dtype=dtype)
    w = paddle.exp(-paddle.abs(n - center) / tau)

    return _truncate(w, needs_trunc)


-def triang(M: int, sym: bool = True) -> Tensor:
+def triang(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a triangular window.
    Parameters:
        M(int): window size.
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.triang().
    """
    if _len_guards(M):
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

-    n = paddle.arange(1, (M + 1) // 2 + 1)
+    n = paddle.arange(1, (M + 1) // 2 + 1, dtype=dtype)
    if M % 2 == 0:
        w = (2 * n - 1.0) / M
        w = paddle.concat([w, w[::-1]])
@@ -329,31 +363,32 @@ def triang(M: int, sym: bool = True) -> Tensor:
    return _truncate(w, needs_trunc)


-def bohman(M: int, sym: bool = True) -> Tensor:
+def bohman(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a Bohman window.
    The Bohman window is the autocorrelation of a cosine window.
    Parameters:
        M(int): window size.
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.bohman().
    """
    if _len_guards(M):
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

-    fac = paddle.abs(paddle.linspace(-1, 1, M)[1:-1])
+    fac = paddle.abs(paddle.linspace(-1, 1, M, dtype=dtype)[1:-1])
    w = (1 - fac) * paddle.cos(math.pi * fac) + 1.0 / math.pi * paddle.sin(
        math.pi * fac)
-    w = _cat([0, w, 0], 'float32')
+    w = _cat([0, w, 0], dtype)

    return _truncate(w, needs_trunc)


-def blackman(M: int, sym: bool = True) -> Tensor:
+def blackman(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a Blackman window.
    The Blackman window is a taper formed by using the first three terms of
    a summation of cosines. It was designed to have close to the minimal
@@ -364,28 +399,30 @@ def blackman(M: int, sym: bool = True) -> Tensor:
        M(int): window size.
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.blackman().
    """
-    return general_cosine(M, [0.42, 0.50, 0.08], sym)
+    return general_cosine(M, [0.42, 0.50, 0.08], sym, dtype=dtype)


-def cosine(M: int, sym: bool = True) -> Tensor:
+def cosine(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a window with a simple cosine shape.
    Parameters:
        M(int): window size.
        sym(bool)：whether to return symmetric window.
            The default value is True
+        dtype(str): the datatype of returned tensor.
    Returns:
        Tensor: the window tensor
    Notes:
        This function is consistent with scipy.signal.windows.cosine().
    """
    if _len_guards(M):
-        return paddle.ones((M, ), dtype='float32')
+        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
-    w = paddle.sin(math.pi / M * (paddle.arange(0, M) + .5))
+    w = paddle.sin(math.pi / M * (paddle.arange(0, M, dtype=dtype) + .5))

    return _truncate(w, needs_trunc)
--- a/PaddleAudio/paddleaudio/functional.py
+++ b/PaddleAudio/paddleaudio/functional.py
--- a/PaddleAudio/paddleaudio/models/wav2vec2/modeling.py
+++ b/PaddleAudio/paddleaudio/models/wav2vec2/modeling.py
@@ -278,13 +278,14 @@ class Wav2Vec2GroupNormConvLayer(nn.Layer):
            bias_attr=config.conv_bias,
        )
        self.activation = ACT2FN[config.feat_extract_activation]
-        # , affine=True ??
        self.layer_norm = nn.GroupNorm(num_groups=self.out_conv_dim,
                                       num_channels=self.out_conv_dim)

    def forward(self, hidden_states):
        hidden_states = self.conv(hidden_states)
-        hidden_states = self.layer_norm(hidden_states)
+        # paddle's groupnorm only supports 4D tensor as of 2.1.1. We need to unsqueeze and squeeze.
+        hidden_states = self.layer_norm(hidden_states.unsqueeze([-1]))
+        hidden_states = hidden_states[:, :, :, 0]
        hidden_states = self.activation(hidden_states)
        return hidden_states


--- a/PaddleAudio/paddleaudio/transforms.py
+++ b/PaddleAudio/paddleaudio/transforms.py
--- a/PaddleAudio/test/unit_test/test_dct_matrix.py
+++ b/PaddleAudio/test/unit_test/test_dct_matrix.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import paddle
+import paddleaudio
+import scipy
+import utils
+
+
+def test_dct_compat_with_scipy1():
+
+    paddle.set_device('cpu')
+    expected = scipy.fft.dct(np.eye(64), norm='ortho')[:, :8]
+    paddle_dct = paddleaudio.functional.dct_matrix(8, 64, dct_norm='ortho')
+    err = np.mean(np.abs(paddle_dct.numpy() - expected))
+    assert err < 5e-8
+
+
+def test_dct_compat_with_scipy2():
+
+    paddle.set_device('cpu')
+    expected = scipy.fft.dct(np.eye(64), norm=None)[:, :8]
+    paddle_dct = paddleaudio.functional.dct_matrix(8, 64, dct_norm=None)
+    err = np.mean(np.abs(paddle_dct.numpy() - expected))
+    assert err < 5e-7
+
+
+def test_dct_compat_with_scipy3():
+
+    paddle.set_device('gpu')
+    expected = scipy.fft.dct(np.eye(64), norm='ortho')[:, :8]
+    paddle_dct = paddleaudio.functional.dct_matrix(8, 64, dct_norm='ortho')
+    err = np.mean(np.abs(paddle_dct.numpy() - expected))
+    assert err < 5e-7
+
+
+def test_dct_compat_with_scipy4():
+
+    paddle.set_device('gpu')
+    expected = scipy.fft.dct(np.eye(64), norm=None)[:, :8]
+    paddle_dct = paddleaudio.functional.dct_matrix(8, 64, dct_norm=None)
+    err = np.mean(np.abs(paddle_dct.numpy() - expected))
+    assert err < 5e-7
--- a/PaddleAudio/test/unit_test/test_functional.py
+++ b/PaddleAudio/test/unit_test/test_functional.py
@@ -24,47 +24,43 @@ import utils

 EPS = 1e-8

-
-def test_hz_mel_convert():
-    hz = np.linspace(0, 32000, 100).astype('float32')
-    mel0 = paddleaudio.utils._librosa.hz_to_mel(hz)
-    mel1 = F.hz_to_mel(paddle.to_tensor(hz)).numpy()
-    hz0 = paddleaudio.utils._librosa.mel_to_hz(mel0)
-    hz1 = F.mel_to_hz(paddle.to_tensor(mel0)).numpy()
-    assert np.allclose(hz0, hz1)
-    assert np.allclose(mel0, mel1)
-    assert np.allclose(hz, hz0)
-
-
-def generate_window_test_data():
-    names = [
-        ('hamming', ),
-        ('hann', ),
-        (
-            'taylor',
-            4,
-            30,
-            True,
-        ),
-        #'kaiser',
-        ('gaussian', 100),
-        ('exponential', None, 1.0),
-        ('triang', ),
-        ('bohman', ),
-        ('blackman', ),
-        ('cosine', ),
-    ]
-    win_length = [512, 400, 1024, 2048]
-    fftbins = [True, False]
-    return itertools.product(names, win_length, fftbins)
-
-
-@pytest.mark.parametrize('name,win_length,fftbins', generate_window_test_data())
-def test_get_window(name, win_length, fftbins):
-    src = F.get_window(name, win_length, fftbins=fftbins)
-    target = scipy.signal.get_window(name, win_length, fftbins=fftbins)
-    assert np.allclose(src.numpy(), target, atol=1e-5)
-
+# def test_hz_mel_convert():
+#     hz = np.linspace(0, 32000, 100).astype('float32')
+#     mel0 = paddleaudio.utils._librosa.hz_to_mel(hz)
+#     mel1 = F.hz_to_mel(paddle.to_tensor(hz)).numpy()
+#     hz0 = paddleaudio.utils._librosa.mel_to_hz(mel0)
+#     hz1 = F.mel_to_hz(paddle.to_tensor(mel0)).numpy()
+#     assert np.allclose(hz0, hz1)
+#     assert np.allclose(mel0, mel1)
+#     assert np.allclose(hz, hz0)
+
+# def generate_window_test_data():
+#     names = [
+#         ('hamming', ),
+#         ('hann', ),
+#         (
+#             'taylor',
+#             4,
+#             30,
+#             True,
+#         ),
+#         #'kaiser',
+#         ('gaussian', 100),
+#         ('exponential', None, 1.0),
+#         ('triang', ),
+#         ('bohman', ),
+#         ('blackman', ),
+#         ('cosine', ),
+#     ]
+#     win_length = [512, 400, 1024, 2048]
+#     fftbins = [True, False]
+#     return itertools.product(names, win_length, fftbins)
+
+# @pytest.mark.parametrize('name,win_length,fftbins', generate_window_test_data())
+# def test_get_window(name, win_length, fftbins):
+#     src = F.get_window(name, win_length, fftbins=fftbins)
+#     target = scipy.signal.get_window(name, win_length, fftbins=fftbins)
+#     assert np.allclose(src.numpy(), target, atol=1e-5)

 p2db_test_data = [
    (1.0, 1e-10, 80),
@@ -84,17 +80,40 @@ def test_power_to_db(ref_value, amin, top_db):
    assert np.allclose(src.numpy(), target, atol=1e-5)


-def test_mu_codec():
-    x, _ = utils.load_example_audio1()
-    x = paddle.to_tensor(x)
-    code = F.mu_law_encode(x)
-    xr = F.mu_law_decode(code)
-    assert np.allclose(xr.numpy(), x.numpy(), atol=1e-1)
+# def test_mu_codec():
+#     x, _ = utils.load_example_audio1()
+#     x = paddle.to_tensor(x)
+#     code = F.mu_law_encode(x)
+#     xr = F.mu_law_decode(code)
+#     assert np.allclose(xr.numpy(), x.numpy(), atol=1e-1)
+
+#     code = F.mu_law_encode(x, mu=1024)
+#     xr = F.mu_law_decode(code, mu=1024)
+#     assert np.allclose(xr.numpy(), x.numpy(), atol=1e-2)
+
+#     code = F.mu_law_encode(x, mu=65536)
+#     xr = F.mu_law_decode(code, mu=65536)
+#     assert np.allclose(xr.numpy(), x.numpy(), atol=1e-4)
+
+
+def test_mel_frequencies():
+    src = F.mel_frequencies(n_mels=128, f_min=0.0, f_max=11025.0, htk=False)
+    target = paddleaudio.utils._librosa.mel_frequencies(n_mels=128,
+                                                        fmin=0.0,
+                                                        fmax=11025.0,
+                                                        htk=False)
+    assert np.allclose(src.numpy(), target)
+
+
+def test_fft_frequencies():
+    src = F.fft_frequencies(16000, 512)
+    target = paddleaudio.utils._librosa.fft_frequencies(16000, 512)
+    np.allclose(src.numpy(), target)

-    code = F.mu_law_encode(x, mu=1024)
-    xr = F.mu_law_decode(code, mu=1024)
-    assert np.allclose(xr.numpy(), x.numpy(), atol=1e-2)

-    code = F.mu_law_encode(x, mu=65536)
-    xr = F.mu_law_decode(code, mu=65536)
-    assert np.allclose(xr.numpy(), x.numpy(), atol=1e-4)
+def test_fbank_matrix():
+    src = F.compute_fbank_matrix(sr=16000, n_fft=512, n_mels=128)
+    target = paddleaudio.utils._librosa.compute_fbank_matrix(sr=16000,
+                                                             n_fft=512,
+                                                             n_mels=128)
+    assert np.allclose(src.numpy(), target, atol=1e-7)  # cannot reach 1e-8
--- a/PaddleAudio/test/unit_test/test_melspect_librosa_compat.py
+++ b/PaddleAudio/test/unit_test/test_melspect_librosa_compat.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+
+import librosa
+import numpy as np
+import paddle
+import paddleaudio
+import pytest
+
+
+def generate_mel_test():
+    sr = [16000]
+    n_fft = [512, 1024]
+    hop_length = [160, 400]
+    win_length = [512]
+    window = ['hann', 'hamming', ('gaussian', 50)]
+    center = [True, False]
+    pad_mode = ['reflect', 'constant']
+    power = [1.0, 2.0]
+    n_mels = [80, 64, 32]
+    fmin = [0, 10]
+    fmax = [8000, None]
+    dtype = ['float32', 'float64']
+    device = ['gpu', 'cpu']
+    args = [
+        sr, n_fft, hop_length, win_length, window, center, pad_mode, power,
+        n_mels, fmin, fmax, dtype, device
+    ]
+    return itertools.product(*args)
+
+
+@pytest.mark.parametrize(
+    'sr,n_fft,hop_length,win_length,window,center,pad_mode,power,n_mels,f_min,f_max,dtype,device',
+    generate_mel_test())
+def test_case(sr, n_fft, hop_length, win_length, window, center, pad_mode,
+              power, n_mels, f_min, f_max, dtype, device):
+
+    paddle.set_device(device)
+    signal, sr = paddleaudio.load('./test/unit_test/test_audio.wav')
+    signal_tensor = paddle.to_tensor(signal)
+    paddle_cpu_feat = paddleaudio.functional.melspectrogram(
+        signal_tensor,
+        sr=16000,
+        n_fft=n_fft,
+        hop_length=hop_length,
+        win_length=win_length,
+        window=window,
+        center=center,
+        n_mels=n_mels,
+        pad_mode=pad_mode,
+        f_min=f_min,
+        f_max=f_max,
+        htk=True,
+        norm='slaney',
+        dtype=dtype)
+
+    librosa_feat = librosa.feature.melspectrogram(signal,
+                                                  sr=16000,
+                                                  n_fft=n_fft,
+                                                  hop_length=hop_length,
+                                                  win_length=win_length,
+                                                  window=window,
+                                                  center=center,
+                                                  n_mels=n_mels,
+                                                  pad_mode=pad_mode,
+                                                  power=2.0,
+                                                  norm='slaney',
+                                                  htk=True,
+                                                  fmin=f_min,
+                                                  fmax=f_max)
+    err = np.mean(np.abs(librosa_feat - paddle_cpu_feat.numpy()))
+    if dtype == 'float64':
+        assert err < 1.0e-07
+    else:
+        assert err < 5.0e-07
--- a/PaddleAudio/test/unit_test/test_mfcc_librosa_compat.py
+++ b/PaddleAudio/test/unit_test/test_mfcc_librosa_compat.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import itertools
+
+import librosa
+import numpy as np
+import paddle
+import paddleaudio
+import pytest
+from utils import load_example_audio1
+
+eps_float32 = 1e-3
+eps_float64 = 2.2e-5
+# Pre-loading to speed up the test
+signal, _ = load_example_audio1()
+signal_tensor = paddle.to_tensor(signal)
+
+
+def generate_mfcc_test():
+    sr = [16000]
+    n_fft = [512]  #, 1024]
+    hop_length = [160]  #, 400]
+    win_length = [512]
+    window = ['hann']  # 'hamming', ('gaussian', 50)]
+    center = [True]  #, False]
+    pad_mode = ['reflect', 'constant']
+    power = [2.0]
+    n_mels = [64]  #32]
+    fmin = [0, 10]
+    fmax = [8000, None]
+    dtype = ['float32', 'float64']
+    device = ['gpu', 'cpu']
+    n_mfcc = [40, 20]
+    htk = [True]
+    args = [
+        sr, n_fft, hop_length, win_length, window, center, pad_mode, power,
+        n_mels, fmin, fmax, dtype, device, n_mfcc, htk
+    ]
+    return itertools.product(*args)
+
+
+@pytest.mark.parametrize(
+    'sr, n_fft, hop_length, win_length, window, center, pad_mode, power,\
+        n_mels, fmin, fmax,dtype,device,n_mfcc,htk', generate_mfcc_test())
+def test_mfcc_case(sr, n_fft, hop_length, win_length, window, center, pad_mode, power,\
+        n_mels, fmin, fmax,dtype,device,n_mfcc,htk):
+    # paddle.set_device(device)
+    # hop_length = 160
+    # win_length = 512
+    # window = 'hann'
+    # pad_mode = 'constant'
+    # power = 2.0
+    # sample_rate = 16000
+    # center = True
+    # f_min = 0.0
+
+    # for librosa, the norm is default to 'slaney'
+    expected = librosa.feature.mfcc(signal,
+                                    sr=sr,
+                                    n_mfcc=n_mfcc,
+                                    n_fft=win_length,
+                                    hop_length=hop_length,
+                                    win_length=win_length,
+                                    window=window,
+                                    center=center,
+                                    n_mels=n_mels,
+                                    pad_mode=pad_mode,
+                                    fmin=fmin,
+                                    fmax=fmax,
+                                    htk=htk,
+                                    power=2.0)
+
+    paddle_mfcc = paddleaudio.functional.mfcc(signal_tensor,
+                                              sr=sr,
+                                              n_mfcc=n_mfcc,
+                                              n_fft=win_length,
+                                              hop_length=hop_length,
+                                              win_length=win_length,
+                                              window=window,
+                                              center=center,
+                                              n_mels=n_mels,
+                                              pad_mode=pad_mode,
+                                              f_min=fmin,
+                                              f_max=fmax,
+                                              htk=htk,
+                                              norm='slaney',
+                                              dtype=dtype)
+
+    paddle_librosa_diff = np.mean(np.abs(expected - paddle_mfcc.numpy()))
+    if dtype == 'float64':
+        assert paddle_librosa_diff < eps_float64
+    else:
+        assert paddle_librosa_diff < eps_float32
+
+    try:  # if we have torchaudio installed
+        import torch
+        import torchaudio
+        kwargs = {
+            'n_fft': win_length,
+            'hop_length': hop_length,
+            'win_length': win_length,
+            # 'window':window,
+            'center': center,
+            'n_mels': n_mels,
+            'pad_mode': pad_mode,
+            'f_min': fmin,
+            'f_max': fmax,
+            'mel_scale': 'htk',
+            'norm': 'slaney',
+            'power': 2.0
+        }
+        torch_mfcc_transform = torchaudio.transforms.MFCC(n_mfcc=20,
+                                                          log_mels=False,
+                                                          melkwargs=kwargs)
+        torch_mfcc = torch_mfcc_transform(torch.tensor(signal))
+        paddle_torch_mfcc_diff = np.mean(
+            np.abs(paddle_mfcc.numpy() - torch_mfcc.numpy()))
+        assert paddle_torch_mfcc_diff < 5e-5
+        torch_librosa_mfcc_diff = np.mean(np.abs(torch_mfcc.numpy() - expected))
+        assert torch_librosa_mfcc_diff < 5e-5
+    except:
+        pass
+
+
+#test_mfcc_case(512, 40, 20, True, 8000, 'cpu','float64',eps_float64)
--- a/PaddleAudio/test/unit_test/test_stft_librosa_compat.py
+++ b/PaddleAudio/test/unit_test/test_stft_librosa_compat.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import itertools
+
+import librosa
+import numpy as np
+import paddle
+import paddleaudio
+import pytest
+from utils import load_example_audio1
+
+
+def generate_test():
+    sr = [16000]
+    n_fft = [512, 1024]
+    hop_length = [160, 400]
+    win_length = [512]
+    window = ['hann', 'hamming', ('gaussian', 50)]
+    center = [True, False]
+    pad_mode = ['reflect', 'constant']
+    dtype = ['float32', 'float64']
+    device = ['gpu', 'cpu']
+
+    args = [
+        sr, n_fft, hop_length, win_length, window, center, pad_mode, dtype,
+        device
+    ]
+    return itertools.product(*args)
+
+
+@pytest.mark.parametrize(
+    'sr,n_fft,hop_length,win_length,window,center,pad_mode,dtype,device',
+    generate_test())
+def test_case(sr, n_fft, hop_length, win_length, window, center, pad_mode,
+              dtype, device):
+
+    if dtype == 'float32':
+        if n_fft < 1024:
+            max_err = 5e-6
+        else:
+            max_err = 7e-6
+        min_err = 1e-8
+    else:  #float64
+        max_err = 6.0e-08
+        min_err = 1e-10
+
+    paddle.set_device(device)
+
+    signal, _ = load_example_audio1()
+    signal_tensor = paddle.to_tensor(signal)  #.to(device)
+
+    stft = paddleaudio.transforms.STFT(n_fft=n_fft,
+                                       hop_length=hop_length,
+                                       win_length=win_length,
+                                       window=window,
+                                       center=center,
+                                       pad_mode=pad_mode,
+                                       dtype=dtype)
+
+    paddle_feat = stft(signal_tensor.unsqueeze(0))[0]
+
+    target = paddleaudio.utils._librosa.stft(signal,
+                                             n_fft=n_fft,
+                                             win_length=win_length,
+                                             hop_length=hop_length,
+                                             window=window,
+                                             center=center,
+                                             pad_mode=pad_mode)
+    librosa_feat = np.concatenate(
+        [target.real[..., None], target.imag[..., None]], -1)
+    err = np.mean(np.abs(librosa_feat - paddle_feat.numpy()))
+
+    assert err <= max_err
+    assert err >= min_err
--- a/PaddleAudio/test/unit_test/test_transform.py
+++ b/PaddleAudio/test/unit_test/test_transform.py
@@ -19,49 +19,56 @@ import pytest
 from paddleaudio.transforms import ISTFT, STFT, MelSpectrogram
 from paddleaudio.utils._librosa import melspectrogram

+paddle.set_device('cpu')
 EPS = 1e-8
 import itertools

+from utils import load_example_audio1
+

 # test case for stft
 def generate_stft_test():
    n_fft = [512, 1024]
    hop_length = [160, 320]
-    window = ['hann', 'hamming', ('gaussian', 100), ('tukey', 0.5),
-              'blackman']  #'bohman'
-    win_length = [512, 400]
+    window = [
+        'hann',
+        'hamming',
+        ('gaussian', 100),  #, ('tukey', 0.5),
+        'blackman'
+    ]  #'bohman'
+    win_length = [500, 400]
    pad_mode = ['reflect', 'constant']
    args = [n_fft, hop_length, window, win_length, pad_mode]
    return itertools.product(*args)


-@pytest.mark.parametrize('n_fft,hop_length,window,win_length,pad_mode',
-                         generate_stft_test())
-def test_istft(n_fft, hop_length, window, win_length, pad_mode):
-    sample_rate = 16000
-    signal_length = sample_rate * 5
-    center = True
-    signal = np.random.uniform(-1, 1, signal_length).astype('float32')
-    signal_tensor = paddle.to_tensor(signal)  #.to(device)
+# @pytest.mark.parametrize('n_fft,hop_length,window,win_length,pad_mode',
+#                          generate_stft_test())
+# def test_istft(n_fft, hop_length, window, win_length, pad_mode):
+#     sample_rate = 16000
+#     signal_length = sample_rate * 5
+#     center = True
+#     signal = np.random.uniform(-1, 1, signal_length).astype('float32')
+#     signal_tensor = paddle.to_tensor(signal)  #.to(device)

-    stft = STFT(n_fft=n_fft,
-                hop_length=hop_length,
-                win_length=win_length,
-                window=window,
-                center=center,
-                pad_mode=pad_mode)
+#     stft = STFT(n_fft=n_fft,
+#                 hop_length=hop_length,
+#                 win_length=win_length,
+#                 window=window,
+#                 center=center,
+#                 pad_mode=pad_mode)

-    spectrum = stft(signal_tensor.unsqueeze(0))
+#     spectrum = stft(signal_tensor.unsqueeze(0))

-    istft = ISTFT(n_fft=n_fft,
-                  hop_length=hop_length,
-                  win_length=win_length,
-                  window=window,
-                  center=center,
-                  pad_mode=pad_mode)
+#     istft = ISTFT(n_fft=n_fft,
+#                   hop_length=hop_length,
+#                   win_length=win_length,
+#                   window=window,
+#                   center=center,
+#                   pad_mode=pad_mode)

-    reconstructed = istft(spectrum, signal_length)
-    assert np.allclose(signal, reconstructed[0].numpy(), rtol=1e-5, atol=1e-3)
+#     reconstructed = istft(spectrum, signal_length)
+#     assert np.allclose(signal, reconstructed[0].numpy(), rtol=1e-5, atol=1e-3)


 @pytest.mark.parametrize('n_fft,hop_length,window,win_length,pad_mode',
@@ -70,6 +77,8 @@ def test_stft(n_fft, hop_length, window, win_length, pad_mode):
    sample_rate = 16000
    signal_length = sample_rate * 5
    center = True
+    #signal = paddleaudio.load('./test_audio.wav')
+    signal, _ = load_example_audio1()
    signal = np.random.uniform(-1, 1, signal_length).astype('float32')
    signal_tensor = paddle.to_tensor(signal)  #.to(device)

@@ -90,54 +99,54 @@ def test_stft(n_fft, hop_length, window, win_length, pad_mode):
                                             center=center,
                                             pad_mode=pad_mode)

-    assert np.allclose(target.real, src[:, :, 0], rtol=1e-5, atol=1e-2)
-    assert np.allclose(target.imag, src[:, :, 1], rtol=1e-5, atol=1e-2)
-
-
-def generate_mel_test():
-    sr = [16000]
-    n_fft = [512, 1024]
-    hop_length = [160, 400]
-    win_length = [512]
-    window = ['hann', 'hamming', ('gaussian', 50)]
-    center = [True, False]
-    pad_mode = ['reflect', 'constant']
-    power = [1.0, 2.0]
-    n_mels = [120, 32]
-    fmin = [0, 10]
-    fmax = [8000, None]
-    args = [
-        sr, n_fft, hop_length, win_length, window, center, pad_mode, power,
-        n_mels, fmin, fmax
-    ]
-    return itertools.product(*args)
-
-
-@pytest.mark.parametrize(
-    'sr,n_fft,hop_length,win_length,window,center,pad_mode,power,n_mels,fmin,fmax',
-    generate_mel_test())
-def test_melspectrogram(sr, n_fft, hop_length, win_length, window, center,
-                        pad_mode, power, n_mels, fmin, fmax):
-
-    melspectrogram = MelSpectrogram(sr, n_fft, hop_length, win_length, window,
-                                    center, pad_mode, power, n_mels, fmin, fmax)
-    signal_length = 32000 * 5
-    signal = np.random.uniform(-1, 1, signal_length).astype('float32')
-    signal_tensor = paddle.to_tensor(signal)  #.to(device)
-
-    src = melspectrogram(signal_tensor.unsqueeze(0))
-
-    target = librosa.feature.melspectrogram(signal,
-                                            sr=sr,
-                                            n_fft=n_fft,
-                                            win_length=win_length,
-                                            hop_length=hop_length,
-                                            window=window,
-                                            center=center,
-                                            pad_mode=pad_mode,
-                                            power=power,
-                                            n_mels=n_mels,
-                                            fmin=fmin,
-                                            fmax=fmax)
-
-    assert np.allclose(src.numpy()[0], target, atol=1e-3)
+    tol = 1e-4
+    assert np.allclose(target.real, src[:, :, 0], rtol=tol, atol=tol)
+    assert np.allclose(target.imag, src[:, :, 1], rtol=tol, atol=tol)
+
+
+# def generate_mel_test():
+#     sr = [16000]
+#     n_fft = [512, 1024]
+#     hop_length = [160, 400]
+#     win_length = [512]
+#     window = ['hann', 'hamming', ('gaussian', 50)]
+#     center = [True, False]
+#     pad_mode = ['reflect', 'constant']
+#     power = [1.0, 2.0]
+#     n_mels = [120, 32]
+#     fmin = [0, 10]
+#     fmax = [8000, None]
+#     args = [
+#         sr, n_fft, hop_length, win_length, window, center, pad_mode, power,
+#         n_mels, fmin, fmax
+#     ]
+#     return itertools.product(*args)
+
+# @pytest.mark.parametrize(
+#     'sr,n_fft,hop_length,win_length,window,center,pad_mode,power,n_mels,fmin,fmax',
+#     generate_mel_test())
+# def test_melspectrogram(sr, n_fft, hop_length, win_length, window, center,
+#                         pad_mode, power, n_mels, fmin, fmax):
+
+#     melspectrogram = MelSpectrogram(sr, n_fft, hop_length, win_length, window,
+#                                     center, pad_mode, power, n_mels, fmin, fmax)
+#     signal_length = 32000 * 5
+#     signal = np.random.uniform(-1, 1, signal_length).astype('float32')
+#     signal_tensor = paddle.to_tensor(signal)  #.to(device)
+
+#     src = melspectrogram(signal_tensor.unsqueeze(0))
+
+#     target = librosa.feature.melspectrogram(signal,
+#                                             sr=sr,
+#                                             n_fft=n_fft,
+#                                             win_length=win_length,
+#                                             hop_length=hop_length,
+#                                             window=window,
+#                                             center=center,
+#                                             pad_mode=pad_mode,
+#                                             power=power,
+#                                             n_mels=n_mels,
+#                                             fmin=fmin,
+#                                             fmax=fmax)
+
+#     assert np.allclose(src.numpy()[0], target, atol=1e-4)
--- a/PaddleAudio/test/unit_test/test_window.py
+++ b/PaddleAudio/test/unit_test/test_window.py
@@ -12,56 +12,75 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import itertools
+
 import numpy as np
-import paddleaudio
+import paddle
+import paddleaudio as pa
 import pytest
-import scipy
+from scipy.signal import get_window
+
+
+def test_data():
+    win_length = [256, 512, 1024]
+    sym = [True, False]
+    device = ['gpu', 'cpu']
+    dtype = ['float32', 'float64']
+    args = [win_length, sym, device, dtype]
+    return itertools.product(*args)
+
+
+@pytest.mark.parametrize('win_length,sym,device,dtype', test_data())
+def test_window(win_length, sym, device, dtype):
+    paddle.set_device(device)
+    if dtype == 'float64':
+        upper_err = 7e-8
+        lower_err = 0
+    else:
+        upper_err = 8e-8
+        lower_err = 0
+
+    src = pa.blackman_window(win_length, sym, dtype=dtype).numpy()
+    expected = get_window('blackman', win_length, not sym)
+    assert np.mean(np.abs(src - expected)) < upper_err
+    assert np.mean(np.abs(src - expected)) >= lower_err
+
+    src = pa.bohman_window(win_length, sym, dtype=dtype).numpy()
+    expected = get_window('bohman', win_length, not sym)
+    assert np.mean(np.abs(src - expected)) < upper_err
+    assert np.mean(np.abs(src - expected)) >= lower_err
+
+    src = pa.triang_window(win_length, sym, dtype=dtype).numpy()
+    expected = get_window('triang', win_length, not sym)
+    assert np.mean(np.abs(src - expected)) < upper_err
+    assert np.mean(np.abs(src - expected)) >= lower_err
+
+    src = pa.hamming_window(win_length, sym, dtype=dtype).numpy()
+    expected = get_window('hamming', win_length, not sym)
+    assert np.mean(np.abs(src - expected)) < upper_err
+    assert np.mean(np.abs(src - expected)) >= lower_err
+
+    src = pa.hann_window(win_length, sym, dtype=dtype).numpy()
+    expected = get_window('hann', win_length, not sym)
+    assert np.mean(np.abs(src - expected)) < upper_err
+    assert np.mean(np.abs(src - expected)) >= lower_err

-EPS = 1e-8
-test_data = [
-    (512, True),
-    (512, False),
-    (1024, True),
-    (1024, False),
-    (200, False),
-    (200, True),
-]
+    src = pa.tukey_window(win_length, 0.5, sym, dtype=dtype).numpy()
+    expected = get_window(('tukey', 0.5), win_length, not sym)
+    assert np.mean(np.abs(src - expected)) < upper_err
+    assert np.mean(np.abs(src - expected)) >= lower_err

+    src = pa.gaussian_window(win_length, 0.5, sym, dtype=dtype).numpy()
+    expected = get_window(('gaussian', 0.5), win_length, not sym)
+    assert np.mean(np.abs(src - expected)) < upper_err
+    assert np.mean(np.abs(src - expected)) >= lower_err

-@pytest.mark.parametrize('win_length,sym', test_data)
-def test_window(win_length, sym):
-    assert np.allclose(paddleaudio.blackman_window(win_length, sym).numpy(),
-                       scipy.signal.get_window('blackman', win_length, not sym),
-                       atol=1e-6)
-    assert np.allclose(paddleaudio.bohman_window(win_length, sym).numpy(),
-                       scipy.signal.get_window('bohman', win_length, not sym),
-                       atol=1e-6)
-    assert np.allclose(paddleaudio.triang_window(win_length, sym).numpy(),
-                       scipy.signal.get_window('triang', win_length, not sym),
-                       atol=1e-6)
-    assert np.allclose(paddleaudio.hamming_window(win_length, sym).numpy(),
-                       scipy.signal.get_window('hamming', win_length, not sym),
-                       atol=1e-6)
-    assert np.allclose(paddleaudio.hann_window(win_length, sym).numpy(),
-                       scipy.signal.get_window('hann', win_length, not sym),
-                       atol=1e-6)
-    assert np.allclose(paddleaudio.tukey_window(win_length, 0.5, sym).numpy(),
-                       scipy.signal.get_window(('tukey', 0.5), win_length,
-                                               not sym),
-                       atol=1e-6)
-    assert np.allclose(paddleaudio.gaussian_window(win_length, 0.5,
-                                                   sym).numpy(),
-                       scipy.signal.get_window(('gaussian', 0.5), win_length,
-                                               not sym),
-                       atol=1e-6)
-    assert np.allclose(paddleaudio.exponential_window(win_length, None, 1.0,
-                                                      sym).numpy(),
-                       scipy.signal.get_window(('exponential', None, 1.0),
-                                               win_length, not sym),
-                       atol=1e-6)
+    src = pa.exponential_window(win_length, None, 1.0, sym, dtype=dtype).numpy()
+    expected = get_window(('exponential', None, 1.0), win_length, not sym)
+    assert np.mean(np.abs(src - expected)) < upper_err
+    assert np.mean(np.abs(src - expected)) >= lower_err

-    assert np.allclose(paddleaudio.taylor_window(win_length, 4, 30, True,
-                                                 sym).numpy(),
-                       scipy.signal.get_window(('taylor', 4, 30, True),
-                                               win_length, not sym),
-                       atol=1e-6)
+    src = pa.taylor_window(win_length, 4, 30, True, sym, dtype=dtype).numpy()
+    expected = get_window(('taylor', 4, 30, True), win_length, not sym)
+    assert np.mean(np.abs(src - expected)) <= upper_err
+    assert np.mean(np.abs(src - expected)) >= lower_err